# Distill Example

## Imports ##

In [1]:
import json
import distill
import pandas as pd

## Data Setup ##

In [2]:
# Load the raw logs. JSON text is UTF-8, so name the encoding explicitly
# instead of inheriting the platform default (which is not UTF-8 everywhere).
with open('./tests/data/sample_data.json', encoding='utf-8') as json_file:
    raw_data = json.load(json_file)

# Key each log by the ID that distill.getUUID returns for it; if an ID repeats,
# the last log seen with that ID wins.
data = {distill.getUUID(log): log for log in raw_data}

# Convert clientTime to specified type. Only the branch matching date_type
# runs; any other value leaves clientTime exactly as loaded.
# NOTE(review): the "integer" option goes through distill.epoch_to_datetime --
# confirm that helper's return type matches what the option name suggests.
date_type = "integer"
for uid, log in data.items():
    client_time = log['clientTime']
    if date_type == "integer":
        log['clientTime'] = distill.epoch_to_datetime(client_time)
    elif date_type == "datetime":
        log['clientTime'] = pd.to_datetime(client_time, unit='ms', origin='unix')
    elif date_type == "string":
        log['clientTime'] = str(client_time)

# Sort the (uid, log) pairs by the converted clientTime. dict() preserves
# insertion order, so sorted_dict iterates in ascending clientTime order.
sorted_data = sorted(data.items(), key=lambda kv: kv[1]['clientTime'])
sorted_dict = dict(sorted_data)

## Making Toy Segments ##

In [3]:
# Cut the time-ordered logs into fixed-length windows named "generated<N>".
# The second argument is the window length: the printed start/end values are
# 5000 apart, so it looks like seconds against millisecond timestamps --
# TODO confirm the unit in the distill documentation.
segments = distill.generate_fixed_time_segments(
    sorted_dict,
    5,
    label="generated",
)

print(segments)

Segments: [
Segment: name=generated0, num_logs=3, start=1623691890656, end=1623691895656, type=Segment_Type.FIXED_TIME
Segment: name=generated1, num_logs=0, start=1623691895656, end=1623691900656, type=Segment_Type.FIXED_TIME
Segment: name=generated2, num_logs=9, start=1623691900656, end=1623691905656, type=Segment_Type.FIXED_TIME
Segment: name=generated3, num_logs=7, start=1623691905656, end=1623691910656, type=Segment_Type.FIXED_TIME
]


### Returning Segments object ###

In [15]:
# The generator returns a dedicated collection object; show its class.
type(segments)

distill.segmentation.segments.Segments

### Iteration ###

In [16]:
# Iterating a Segments object yields its Segment entries one at a time;
# print each entry's name via its getter.
for segment in segments:
    print(segment.get_segment_name())

generated0
generated1
generated2
generated3


In [17]:
# Same iteration, this time printing how many logs each segment holds.
for segment in segments:
    print(segment.get_num_logs())

3
0
9
7


### List comprehensions work directly on the `Segments` object ###

In [18]:
# Because Segments is iterable, it can feed a comprehension directly; here
# the num_logs attribute of every segment is collected into a plain list.
number_of_logs = [segment.num_logs for segment in segments]
print(number_of_logs)

[3, 0, 9, 7]


In [19]:
# Collect the segment_name attribute of every segment into a plain list.
segment_names = [segment.segment_name for segment in segments]
print(segment_names)

['generated0', 'generated1', 'generated2', 'generated3']


### Using Subscripts ###

In [20]:
# Subscripting with an integer position returns the Segment stored there;
# walk the four positions produced above and print each name.
for position in range(4):
    print(segments[position].get_segment_name())

generated0
generated1
generated2
generated3


### `Segments` are Mutable ###

In [21]:
# Assigning to an attribute of a subscripted entry changes the Segment held
# by the collection, so the new name shows up when the collection is printed.
segments[1].segment_name = "new_name"

print(segments)

Segments: [
Segment: name=generated0, num_logs=3, start=1623691890656, end=1623691895656, type=Segment_Type.FIXED_TIME
Segment: name=new_name, num_logs=0, start=1623691895656, end=1623691900656, type=Segment_Type.FIXED_TIME
Segment: name=generated2, num_logs=9, start=1623691900656, end=1623691905656, type=Segment_Type.FIXED_TIME
Segment: name=generated3, num_logs=7, start=1623691905656, end=1623691910656, type=Segment_Type.FIXED_TIME
]


### `Segments` Filtering ###

In [22]:
# Let's say we don't want segments with fewer than 3 logs.
# In the output below, the segment with 0 logs is dropped while the ones with
# 3, 9 and 7 logs are kept, i.e. the threshold itself is included.
num_logs_segments = segments.get_num_logs(3)

print(num_logs_segments)

Segments: [
Segment: name=generated0, num_logs=3, start=1623691890656, end=1623691895656, type=Segment_Type.FIXED_TIME
Segment: name=generated2, num_logs=9, start=1623691900656, end=1623691905656, type=Segment_Type.FIXED_TIME
Segment: name=generated3, num_logs=7, start=1623691905656, end=1623691910656, type=Segment_Type.FIXED_TIME
]


In [23]:
# Alternatively, rebind the name `segments` to the filtered result so that
# every later cell works with the filtered collection only.
segments = segments.get_num_logs(3)
print(segments)

Segments: [
Segment: name=generated0, num_logs=3, start=1623691890656, end=1623691895656, type=Segment_Type.FIXED_TIME
Segment: name=generated2, num_logs=9, start=1623691900656, end=1623691905656, type=Segment_Type.FIXED_TIME
Segment: name=generated3, num_logs=7, start=1623691905656, end=1623691910656, type=Segment_Type.FIXED_TIME
]


## Return different data structures ##

### Dictionary: the format returned previously (possibly obsolete now) ###

In [24]:
# Ask the collection for its segments keyed by segment name.
segments_dict = segments.get_segment_name_dict()

# Show every entry; the !s conversion applies str() to each value, exactly
# as an explicit str() call would.
for segment_name in segments_dict:
    print(f"key={segment_name!s}, value={segments_dict[segment_name]!s}")

[<distill.segmentation.segment.Segment object at 0x7fa398980b80>, <distill.segmentation.segment.Segment object at 0x7fa398980940>, <distill.segmentation.segment.Segment object at 0x7fa398980a00>]
key=generated0, value=Segment: name=generated0, num_logs=3, start=1623691890656, end=1623691895656, type=Segment_Type.FIXED_TIME
key=generated2, value=Segment: name=generated2, num_logs=9, start=1623691900656, end=1623691905656, type=Segment_Type.FIXED_TIME
key=generated3, value=Segment: name=generated3, num_logs=7, start=1623691905656, end=1623691910656, type=Segment_Type.FIXED_TIME


### List of Segments ###

In [26]:
# print() on a list shows the repr of each element, so the Segment entries
# appear as default "<... object at 0x...>" text rather than the formatted
# "Segment: name=..." lines seen when a Segment is converted with str().
segments_list = segments.get_segment_list()
print(segments_list)

[<distill.segmentation.segment.Segment object at 0x7fa398980b80>, <distill.segmentation.segment.Segment object at 0x7fa398980940>, <distill.segmentation.segment.Segment object at 0x7fa398980a00>]
