# Visualization of the Data

In [2]:
import pandas as pd
import numpy as np
import glob

def load_preprocess_data():
    path = r'./Data-out' 
    all_files = glob.glob(path + "/hda_dataset_uniform_random_[0-7].csv")
    all_files.sort()
    
    #Load all data
    df = (pd.read_csv(f) for f in all_files)
    
    #Merge all the files with each other
    #Drop all duplicates
    concat_df = pd.concat(df).drop_duplicates().reset_index(drop=True)
    concat_df = concat_df.sort_values(by=['case', 'ts'])
    #reset index
    concat_df = concat_df.reset_index(drop=True)
    #Create a csv file
    #concat_df.to_csv('test1.csv')
    return concat_df

In [21]:
df = load_preprocess_data()
df

Unnamed: 0,case,event,ts
0,1,Order imported,2018-09-02 00:02:30
1,1,Order checked,2018-09-02 00:02:41
2,1,Address correction,2018-09-02 00:06:21
3,1,Order creation failed (SYS-2),2018-09-02 00:07:00
4,1,Device hand-out,2018-09-03 16:36:00
...,...,...,...
1405607,263247,Delivered,2018-12-04 09:27:41
1405608,263247,Activated,2018-12-04 09:51:03
1405609,263247,Printed (ERP),2018-12-04 09:51:21
1405610,263247,Customer ID transmitted,2018-12-05 09:52:10


# Visualisierung mit Plotly Express

In [4]:
import plotly.express as px

fig = px.scatter(df, x="ts", y="case", color="event")
fig.update_layout(
    title="All events on a period",
    xaxis_title="Timestamp",
    yaxis_title="Case Number",
    font=dict(
        family="Courier New, monospace",
        size=18,
        color="#7f7f7f"
    )
)

py.offline.plot(fig, filename="poltly_express.html")


'poltly_express.html'

In [4]:
import plotly_express as px
import plotly.graph_objs as go
import ipywidgets as widgets
import pandas as pd
from ipywidgets import interactive, HBox, VBox, Layout
import plotly.offline as py
import time

'''
data = pd.read_csv("./Data-out/hda_dataset_uniform_random_0.csv")
data = data[:100]
'''

data = df
data_array = data.event.unique()
#print(data)

trace = px.scatter(data, x="ts", y="case", color="event")
fig = go.FigureWidget(trace)

#Funktion für DropDown Feature
def update_event_dd(select_event):
    print(select_event)
                
    selected_data = data.loc[data.event == select_event]
    fig = px.scatter(selected_data, x="ts", y="case", color="event")
    display(fig)
    
    #py.offline.plot(fig, filename="poltly_express.html")


# Funktion für Select Multiple Feature
def update_event_sm(select_event):
    #print(select_event)
    event_list = []
    if select_event:
        for i in select_event:
            event_list.append(i)
            
    selected_data = data.loc[data.event.isin(event_list)]
    #print(event_list)
    #selected_data = data.filter(event_list)
    #print(selected_data)
    trace1 = px.scatter(selected_data, x="ts", y="case", color="event")
    fig.data = []
    fig.update(data = [d.to_plotly_json() for d in trace1.data])
    
#Funktion für ComboBox
combobox_arr = []
def update_event_cb(select_event):
    if select_event:
        combobox_arr.append(select_event)
        text_area.value = ', '.join(combobox_arr)
    time.sleep(0.5)
    combobox.value =''
          
#Funktion für Filter zurücksetzen  
def clear_filter(b):
    empty_list =[]
    #select_multiple.value = tuple(empty_list)
    global combobox_arr
    combobox_arr = empty_list
    selected_data = data.loc[data.event.isin(combobox_arr)]
    trace1 = px.scatter(selected_data, x="ts", y="case", color="event")
    fig.data = []
    fig.update(data = [d.to_plotly_json() for d in trace1.data])
    text_area.value = ''

#Funktion für alle Events im Filter setzen    
def set_filter(b):
    #select_multiple.value = tuple(data_array)
    trace1 = px.scatter(data, x="ts", y="case", color="event")
    fig.data = []
    fig.update(data = [d.to_plotly_json() for d in trace1.data])
    text_area.value = ', '.join(data_array)
    
#Funktion für submit button    
def submit_filter(b):
    selected_data = data.loc[data.event.isin(combobox_arr)]
    trace1 = px.scatter(selected_data, x="ts", y="case", color="event")
    fig.data = []
    fig.update(data = [d.to_plotly_json() for d in trace1.data])
    

event_tuple = tuple(data_array)  
#Definition der Widgets
select_multiple = widgets.SelectMultiple(options=data_array, value=event_tuple ,description='Event')
combobox = widgets.Combobox(placeholder="Filter event", options=event_tuple, description="Filter", 
                            ensure_option=True, disabled=False,
                           layout=Layout(width='800px'))
event_dropdown = widgets.Dropdown(options=data_array, description= 'Event')

#Definition von Buttons
btn_clearfilter = widgets.Button(description='Clear Filter', button_style='danger')
btn_setfilter = widgets.Button(description='Set All', button_style='info' )
btn_submit = widgets.Button(description='Submit')
text_area = widgets.Textarea(placeholder='Selected Filter', disabled = True, 
                             layout=Layout(width='800px', height='35px'))

#Interaktion der Widgets
sm = interactive(update_event_sm, select_event=select_multiple)
cb = interactive(update_event_cb, select_event=combobox)
btn_clearfilter.on_click(clear_filter)
btn_setfilter.on_click(set_filter)
btn_submit.on_click(submit_filter)


VBox([cb, text_area,HBox([btn_submit, btn_setfilter, btn_clearfilter]), fig])

VBox(children=(interactive(children=(Combobox(value='', description='Filter', ensure_option=True, layout=Layou…

### NUR ZUM TESTEN

In [31]:
import plotly.graph_objs as go
from ipywidgets import Button, HBox, VBox
import plotly_express as px
import ipywidgets as widgets
import pandas as pd
import plotly.offline as py

data = pd.read_csv("./Data-out/hda_dataset_uniform_random_0.csv")
data = data[:100]
data_array = data.event.unique()
data1 = data.loc[data.event.isin(["Invoiced"])]

trace = px.scatter(data, x="ts", y="case", color="event")
fig = go.FigureWidget(trace)
#print(fig)


def update_axes(b):
    trace1 = px.scatter(data1, x="ts", y="case", color="event")
    fig.data = []
    fig.update(data = [d.to_plotly_json() for d in trace1.data])
    
    


    
btn_setfilter = widgets.Button(description='Set All', button_style='info' )
btn_setfilter.on_click(update_axes)
    

VBox([btn_setfilter, fig])

VBox(children=(Button(button_style='info', description='Set All', style=ButtonStyle()), FigureWidget({
    'da…

### Prepare Data for Clustering

In [95]:
brands = pd.read_csv("./Data-out/brands.csv")
channels = pd.read_csv("./Data-out/channels.csv")
devices = pd.read_csv("./Data-out/devices.csv")

devices

Unnamed: 0,pid,DTYPE
0,63422,DEVICE_TYPE_23
1,63423,DEVICE_TYPE_156
2,63424,DEVICE_TYPE_111
3,63425,DEVICE_TYPE_99
4,63426,DEVICE_TYPE_135
...,...,...
128135,176009,DEVICE_TYPE_111
128136,176010,DEVICE_TYPE_27
128137,176011,DEVICE_TYPE_135
128138,176012,DEVICE_TYPE_135


In [47]:
brandarr = brands.BRAND.unique()
print(myarr)
brands.BRAND = brands.BRAND.map({'BRAND_0': 0, 'BRAND_1': 1, 'BRAND_2': 3, 'BRAND_4': 4, 'BRAND_5': 5, 'BRAND_6': 6, 'BRAND_7': 7, 'BRAND_8': 8, 'BRAND_9': 9})

['BRAND_0' 'BRAND_1' 'BRAND_2' 'BRAND_3' 'BRAND_4' 'BRAND_5' 'BRAND_6'
 'BRAND_7' 'BRAND_8' 'BRAND_9']


In [51]:
print(channels.CHANNEL.unique()) 
channels.CHANNEL = channels.CHANNEL.map({'Onlineshop': 0, 'Telesales': 1, 'Telemarketing': 2})


['Onlineshop' 'Telesales' 'Telemarketing']


In [96]:
new_data = df.join(channels.set_index("pid"), on="case")
new_data = new_data.join(brands.set_index("pid"), on="case")
new_data = new_data.join(devices.set_index("pid"), on="case")

new_data

Unnamed: 0,case,event,ts,CHANNEL,BRAND,DTYPE
0,1,Order imported,2018-09-02 00:02:30,Onlineshop,BRAND_1,
1,1,Order checked,2018-09-02 00:02:41,Onlineshop,BRAND_1,
2,1,Address correction,2018-09-02 00:06:21,Onlineshop,BRAND_1,
3,1,Order creation failed (SYS-2),2018-09-02 00:07:00,Onlineshop,BRAND_1,
4,1,Device hand-out,2018-09-03 16:36:00,Onlineshop,BRAND_1,
...,...,...,...,...,...,...
1405607,263247,Delivered,2018-12-04 09:27:41,Onlineshop,BRAND_6,
1405608,263247,Activated,2018-12-04 09:51:03,Onlineshop,BRAND_6,
1405609,263247,Printed (ERP),2018-12-04 09:51:21,Onlineshop,BRAND_6,
1405610,263247,Customer ID transmitted,2018-12-05 09:52:10,Onlineshop,BRAND_6,


In [28]:
data = df['event']
data = data[:100]
pairs = []


for i in range(len(data)-1):
    pair = data[i] + ", " + data[i+1]
    pairs.append(pair)
    ++i
    
for j in [ele for ind, ele in enumerate(pairs,1) if ele not in pairs[ind:]]:
    count = 0
    for ele in pairs:
        if j == ele:
            count += 1
    print("{} {}".format(count,j))
    count = 0

    

1 Order checked, Address correction
1 Address correction, Order creation failed (SYS-2)
1 Device hand-out, Ready for payment processing
1 Ready for payment processing, Documents complete (ERP)
1 Processing return delivery, Suspicious (Fraud)
2 Order imported, Suspicious (Fraud)
3 Suspicious (Fraud), Order checked
2 Order checked, Order creation failed (SYS-2)
3 Order creation failed (SYS-2), Device hand-out
2 Invoiced, Return delivery initiated
2 Return delivery initiated, Processing return delivery
1 Processing return delivery, Order imported
1 Documents complete (ERP), Goods in arrears
1 Goods in arrears, Ready to deliver
3 Invoiced, Order imported
6 Delivery approved, In delivery
6 In delivery, Documents complete (ERP)
6 Documents complete (ERP), Ready to deliver
7 Ready to deliver, Delivered
7 Delivered, Activated
7 Activated, Printed (ERP)
7 Printed (ERP), Customer ID transmitted
7 Customer ID transmitted, Invoiced
2 Invoiced, Export from online shop
2 Export from online shop, Ord

In [67]:
from collections import Counter
import csv

data = df['event']
#data = data[:100]
pairs = []


for i in range(len(data)-1):
    pair = data[i] + "," + data[i+1]
    pairs.append(pair)
    ++i

# Finding count of each element
list_freq= (Counter(pairs))

#Printing result of counter
#print(list_freq)

# # Printing it using loop
# for key, value in list_freq.items():
#     print(value, key)'


with open('counter.csv', mode='w') as fp:
    writer = csv.writer(fp)
    fp.write('current,next,count\n')
    for key, value in list_freq.items():  
        fp.write('{},{}\n'.format(key, value))  
    


In [69]:
test = pd.read_csv('./counter.csv')
test.sort_values(by='count', ascending=False)

Unnamed: 0,current,next,count
8,Ready to deliver,Delivered,103730
27,Export from online shop,Order imported,103372
5,Delivery approved,In delivery,91752
0,Order imported,Order checked,86261
4,Device hand-out,Delivery approved,80135
...,...,...,...
435,Order creation failed (SYS-2),Activated,1
434,Delivered,Order creation failed (SYS-2),1
149,Return delivery approved,Documents complete (ERP),1
432,Suspicious (Fraud),Documents complete (ERP),1
