In [18]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.stats import spearmanr
import datetime
from datetime import timedelta
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules
import warnings
# Silence every warning for the rest of the session (blanket filter, no category restriction).
warnings.filterwarnings('ignore')

# pandas display limits used by the table outputs further down the notebook.
for _option, _value in (
    ('display.min_rows', 30),
    ('display.max_rows', 100),
    ('display.width', 1000),
):
    pd.set_option(_option, _value)

def general_plot(x_, y_, x_bounds = None, y_bounds = None, title = None, x_title = None, y_title = None):
    """Plot ``y_`` against ``x_`` as blue dots on a 15x4 figure and show it.

    Parameters
    ----------
    x_, y_
        Data handed unchanged to ``Axes.plot`` with the ``'bo'`` format.
    x_bounds, y_bounds
        Optional axis limits; applied only when not ``None``.
    title, x_title, y_title
        Optional figure title and axis labels; applied only when not ``None``.

    Returns
    -------
    None
        The figure is displayed through ``plt.show()``.
    """
    fig, ax = plt.subplots(figsize=(15, 4))

    # Vertical x tick labels and a background grid, configured before drawing.
    plt.xticks(rotation=90)
    plt.grid(True)

    # Each optional decoration paired with the Axes setter that applies it;
    # the order (title, labels, limits) is applied before the data is drawn.
    optional_settings = (
        (title, ax.set_title),
        (x_title, ax.set_xlabel),
        (y_title, ax.set_ylabel),
        (x_bounds, ax.set_xlim),
        (y_bounds, ax.set_ylim),
    )
    for setting, apply_setting in optional_settings:
        if setting is not None:
            apply_setting(setting)

    ax.plot(x_, y_, 'bo')

    plt.show()
    
# Useful classes to have easier access to data features
class ColumnsInput:
    """Column names of the raw event fields, as they appear in the loaded frame.

    Keeping them here lets the cells write ``ColumnsInput.time`` instead of
    repeating the literal header strings.
    """

    # Key the labelling cells group by.
    barcode = 'ddc_barcode'
    ipcode = 'ddc_ipcode'
    machine = 'ddc_mch_code'
    machine_side = 'ddc_mch_side'
    # Event name, e.g. 'LO_LOADER_IN_PRESS' or 'UN_FORK_OUT'.
    event = 'ddc_ev_subcode'
    # Event timestamp; parsed with pd.to_datetime after loading.
    time = 'ddc_ev_timestamp'
    
    
class ColumnsOutput:
    """Column names of the derived (non-raw) fields used by the notebook."""

    c_machine = 'c_machine'
    event_delta_time = 'event_delta_time'
    # Note the header text differs from the attribute name.
    day = 'y-m-day'
    month = 'month'
    # Cycle label column written by the labelling cells
    # ('CYCLE_COMPLETED' / 'CYCLE_ABORTED' / 'CYCLE_NOT_STARTED').
    status = 'status'

In [19]:

# Load the pre-processed 2021 event log. low_memory=False makes pandas read the
# file in one pass instead of chunk-wise, avoiding mixed-dtype columns.
df = pd.read_csv(
    filepath_or_buffer="Data/preprocessed_data_2021.csv",
    low_memory=False,
)

In [20]:
# Placeholder label (the *string* "False"); every row is overwritten once the
# cycle status has been computed per barcode.
df[ColumnsOutput.status] = "False"
# Convert the timestamp column to datetime64 so it sorts chronologically.
df[ColumnsInput.time] = df[ColumnsInput.time].pipe(pd.to_datetime)

In [21]:
# Count how often each (first event, last event) pair occurs across barcodes.
# The sort must be stable: several events share the same second-resolution
# timestamp, and the default quicksort may reorder such ties arbitrarily,
# which would make "first"/"last" non-deterministic. With mergesort, ties keep
# their original row order (presumably the logging order -- TODO confirm).
(
    df.sort_values(ColumnsInput.time, kind="mergesort")
    .groupby(ColumnsInput.barcode)[ColumnsInput.event]
    .agg(['first', 'last'])
    .value_counts()
)

first                   last                  
LO_LOADER_IN_PRESS      UN_FORK_OUT               17533
                        UN_UNLOADER_OUT            9427
                        UN_TCR_DOWN                 770
                        UN_TCR_UP_LMR_DOWN          258
                        OP_PRESS_LOCKED              37
                        UN_UNLOADER_UP               24
                        OP_UNLOCK_PRESS              19
                        CURING_OFF                   14
                        UN_TCR_UP_NO_SUV             12
                        UN_VACUUM                     9
CURING_OFF              CURING_ON                     8
UN_FORK_OUT             UN_TCR_DOWN                   7
LO_LOADER_IN_PRESS      UN_SWING_IN_ARMS              7
                        OP_SQUEEZE_OFF                6
UN_VACUUM               UN_FORK_OUT                   5
LO_LOADER_IN_PRESS      OP_PRESS_MOVEMENT_UP_2        4
                        OP_PRESS_MOVEMENT_UP_3        4
 

In [22]:
# Sort and group ONCE, then reuse the grouped events for all three tables
# (the frame was previously re-sorted and re-grouped for each of them).
# kind="mergesort" is a stable sort: events sharing a timestamp keep their
# original row order, so "first"/"last" are deterministic for ties.
events_by_barcode = (
    df.sort_values(ColumnsInput.time, kind="mergesort")
    .groupby(ColumnsInput.barcode)[ColumnsInput.event]
)

# One row per barcode in each table.
last_indexes = events_by_barcode.agg(['last']).reset_index()
first_indexes = events_by_barcode.agg('first').reset_index()
first_last_indexes = events_by_barcode.agg(['first', 'last']).reset_index()

# Sanity check: both tables must have one row per barcode, hence equal length.
print(len(last_indexes))
print(len(first_indexes))
first_last_indexes

28189
28189


Unnamed: 0,ddc_barcode,first,last
0,9430887732,LO_LOADER_IN_PRESS,UN_FORK_OUT
1,9430887737,LO_LOADER_IN_PRESS,UN_FORK_OUT
2,9430888020,LO_LOADER_IN_PRESS,UN_FORK_OUT
3,9430890266,LO_LOADER_IN_PRESS,UN_FORK_OUT
4,9430890274,LO_LOADER_IN_PRESS,UN_FORK_OUT
5,9430890277,LO_LOADER_IN_PRESS,UN_FORK_OUT
6,9435411478,LO_LOADER_IN_PRESS,UN_UNLOADER_OUT
7,9435411953,LO_LOADER_IN_PRESS,UN_UNLOADER_OUT
8,9435411967,LO_LOADER_IN_PRESS,UN_UNLOADER_OUT
9,9435411976,LO_LOADER_IN_PRESS,UN_UNLOADER_OUT


In [23]:
# Events that mark a regular start / a regular end of a cycle.
starting_event = ["LO_LOADER_IN_PRESS", "LO_LOADER_IN_PRESS_START"]
ending_event = ["UN_UNLOADER_OUT", "UN_FORK_OUT", "UN_UNLOADER_OUT_STOP", "UN_FORK_OUT_STOP"]

# Label every barcode from its first and last event (vectorised, same priority
# as an if / elif / else chain -- np.select takes the first matching condition):
#   last event is an ending event        -> CYCLE_COMPLETED
#   else first event is a starting event -> CYCLE_ABORTED
#   otherwise                            -> CYCLE_NOT_STARTED
cycle_finished = first_last_indexes['last'].isin(ending_event)
cycle_started = first_last_indexes['first'].isin(starting_event)
first_last_indexes[ColumnsOutput.status] = np.select(
    [cycle_finished, cycle_started],
    ['CYCLE_COMPLETED', 'CYCLE_ABORTED'],
    default='CYCLE_NOT_STARTED',
)

print(first_last_indexes.columns)
# Printed explicitly: a bare expression is only rendered when it is the last
# statement of a cell, so the counts were previously computed and discarded.
print(first_last_indexes.value_counts(ColumnsOutput.status))
first_last_indexes

Index(['ddc_barcode', 'first', 'last', 'status'], dtype='object')


Unnamed: 0,ddc_barcode,first,last,status
0,9430887732,LO_LOADER_IN_PRESS,UN_FORK_OUT,CYCLE_COMPLETED
1,9430887737,LO_LOADER_IN_PRESS,UN_FORK_OUT,CYCLE_COMPLETED
2,9430888020,LO_LOADER_IN_PRESS,UN_FORK_OUT,CYCLE_COMPLETED
3,9430890266,LO_LOADER_IN_PRESS,UN_FORK_OUT,CYCLE_COMPLETED
4,9430890274,LO_LOADER_IN_PRESS,UN_FORK_OUT,CYCLE_COMPLETED
5,9430890277,LO_LOADER_IN_PRESS,UN_FORK_OUT,CYCLE_COMPLETED
6,9435411478,LO_LOADER_IN_PRESS,UN_UNLOADER_OUT,CYCLE_COMPLETED
7,9435411953,LO_LOADER_IN_PRESS,UN_UNLOADER_OUT,CYCLE_COMPLETED
8,9435411967,LO_LOADER_IN_PRESS,UN_UNLOADER_OUT,CYCLE_COMPLETED
9,9435411976,LO_LOADER_IN_PRESS,UN_UNLOADER_OUT,CYCLE_COMPLETED


In [24]:
# Barcodes grouped by status (Series: status -> array of barcodes).
set_cycle = first_last_indexes.groupby(ColumnsOutput.status)[ColumnsInput.barcode].unique()

# Lookup table barcode -> status. first_last_indexes has exactly one row per
# barcode (it comes from a groupby on the barcode), so the index is unique.
status_by_barcode = first_last_indexes.set_index(ColumnsInput.barcode)[ColumnsOutput.status]
del first_last_indexes

# Copy the per-barcode label onto every event row with one hash lookup per row.
# The previous row-wise apply tested `barcode in <numpy array>`, a linear scan
# for every row, and raised KeyError when a status had no barcodes at all.
# Rows whose barcode has no entry (e.g. a missing barcode) fall back to
# CYCLE_NOT_STARTED, which is what the former else-branch produced.
df[ColumnsOutput.status] = (
    df[ColumnsInput.barcode]
    .map(status_by_barcode)
    .fillna('CYCLE_NOT_STARTED')
)
df

Unnamed: 0.1,Unnamed: 0,ddc_barcode,ddc_ipcode,ddc_mch_code,ddc_mch_side,ddc_ev_subcode,ddc_ev_timestamp,c_machine,month,y-m-day,status
0,0,9436800087,37887,1305,R,LO_LOADER_IN_PRESS,2021-12-17 09:15:23,False,12,21-12-17,CYCLE_ABORTED
1,1,9436800087,37887,1305,R,LO_BLADDER_VACUUM,2021-12-17 09:15:27,False,12,21-12-17,CYCLE_ABORTED
2,2,9436800087,37887,1305,R,LO_BLADDER_PRESHAPING,2021-12-17 09:15:27,False,12,21-12-17,CYCLE_ABORTED
3,3,9436800087,37887,1305,R,LO_LOADER_DOWN,2021-12-17 09:15:34,False,12,21-12-17,CYCLE_ABORTED
4,4,9436800087,37887,1305,R,LO_TCR_DOWN,2021-12-17 09:15:38,False,12,21-12-17,CYCLE_ABORTED
5,5,9436800087,37887,1305,R,LO_PRESHAPING,2021-12-17 09:15:41,False,12,21-12-17,CYCLE_ABORTED
6,6,9436800087,37887,1305,R,LO_LOADER_UP,2021-12-17 09:15:56,False,12,21-12-17,CYCLE_ABORTED
7,7,9436800087,37887,1305,R,LO_LOADER_RELEASE_TIRE,2021-12-17 09:15:56,False,12,21-12-17,CYCLE_ABORTED
8,8,9436800087,37887,1305,R,LO_LOADER_OUT,2021-12-17 09:15:58,False,12,21-12-17,CYCLE_ABORTED
9,9,9436800087,37887,1305,R,CL_UNLOCK_PRESS,2021-12-17 09:16:02,False,12,21-12-17,CYCLE_ABORTED


In [27]:
# Keep only the event rows that belong to aborted cycles.
is_aborted = df[ColumnsOutput.status].eq('CYCLE_ABORTED')
aborted_jobs = df[is_aborted]
print(aborted_jobs)

# Persist the fully labelled frame and the aborted subset.
# DataFrame.to_csv returns None when given a path, so both names below are
# bound to None.
save_df = df.to_csv('Data/label_preprocessed_data_2021.csv', index=False)
# NOTE(review): the input and the labelled output are named *_2021 and the rows
# are dated 2021, yet this file is named *_2022 -- confirm the intended name
# before renaming, since other notebooks may read this path.
save_aborts = aborted_jobs.to_csv('Data/aborted_jobs_2022.csv', index=False)

        Unnamed: 0  ddc_barcode  ddc_ipcode ddc_mch_code ddc_mch_side          ddc_ev_subcode    ddc_ev_timestamp  c_machine  month   y-m-day         status
0                0   9436800087       37887         1305            R      LO_LOADER_IN_PRESS 2021-12-17 09:15:23      False     12  21-12-17  CYCLE_ABORTED
1                1   9436800087       37887         1305            R       LO_BLADDER_VACUUM 2021-12-17 09:15:27      False     12  21-12-17  CYCLE_ABORTED
2                2   9436800087       37887         1305            R   LO_BLADDER_PRESHAPING 2021-12-17 09:15:27      False     12  21-12-17  CYCLE_ABORTED
3                3   9436800087       37887         1305            R          LO_LOADER_DOWN 2021-12-17 09:15:34      False     12  21-12-17  CYCLE_ABORTED
4                4   9436800087       37887         1305            R             LO_TCR_DOWN 2021-12-17 09:15:38      False     12  21-12-17  CYCLE_ABORTED
5                5   9436800087       37887         1305  