In [None]:
## Load Dataframe
import pandas as pd
import warnings
import fnmatch
import os

# Opt in to pandas' future behaviour of not silently downcasting dtypes.
pd.set_option('future.no_silent_downcasting', True)
# Directory tree that is walked when looking for the input CSV files.
root = "cookieScan"
def loadDataFrameFromFileRegex(root, regex):
    """Read every matching, non-empty CSV below *root* into one DataFrame.

    Args:
        root: Directory that is walked recursively with ``os.walk``.
        regex: Pattern matched against each bare file name. Despite the
            parameter name it is passed to ``fnmatch.fnmatch``, so it is a
            shell-style glob (e.g. ``'COOKIESCAN-*.csv'``), not a regular
            expression.

    Returns:
        The row-wise concatenation of all files read, with a fresh integer
        index. If nothing matched, a warning is issued and an empty
        DataFrame is returned.
    """
    frames = []
    for directory, _subdirs, filenames in os.walk(root):
        for filename in filenames:
            if not fnmatch.fnmatch(filename, regex):
                continue
            full_path = os.path.join(directory, filename)
            # Zero-byte files are skipped before they reach read_csv.
            if os.path.getsize(full_path) <= 0:
                continue
            frames.append(pd.read_csv(full_path))

    if len(frames) == 0:
        warnings.warn("No matching file found in "+root+" for regex: "+regex+". Empty dataframe will be returned." )
        return pd.DataFrame()

    # FutureWarnings raised while concatenating are suppressed for this call only.
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=FutureWarning)
        combined = pd.concat(frames, ignore_index=True)
    return combined
        
# Load every non-empty COOKIESCAN-*.csv found below `root` into one frame.
df = loadDataFrameFromFileRegex(root, 'COOKIESCAN-*.csv')
# Print the distinct action names so the column lists further down can be
# checked against what is actually present in the data.
a = df['action'].unique()
print(sorted(a))

# Earliest and latest timestamp per (tenant, ticket, action).
# The grouping keys have to contain 'dsr_ticket' and 'action' because the
# pivot below uses them as index and columns; grouping by
# ['tenant', 'domain', 'scan-id'] dropped both columns and made the pivot
# fail with a KeyError.
# NOTE(review): assumes the input has a 'dsr_ticket' column -- confirm.
df = df.groupby(['tenant', 'dsr_ticket', 'action']).agg(first=('timestamp', 'min'), last=('timestamp', 'max')).reset_index()
# One row per (tenant, ticket); one 'first'/'last' column pair per action.
df = df.pivot(index=['tenant', 'dsr_ticket'], columns='action', values=['first', 'last']).reset_index()
# Flatten the two-level column labels: ('first', 'x') -> 'first x',
# ('tenant', '') -> 'tenant'.
df.columns = [' '.join(col).strip() for col in df.columns.values]
# display(df)

created_cols = ['first dsr-ticketcreate', 'first generic-dsr-response',
                'first ticket_create_timeout', 'first dsr-taskgen']
# NOTE(review): this list mixes one 'first' column with two 'last' columns;
# kept as in the original -- confirm that is intended.
published_cols = ['last dsr-bundle-complete-attachments',
                  'first dsr-ticket-draft-messages-deletion', 'last dsr-reject']
automation_cols = ['last dd_dsr_exec', 'last dsr-validation-task',
                   'last generic-dsr-response', 'last dsr-file-scan-response',
                   'last pd-attribute-update-request']

# reindex(columns=...) instead of df[[...]]: an action that never occurs in
# the input then becomes an all-NaN column rather than raising KeyError.
df['created_date'] = df.reindex(columns=created_cols).min(axis=1)
df['published_date'] = df.reindex(columns=published_cols).max(axis=1)
# True when at least one of the automation-related actions has a timestamp.
df['Robotic Automation'] = df.reindex(columns=automation_cols).max(axis=1).notna()

# NOTE(review): 'Robotic Automation' is a boolean column and never NaN, so
# with how='all' this dropna cannot remove any row -- confirm whether rows
# lacking both dates were meant to be dropped.
df = df.dropna(subset=['created_date', 'published_date', 'Robotic Automation'], how='all').reset_index()
# Explicit copy so the assignments below act on an independent frame
# (avoids pandas' SettingWithCopyWarning on a column-subset selection).
df = df[['tenant', 'dsr_ticket', 'created_date', 'published_date', 'Robotic Automation']].copy()
# Timestamps are interpreted as milliseconds since the Unix epoch.
df['created_date'] = pd.to_datetime(df['created_date'], unit='ms')
df['published_date'] = pd.to_datetime(df['published_date'], unit='ms')
df.to_csv("dsr_report.csv")
display(df)