In [1]:
import os 
import pandas as pd
import re
import numpy as np

In [2]:
# Regexes applied to stream names.
# name_pattern: the greedy `(?s:.*)` prefix makes group 1 capture the LAST
# occurrence of one of the keywords in the name.
name_pattern = r'(?s:.*)(ssl|init|lfs|delay)'
# probability_pattern: a digit, a dot, then one or two digits (e.g. "0.1").
# Raw string so the backslashes are not treated as (invalid) string escapes.
probability_pattern = r'\d\.\d{1,2}'

In [3]:
def get_type(x):
    """Return the stream type keyword found in the stream name ``x``.

    The keyword is group 1 of ``name_pattern``. ``'init'`` is returned when the
    pattern does not match, or when ``x`` is not a string/bytes value that
    ``re.search`` can scan (e.g. a missing value).
    """
    try:
        match = re.search(name_pattern, x)
    except TypeError:
        # Non-text input: keep the historical fallback instead of crashing.
        return 'init'
    # Explicit no-match check instead of a blanket `except Exception`, so that
    # unrelated errors (e.g. a broken pattern) are no longer silently hidden.
    return match.group(1) if match else 'init'

def get_probability(x):
    """Return the last substring of ``x`` that matches ``probability_pattern``.

    Gives ``None`` when ``x`` contains no match at all.
    """
    matches = re.findall(probability_pattern, x)
    return matches[-1] if matches else None
    

In [22]:

def _parse_hyperparameters(text):
    """Turn a string of ``(key;value)`` groups into a ``{key: value}`` dict."""
    return dict(pair.split(";") for pair in re.findall(r'\((.*?)\)', text))


def _classifier_label(params):
    """Derive the classifier label for one parsed hyperparameter dict."""
    if 'classifier' not in params:
        return 'CluStream'
    if 'classifier_n_models' in params:
        return 'ICLC'
    return params['classifier']


def prepare_logs(logs_file, info_from_hyperparameters=None):
    """Read one header-less CSV log from ``logs/`` and return a formatted frame.

    Layout of the raw file, as consumed here:
      * column 0 -> ``"<date> - <stream name>"`` text, split into ``date`` and
        ``stream``;
      * column 1 -> hyperparameters written as ``(key;value)`` groups, parsed
        into a dict;
      * column 2 -> ``B``; column 3 -> ``frequency``;
      * the next ``B + 2`` columns are renamed ``B_1 .. B_{B+2}`` (``B`` is
        taken from the first row);
      * every remaining column is renamed ``m_0, m_1, ...``.

    Parameters
    ----------
    logs_file : str
        File name inside the ``logs`` directory.
    info_from_hyperparameters : iterable of str, optional
        Hyperparameter keys to expose as their own columns, in the given
        order. ``'classifier'`` gets a derived label (``'CluStream'`` when the
        key is absent, ``'ICLC'`` when ``'classifier_n_models'`` is present,
        otherwise the stored value); any other key yields the stored value or
        ``None`` when it is missing.

    Returns
    -------
    pandas.DataFrame
        Columns ordered as ``stream, hyperparameters, date, type, probability,
        <requested keys...>`` followed by all remaining columns.
    """
    # `None` replaces the mutable `[]` default; duplicates are dropped (order
    # kept) so that every requested key maps to exactly one new column.
    info_keys = list(dict.fromkeys(info_from_hyperparameters or ()))

    # read data
    res = pd.read_csv(os.path.join('logs', logs_file), header=None)

    # renaming columns
    res = res.rename(columns={0: 'stream', 1: 'hyperparameters', 2: 'B', 3: 'frequency'})
    res['hyperparameters'] = res['hyperparameters'].apply(_parse_hyperparameters)
    B = int(res['B'].iloc[0])
    b_columns = res.columns[4:B + 6]
    m_columns = res.columns[B + 6:]
    new_names = {col: f"B_{i}" for i, col in enumerate(b_columns, start=1)}
    new_names.update({col: f"m_{i}" for i, col in enumerate(m_columns)})
    res = res.rename(columns=new_names)

    # adding new columns
    res['date'] = res['stream'].apply(lambda x: x.split(' - ')[0])
    res['stream'] = res['stream'].apply(lambda x: x.split(' - ')[1])
    res['type'] = res['stream'].apply(get_type)
    res['probability'] = res['stream'].apply(get_probability)

    # One column per requested key. Previously the loop ignored its variable
    # and always added `classifier` and `threshold`, which disagreed with the
    # column count assumed by the reordering step for any other request.
    for key in info_keys:
        if key == 'classifier':
            res[key] = res['hyperparameters'].apply(_classifier_label)
        else:
            res[key] = res['hyperparameters'].apply(lambda params, key=key: params.get(key))

    # change order: identifying columns first, then the derived ones, then the rest
    leading = ['stream', 'hyperparameters']
    derived = ['date', 'type', 'probability', *info_keys]
    moved = set(leading) | set(derived)
    remaining = [col for col in res.columns if col not in moved]
    res = res[leading + derived + remaining]

    return res


In [5]:
def save_formatted_logs(res,file):
    """Write ``res`` to ``logs_formatted/<file>`` as CSV without the index.

    ``file`` may contain sub-folders; the folder that will hold the CSV is
    created first if it does not exist yet, because ``DataFrame.to_csv`` does
    not create missing directories itself.
    """
    target = os.path.join("logs_formatted", file)
    os.makedirs(os.path.dirname(target), exist_ok=True)
    res.to_csv(target, index=False)

In [6]:
def concatenate_res(res,file):
    """Append ``res`` below the rows already stored in ``logs_formatted/<file>``.

    The existing CSV is read, ``res`` is stacked underneath it, and the
    combined table is written back to the same path without the index.
    """
    target = os.path.join("logs_formatted", file)
    stored = pd.read_csv(target)
    combined = pd.concat([stored, res])
    combined.to_csv(target, index=False)

In [18]:
# Short aliases for the classifier labels that appear in the logs.
mapper = {
    "<class 'river.forest.adaptive_random_forest.ARFClassifier'>": 'ARF',
    "<class 'river.tree.hoeffding_adaptive_tree_classifier.HoeffdingAdaptiveTreeClassifier'>": 'HT',
    "<class 'river.naive_bayes.gaussian.GaussianNB'>": 'NB',
    "<class 'river.dummy.PriorClassifier'>": 'Majority',
    "<class 'river.dummy.NoChangeClassifier'>": 'NoChange',
    "CluStream": "CluStream",
}

In [66]:
# Show the entries currently present in the `logs` folder.
os.listdir('logs')

['01_07_2023_12_34_42.log',
 '01_07_2023_12_51_14.log',
 '01_07_2023_13_53_36.log',
 '01_07_2023_14_39_22.log',
 '02_07_2023_08_29_08.log',
 '30_06_2023_22_13_56.log',
 '30_06_2023_22_17_04.log']

In [146]:
 # Show the entries currently present in the `logs` folder.
 os.listdir('logs')

['02_07_2023_08_29_08.log',
 '02_07_2023_12_46_04.log',
 '02_07_2023_12_47_33.log',
 '03_07_2023_10_25_50.log',
 '03_07_2023_10_29_49.log']

In [159]:
# os.listdir returns entries in arbitrary order, so sort the listing before
# picking a file by position; otherwise the selected log is not reproducible.
logs_file = sorted(os.listdir('logs'))[-4]
print(logs_file)
# Parse the chosen log and expose the classifier and threshold hyperparameters
# as their own columns.
res = prepare_logs(logs_file,['classifier','threshold'])

res

02_07_2023_12_46_04.log


Unnamed: 0,stream,hyperparameters,date,type,probability,classifier,threshold,B,frequency,B_1,...,m_390,m_391,m_392,m_393,m_394,m_395,m_396,m_397,m_398,m_399
0,LED_abrubt_0_20000,"{'n_models': '10', 'max_features': '5', 'lambd...",02-Jul-23 13:00:55,init,,<class 'river.forest.adaptive_random_forest.AR...,0.7,50,100,0.0,...,,,,,,,,,,
1,LED_abrubt_0_20000,"{'grace_period': '200', 'max_depth': 'inf', 's...",02-Jul-23 13:05:59,init,,<class 'river.tree.hoeffding_adaptive_tree_cla...,0.7,50,100,0.0,...,,,,,,,,,,
2,LED_abrubt_0_20000_constant_delay,"{'n_models': '10', 'max_features': '5', 'lambd...",02-Jul-23 13:44:23,delay,,<class 'river.forest.adaptive_random_forest.AR...,0.7,50,100,0.657182,...,,,,,,,,,,
3,LED_abrubt_0_20000_constant_delay,"{'grace_period': '200', 'max_depth': 'inf', 's...",02-Jul-23 14:21:40,delay,,<class 'river.tree.hoeffding_adaptive_tree_cla...,0.7,50,100,0.575599,...,,,,,,,,,,
4,LED_abrubt_0_20000_constant_delay_ssl_0.1_0_20000,"{'n_models': '10', 'max_features': '5', 'lambd...",02-Jul-23 14:59:29,ssl,0.1,<class 'river.forest.adaptive_random_forest.AR...,0.7,50,100,0.6211,...,,,,,,,,,,
5,LED_abrubt_0_20000_constant_delay_ssl_0.2_0_20000,"{'n_models': '10', 'max_features': '5', 'lambd...",02-Jul-23 15:52:39,ssl,0.2,<class 'river.forest.adaptive_random_forest.AR...,0.7,50,100,0.631223,...,,,,,,,,,,
6,LED_abrubt_0_20000_constant_delay_ssl_0.1_0_20000,"{'grace_period': '200', 'max_depth': 'inf', 's...",02-Jul-23 16:30:45,ssl,0.1,<class 'river.tree.hoeffding_adaptive_tree_cla...,0.7,50,100,0.534179,...,,,,,,,,,,
7,LED_abrubt_0_20000_constant_delay_ssl_0.2_0_20000,"{'grace_period': '200', 'max_depth': 'inf', 's...",02-Jul-23 17:22:15,ssl,0.2,<class 'river.tree.hoeffding_adaptive_tree_cla...,0.7,50,100,0.493311,...,,,,,,,,,,
8,LED_abrubt_0_20000,"{'classifier_n_models': '10', 'classifier_max_...",02-Jul-23 17:31:00,init,,ICLC,,50,100,0.0,...,,,,,,,,,,
9,LED_abrubt_0_20000_constant_delay,"{'classifier_n_models': '10', 'classifier_max_...",02-Jul-23 18:15:40,delay,,ICLC,,50,100,0.622648,...,,,,,,,,,,


In [152]:
# Drop the 200 right-most columns. `.copy()` makes the result an independent
# frame, so adding a column below does not raise SettingWithCopyWarning.
# NOTE: this overwrites `res`; re-running the cell trims another 200 columns.
res = res.iloc[:,:-200].copy()
# Short stream name = the first two '_'-separated tokens of `stream`.
res['stream_short'] = res['stream'].apply(lambda x: '_'.join(x.split('_')[:2]))

In [153]:
res

Unnamed: 0,stream,hyperparameters,date,type,probability,classifier,threshold,B,frequency,B_1,...,m_191,m_192,m_193,m_194,m_195,m_196,m_197,m_198,m_199,stream_short
0,LED_gradual_0_20000,"{'threshold': '0.7', 'train_period': '0', 'cla...",03-Jul-23 10:27:07,init,,<class 'river.naive_bayes.gaussian.GaussianNB'>,0.7,50,100,0.0,...,0.664053751399776,0.6871508379888268,0.7539975399753998,0.6881612651742955,0.6964586846543002,0.7868282284303827,0.6858521261079322,0.7089443635956565,0.6878483835005574,LED_gradual
1,LED_gradual_0_20000_constant_delay,"{'threshold': '0.7', 'train_period': '0', 'cla...",03-Jul-23 11:23:48,delay,,<class 'river.naive_bayes.gaussian.GaussianNB'>,0.7,50,100,0.462539,...,0.4097338233656309,0.3312527864467232,0.4632073361664057,0.3877323833908493,0.3460430059853691,0.3926442469913396,0.4080196582151233,0.4061624649859944,0.3524617617505861,LED_gradual
2,LED_gradual_0_20000_constant_delay_ssl_0.2_0_2...,"{'threshold': '0.7', 'train_period': '0', 'cla...",03-Jul-23 13:17:23,ssl,0.2,<class 'river.naive_bayes.gaussian.GaussianNB'>,0.7,50,100,0.425131,...,,,,,,,,,,LED_gradual
3,LED_gradual_0_20000_constant_delay_ssl_0.1_0_2...,"{'threshold': '0.7', 'train_period': '0', 'cla...",03-Jul-23 13:21:17,ssl,0.1,<class 'river.naive_bayes.gaussian.GaussianNB'>,0.7,50,100,0.449828,...,,,,,,,,,,LED_gradual


In [158]:
# Split `res` by classifier and write one "<alias>_kappa.csv" file per group.
groups2 = res.groupby('classifier')
for raw_label, rows in groups2:
    # Use the short alias when one is registered, otherwise keep the raw label.
    classifier_name = mapper[raw_label] if raw_label in mapper else raw_label
    print(raw_label, classifier_name, sep='\n')
    save_formatted_logs(rows, f'HyperPlane\\{classifier_name}_kappa.csv')

<class 'river.forest.adaptive_random_forest.ARFClassifier'>
ARF
<class 'river.naive_bayes.gaussian.GaussianNB'>
NB
<class 'river.tree.hoeffding_adaptive_tree_classifier.HoeffdingAdaptiveTreeClassifier'>
HT
CluStream
CluStream
ICLC
ICLC


In [241]:
# Append the first four rows of `res` (threshold column removed, last 200
# columns trimmed) to the stored LED_gradual CluStream file.
concatenate_res(
    res.drop(columns=['threshold']).iloc[:4].iloc[:, :-200],
    "LED_gradual\\CluStream.csv",
)

In [163]:
# Store rows from position 6 onwards, without the last 200 columns, as the
# LED_abrupt CluStream file.
save_formatted_logs(
    res.iloc[6:].iloc[:, :-200],
    'LED_abrupt\\CluStream.csv',
)

In [None]:
# Look up the stored classifier hyperparameter of the row at position 28.
res['hyperparameters'].iloc[28]['classifier']

"<class 'river.naive_bayes.gaussian.GaussianNB'>"

In [116]:
# Load the previously formatted CoverType CluStream results from logs_formatted.
old_res = pd.read_csv(os.path.join("logs_formatted",'CoverType\\CluStream.csv'))

In [117]:
# Keep only the rows whose threshold is different from 1.
old_res = old_res.loc[old_res['threshold'].ne(1)]

In [115]:
# Display rows at positions 8-15 without the last 200 columns.
res.iloc[8:16].iloc[:, :-200]

Unnamed: 0,stream,hyperparameters,date,type,probability,classifier,threshold,B,frequency,B_1,...,m_190,m_191,m_192,m_193,m_194,m_195,m_196,m_197,m_198,m_199
8,LED_gradual_delay_0_20000,"{'threshold': '1', 'train_period': '0', 'class...",26-May-23 01:28:34,delay,,<class 'river.naive_bayes.gaussian.GaussianNB'>,1,50,100,0.65084,...,0.73,0.69,0.75,0.77,0.72,0.77,0.77,0.7,0.71,0.78
9,initail_LED_Drift_gradual,"{'threshold': '1', 'train_period': '0', 'class...",25-May-23 23:49:41,init,,<class 'river.naive_bayes.gaussian.GaussianNB'>,1,50,100,0.0,...,0.73,0.69,0.75,0.77,0.72,0.77,0.77,0.7,0.71,0.78
10,LED_gradual_constant_delay_lfs_0.1_0_20000,"{'threshold': '1', 'train_period': '0', 'class...",25-May-23 23:49:35,lfs,0.1,<class 'river.naive_bayes.gaussian.GaussianNB'>,1,50,100,0.0,...,,,,,,,,,,
11,LED_gradual_constant_delay_lfs_0.2_0_20000,"{'threshold': '1', 'train_period': '0', 'class...",26-May-23 00:01:39,lfs,0.2,<class 'river.naive_bayes.gaussian.GaussianNB'>,1,50,100,0.0,...,,,,,,,,,,
12,LED_gradual_constant_delay_lfs_0.5_0_20000,"{'threshold': '1', 'train_period': '0', 'class...",26-May-23 00:08:43,lfs,0.5,<class 'river.naive_bayes.gaussian.GaussianNB'>,1,50,100,0.0,...,,,,,,,,,,
13,LED_gradual_constant_delay_ssl_0.1_0_20000,"{'threshold': '1', 'train_period': '0', 'class...",26-May-23 05:58:19,ssl,0.1,<class 'river.naive_bayes.gaussian.GaussianNB'>,1,50,100,0.650095,...,,,,,,,,,,
14,LED_gradual_constant_delay_ssl_0.2_0_20000,"{'threshold': '1', 'train_period': '0', 'class...",26-May-23 08:00:28,ssl,0.2,<class 'river.naive_bayes.gaussian.GaussianNB'>,1,50,100,0.647374,...,,,,,,,,,,
15,LED_gradual_constant_delay_ssl_0.5_0_20000,"{'threshold': '1', 'train_period': '0', 'class...",26-May-23 10:43:42,ssl,0.5,<class 'river.naive_bayes.gaussian.GaussianNB'>,1,50,100,0.639145,...,,,,,,,,,,
