In [1]:
import os 
import pandas as pd
import re
import numpy as np

In [2]:
# Regex whose single capture group is the scenario keyword of a stream name.
# The leading greedy `.*` makes the match land on the LAST keyword in the string.
name_pattern = r'(?s:.*)(ssl|init|lfs|delay)'
# Regex for a decimal number with one digit before the dot and one or two after.
# Written as a raw string so `\d` and `\.` are not treated as (invalid) string escapes.
probability_pattern = r'\d\.\d{1,2}'

In [3]:
def get_type(x):
    """Return the scenario keyword captured from stream name ``x``.

    The keyword is group 1 of the module-level ``name_pattern``. When the
    pattern does not match, or ``x`` is not a string, ``'init'`` is returned.
    """
    found = re.search(name_pattern, x) if isinstance(x, str) else None
    if found is None:
        return 'init'
    return found.group(1)

def get_probability(x):
    """Return the last substring of ``x`` matching ``probability_pattern``.

    The value is returned as a string, exactly as it appears in ``x``.
    Returns ``None`` when nothing in ``x`` matches.
    """
    matches = re.findall(probability_pattern, x)
    return matches[-1] if matches else None
    

In [4]:

def _classifier_label(hyperparameters):
    """Map one parsed hyperparameter dict to the value stored in the ``classifier`` column."""
    if 'classifier' not in hyperparameters:
        return 'CluStream'
    if 'classifier_n_models' in hyperparameters:
        return 'ICLC'
    if 'classifier_grace_period' in hyperparameters:
        return 'ICLC_HT'
    if 'nu' in hyperparameters:
        return 'ICLC_NB'
    return hyperparameters['classifier']


def prepare_logs(logs_file, info_from_hyperparameters=None):
    """Read a raw log from the ``logs`` directory and return it as a formatted frame.

    The file is read as a header-less CSV. Its first five columns are named
    ``stream``, ``hyperparameters``, ``B``, ``frequency`` and ``metric``; the
    following columns are named ``B_1``, ``B_2``, ... and the trailing ones
    ``m_0``, ``m_1``, ... based on the value of ``B`` in the first row.
    The raw ``stream`` field is split on ``' - '`` into ``date`` and ``stream``,
    and ``type`` / ``probability`` are derived from the stream name.

    Parameters
    ----------
    logs_file : str
        Name of the file inside the ``logs`` directory.
    info_from_hyperparameters : iterable of str, optional
        Hyperparameter names to expose as columns. ``'classifier'`` is turned
        into a label (see ``_classifier_label``); any other name is looked up
        in the parsed hyperparameters and is ``None`` where absent. Names
        should not collide with columns that already exist.

    Returns
    -------
    pandas.DataFrame
        Columns ordered as: ``stream``, ``hyperparameters``, ``date``, ``type``,
        ``probability``, the requested hyperparameter columns, then everything else.
    """
    # Exactly one column is added per requested name (duplicates ignored), so the
    # column ordering below no longer depends on how many names were passed.
    if info_from_hyperparameters is None:
        requested = []
    else:
        requested = list(dict.fromkeys(info_from_hyperparameters))

    # read data
    res = pd.read_csv(os.path.join('logs', logs_file), header=None)

    # renaming columns
    res = res.rename(columns={0: 'stream', 1: 'hyperparameters', 2: 'B', 3: 'frequency', 4: 'metric'})
    # hyperparameters are logged as "(key;value)(key;value)..." -> dict of strings
    res['hyperparameters'] = res['hyperparameters'].apply(
        lambda x: dict([i.split(";") for i in re.findall(r'\((.*?)\)', x)])
    )
    B = int(res['B'].iloc[0])
    cl = res.iloc[:, B+7:].columns
    # NOTE(review): the column at position B+6 is neither a B_* nor an m_* column
    # and keeps its positional integer label — confirm this is intended.
    res = res.rename(columns=dict(zip(res.columns[5:B+6], [f"B_{i}" for i in range(1, B+3)])))
    res = res.rename(columns=dict(zip(cl, [f"m_{i}" for i in range(len(cl))])))

    # adding new columns
    res['date'] = res['stream'].apply(lambda x: x.split(' - ')[0])
    res['stream'] = res['stream'].apply(lambda x: x.split(' - ')[1])
    res['type'] = res['stream'].apply(get_type)
    res['probability'] = res['stream'].apply(get_probability)

    for key in requested:
        if key == 'classifier':
            res['classifier'] = res['hyperparameters'].apply(_classifier_label)
        else:
            res[key] = res['hyperparameters'].apply(lambda hp, key=key: hp.get(key))

    # change order: identifying columns first, then the derived ones, then the rest
    front = list(dict.fromkeys(list(res.columns[:2]) + ['date', 'type', 'probability'] + requested))
    rest = [column for column in res.columns if column not in front]
    res = res[front + rest]

    return res


In [5]:
def save_formatted_logs(res, file):
    """Write ``res`` as CSV (without the index) to ``logs_formatted/<file>``.

    ``file`` may contain a sub-directory; missing directories are created so
    the first export for a new stream does not fail.
    """
    target = os.path.join("logs_formatted", file)
    os.makedirs(os.path.dirname(target), exist_ok=True)
    res.to_csv(target, index=False)

In [6]:
def concatenate_res(res, file):
    """Append the rows of ``res`` to the existing CSV ``logs_formatted/<file>``.

    The existing file is read, ``res`` is stacked below it and the result is
    written back without the index. Raises ``FileNotFoundError`` (from
    ``pd.read_csv``) when the file does not exist yet.
    """
    target = os.path.join("logs_formatted", file)
    existing = pd.read_csv(target)
    # Column labels read back from CSV are always strings, while ``res`` may still
    # carry integer labels; without normalising, concat would treat e.g. 56 and
    # '56' as different columns and write the header twice.
    incoming = res.rename(columns=str)
    combined = pd.concat([existing, incoming], ignore_index=True)
    combined.to_csv(target, index=False)

In [7]:
# Short file-name label for each value that can appear in the `classifier` column.
mapper = {
    "<class 'river.forest.adaptive_random_forest.ARFClassifier'>": 'FARF',
    "<class 'river.tree.hoeffding_adaptive_tree_classifier.HoeffdingAdaptiveTreeClassifier'>": 'FHT',
    "<class 'river.naive_bayes.gaussian.GaussianNB'>": 'FNB',
    "<class 'river.dummy.PriorClassifier'>": 'Majority',
    "<class 'river.dummy.NoChangeClassifier'>": 'NoChange',
    "CluStream": "CluStream",
}

In [8]:
# List the available raw logs. os.listdir() returns names in arbitrary order,
# so sort them to get the same listing on every run and platform.
sorted(os.listdir('logs'))

['05_08_2023_17_17_30_Accuracy_Electricity.log',
 '05_08_2023_17_41_38_Accuracy_RBF_moderate.log',
 '05_08_2023_17_52_34_Accuracy_RBF_moderate.log',
 '05_08_2023_17_56_12_Accuracy_RBF_fast.log',
 '05_08_2023_17_58_26_Accuracy_RBF_fast.log',
 '05_08_2023_18_18_17_Accuracy_Hyperplane.log',
 '07_08_2023_10_34_43_Accuracy_RBF_moderate.log',
 '07_08_2023_11_24_21_Accuracy_RBF_moderate.log',
 '07_08_2023_13_11_27_Accuracy_LED_abrupt.log',
 '07_08_2023_13_18_05_Accuracy_LED_abrupt.log',
 '07_08_2023_13_19_08_Accuracy_RBF_moderate.log',
 '10_08_2023_13_03_18_Accuracy_LED_abrupt.log',
 '10_08_2023_16_31_31_Accuracy_LED_abrupt.log',
 '15_08_2023_12_50_14_Accuracy_LED_abrupt.log',
 '16_08_2023_09_30_06_Accuracy_LED_abrupt.log',
 '19_08_2023_12_54_38_Accuracy_Hyperplane.log',
 '19_08_2023_12_57_40_Accuracy_Hyperplane.log',
 '19_08_2023_13_23_44_Accuracy_Hyperplane.log',
 '19_08_2023_13_27_04_Accuracy_Hyperplane.log',
 '19_08_2023_14_38_02_Accuracy_Hyperplane.log',
 '19_08_2023_15_11_27_Accuracy_LE

In [9]:


# Load the most recent log. os.listdir() returns names in arbitrary order, so
# taking its last element is not reliable; the file names start with a
# DD_MM_YYYY_HH_MM_SS timestamp, which is used to order them chronologically.
def _log_sort_key(file_name):
    """Return (year, month, day, hour, minute, second) parsed from a log file name, or None if it has no timestamp prefix."""
    stamp = re.match(r'(\d{2})_(\d{2})_(\d{4})_(\d{2})_(\d{2})_(\d{2})_', file_name)
    if stamp is None:
        return None
    day, month, year, hour, minute, second = stamp.groups()
    return (year, month, day, hour, minute, second)


# Files without a timestamp prefix (e.g. editor or OS artefacts) are ignored.
timestamped_logs = [name for name in os.listdir('logs') if _log_sort_key(name) is not None]
logs_file = max(timestamped_logs, key=_log_sort_key)
print(logs_file)
res = prepare_logs(logs_file, ['classifier', 'threshold'])


res

23_08_2023_09_59_09_Accuracy_Hyperplane.log


Unnamed: 0,stream,hyperparameters,date,type,probability,classifier,threshold,B,frequency,metric,...,m_90,m_91,m_92,m_93,m_94,m_95,m_96,m_97,m_98,m_99
0,HyperPlane_0_10000_constant_delay_lfs_0.2_0_10000,"{'n_models': '10', 'max_features': '3', 'lambd...",23-Aug-23 10:00:59,lfs,0.2,<class 'river.forest.adaptive_random_forest.AR...,,50,100,Accuracy,...,,,,,,,,,,
1,HyperPlane_0_10000_constant_delay_lfs_0.2_0_10000,"{'grace_period': '200', 'max_depth': 'inf', 's...",23-Aug-23 10:01:11,lfs,0.2,<class 'river.tree.hoeffding_adaptive_tree_cla...,,50,100,Accuracy,...,,,,,,,,,,
2,HyperPlane_0_10000_constant_delay_lfs_0.2_0_10000,"{'train_period': '0', 'classifier': '<class 'r...",23-Aug-23 10:01:15,lfs,0.2,<class 'river.naive_bayes.gaussian.GaussianNB'>,,50,100,Accuracy,...,,,,,,,,,,
3,HyperPlane_0_10000,"{'n_models': '10', 'max_features': '3', 'lambd...",23-Aug-23 10:01:18,init,,<class 'river.forest.adaptive_random_forest.AR...,,50,100,Accuracy,...,0.86,0.92,0.84,0.92,0.9,0.88,0.82,0.84,0.84,0.9
4,HyperPlane_0_10000,"{'grace_period': '200', 'max_depth': 'inf', 's...",23-Aug-23 10:01:33,init,,<class 'river.tree.hoeffding_adaptive_tree_cla...,,50,100,Accuracy,...,0.89,0.9,0.89,0.93,0.96,0.95,0.9,0.89,0.92,0.95
5,HyperPlane_0_10000,"{'train_period': '0', 'classifier': '<class 'r...",23-Aug-23 10:01:36,init,,<class 'river.naive_bayes.gaussian.GaussianNB'>,,50,100,Accuracy,...,0.91,0.94,0.91,0.91,0.96,0.94,0.84,0.89,0.93,0.92
6,HyperPlane_0_10000_constant_delay_lfs_0.4_0_10000,"{'n_models': '10', 'max_features': '3', 'lambd...",23-Aug-23 10:02:06,lfs,0.4,<class 'river.forest.adaptive_random_forest.AR...,,50,100,Accuracy,...,,,,,,,,,,
7,HyperPlane_0_10000_constant_delay_lfs_0.4_0_10000,"{'grace_period': '200', 'max_depth': 'inf', 's...",23-Aug-23 10:02:18,lfs,0.4,<class 'river.tree.hoeffding_adaptive_tree_cla...,,50,100,Accuracy,...,,,,,,,,,,
8,HyperPlane_0_10000_constant_delay_lfs_0.4_0_10000,"{'train_period': '0', 'classifier': '<class 'r...",23-Aug-23 10:02:20,lfs,0.4,<class 'river.naive_bayes.gaussian.GaussianNB'>,,50,100,Accuracy,...,,,,,,,,,,
9,HyperPlane_0_10000_constant_delay,"{'n_models': '10', 'max_features': '3', 'lambd...",23-Aug-23 10:05:44,delay,,<class 'river.forest.adaptive_random_forest.AR...,,50,100,Accuracy,...,0.86,0.92,0.84,0.92,0.9,0.88,0.82,0.84,0.84,0.9


In [18]:
# Display `res` without its last 200 columns.
# NOTE(review): this cell's execution count (In [18]) is out of sequence and the
# output shown is dated differently from the log loaded in the cell above —
# re-run the notebook top to bottom to confirm it still reflects `res`.
res.iloc[:,:-200]

Unnamed: 0,stream,hyperparameters,date,type,probability,classifier,threshold,B,frequency,metric,...,B_43,B_44,B_45,B_46,B_47,B_48,B_49,B_50,B_51,56
0,HyperPlane_0_20000_constant_delay,"{'classifier_grace_period': '200', 'classifier...",19-Aug-23 13:29:28,delay,,ICLC_HT,,50,100,Accuracy,...,0.814796,0.814096,0.814096,0.813595,0.814696,0.814096,0.815998,0.815797,0.815897,0.816198
1,HyperPlane_0_20000_constant_delay_ssl_0.1_0_20000,"{'classifier_grace_period': '200', 'classifier...",19-Aug-23 13:32:07,ssl,0.1,ICLC_HT,,50,100,Accuracy,...,0.861183,0.861519,0.861856,0.860061,0.861519,0.860173,0.861856,0.861968,0.861968,0.863539
2,HyperPlane_0_20000_constant_delay,"{'classifier_grace_period': '200', 'classifier...",19-Aug-23 13:33:04,delay,,ICLC_HT,,50,100,CohenKappa,...,0.777125,0.773716,0.771714,0.770518,0.773117,0.774319,0.775127,0.775932,0.776132,0.775554
3,HyperPlane_0_20000_constant_delay_ssl_0.2_0_20000,"{'classifier_grace_period': '200', 'classifier...",19-Aug-23 13:33:28,ssl,0.2,ICLC_HT,,50,100,Accuracy,...,0.852532,0.852278,0.852152,0.853671,0.853038,0.851772,0.851013,0.851519,0.851519,0.851899
4,HyperPlane_0_20000_constant_delay_ssl_0.4_0_20000,"{'classifier_grace_period': '200', 'classifier...",19-Aug-23 13:35:48,ssl,0.4,ICLC_HT,,50,100,Accuracy,...,0.867818,0.868648,0.867984,0.865991,0.866656,0.86649,0.864497,0.863833,0.863833,0.861508
5,HyperPlane_0_20000_constant_delay_ssl_0.1_0_20000,"{'classifier_grace_period': '200', 'classifier...",19-Aug-23 13:36:41,ssl,0.1,ICLC_HT,,50,100,CohenKappa,...,0.798454,0.799573,0.799575,0.799797,0.802043,0.80092,0.802043,0.803613,0.803613,0.802937
6,HyperPlane_0_20000_constant_delay_ssl_0.2_0_20000,"{'classifier_grace_period': '200', 'classifier...",19-Aug-23 13:38:43,ssl,0.2,ICLC_HT,,50,100,CohenKappa,...,0.794453,0.790151,0.789398,0.78813,0.787877,0.792686,0.793445,0.791421,0.791421,0.791433
7,HyperPlane_0_20000_constant_delay_ssl_0.4_0_20000,"{'classifier_grace_period': '200', 'classifier...",19-Aug-23 13:39:29,ssl,0.4,ICLC_HT,,50,100,CohenKappa,...,0.711204,0.709212,0.709876,0.710536,0.7112,0.7112,0.711528,0.711861,0.711861,0.712334


In [10]:
# Append the rows of `res` to one formatted CSV per classifier:
# logs_formatted/<stream_name>/<classifier label><metric><delay_type>.csv
stream_name = 'HyperPlane'
metric = ''        # optional file-name suffix
delay_type = ''    # optional file-name suffix
# Optional pre-filters (uncomment to apply before grouping):
# res = res.iloc[:,:-200]
# res = res[res['type']=='ssl']
groups2 = res.groupby('classifier')
for classifier_key, classifier_rows in groups2:
    # Fall back to the raw value when no short label is registered in `mapper`.
    classifier_name = mapper.get(classifier_key, classifier_key)
    print(classifier_name)
    print(classifier_rows['stream'])
    # os.path.join instead of a hard-coded '\\' so the path also works off Windows.
    target_file = os.path.join(stream_name, f'{classifier_name}{metric}{delay_type}.csv')
    try:
        concatenate_res(classifier_rows, target_file)
    except FileNotFoundError:
        # No formatted file exists yet, so start a new one. Any other failure is
        # left to propagate: falling back to a fresh save there would overwrite
        # previously accumulated results.
        save_formatted_logs(classifier_rows, target_file)

FARF
0     HyperPlane_0_10000_constant_delay_lfs_0.2_0_10000
3                                    HyperPlane_0_10000
6     HyperPlane_0_10000_constant_delay_lfs_0.4_0_10000
9                     HyperPlane_0_10000_constant_delay
12    HyperPlane_0_10000_constant_delay_ssl_0.2_0_10000
15    HyperPlane_0_10000_constant_delay_ssl_0.4_0_10000
Name: stream, dtype: object
FNB
2     HyperPlane_0_10000_constant_delay_lfs_0.2_0_10000
5                                    HyperPlane_0_10000
8     HyperPlane_0_10000_constant_delay_lfs_0.4_0_10000
11                    HyperPlane_0_10000_constant_delay
14    HyperPlane_0_10000_constant_delay_ssl_0.2_0_10000
17    HyperPlane_0_10000_constant_delay_ssl_0.4_0_10000
Name: stream, dtype: object
FHT
1     HyperPlane_0_10000_constant_delay_lfs_0.2_0_10000
4                                    HyperPlane_0_10000
7     HyperPlane_0_10000_constant_delay_lfs_0.4_0_10000
10                    HyperPlane_0_10000_constant_delay
13    HyperPlane_0_10000_constant_d

In [None]:
# Append the first four rows of `res` (without `threshold` and without the last
# 200 columns) to logs_formatted/LED_gradual/CluStream.csv.
# os.path.join replaces the hard-coded '\\' so the path is portable.
concatenate_res(res.drop(columns='threshold').iloc[:4, :-200], os.path.join("LED_gradual", "CluStream.csv"))

In [None]:
# Write the rows of `res` from position 6 onwards (without the last 200 columns)
# to logs_formatted/LED_abrupt/CluStream.csv, replacing any existing file.
# os.path.join replaces the hard-coded '\\' so the path is portable.
save_formatted_logs(res.iloc[6:, :-200], os.path.join('LED_abrupt', 'CluStream.csv'))

In [None]:
# Inspect the raw 'classifier' entry of the parsed hyperparameters for the row at position 28.
res.iloc[28]['hyperparameters']['classifier']

"<class 'river.naive_bayes.gaussian.GaussianNB'>"

In [None]:
# Load the previously formatted file logs_formatted/CoverType/CluStream.csv.
# os.path.join replaces the hard-coded '\\' so the path is portable.
old_res = pd.read_csv(os.path.join("logs_formatted", "CoverType", "CluStream.csv"))

In [None]:
# Keep only the rows whose `threshold` is not equal to 1.
keep_rows = old_res['threshold'].ne(1)
old_res = old_res.loc[keep_rows]

In [None]:
# Display the rows at positions 8-15 of `res`, leaving out its last 200 columns.
res.iloc[8:16,:-200]

Unnamed: 0,stream,hyperparameters,date,type,probability,classifier,threshold,B,frequency,B_1,...,m_190,m_191,m_192,m_193,m_194,m_195,m_196,m_197,m_198,m_199
8,LED_gradual_delay_0_20000,"{'threshold': '1', 'train_period': '0', 'class...",26-May-23 01:28:34,delay,,<class 'river.naive_bayes.gaussian.GaussianNB'>,1,50,100,0.65084,...,0.73,0.69,0.75,0.77,0.72,0.77,0.77,0.7,0.71,0.78
9,initail_LED_Drift_gradual,"{'threshold': '1', 'train_period': '0', 'class...",25-May-23 23:49:41,init,,<class 'river.naive_bayes.gaussian.GaussianNB'>,1,50,100,0.0,...,0.73,0.69,0.75,0.77,0.72,0.77,0.77,0.7,0.71,0.78
10,LED_gradual_constant_delay_lfs_0.1_0_20000,"{'threshold': '1', 'train_period': '0', 'class...",25-May-23 23:49:35,lfs,0.1,<class 'river.naive_bayes.gaussian.GaussianNB'>,1,50,100,0.0,...,,,,,,,,,,
11,LED_gradual_constant_delay_lfs_0.2_0_20000,"{'threshold': '1', 'train_period': '0', 'class...",26-May-23 00:01:39,lfs,0.2,<class 'river.naive_bayes.gaussian.GaussianNB'>,1,50,100,0.0,...,,,,,,,,,,
12,LED_gradual_constant_delay_lfs_0.5_0_20000,"{'threshold': '1', 'train_period': '0', 'class...",26-May-23 00:08:43,lfs,0.5,<class 'river.naive_bayes.gaussian.GaussianNB'>,1,50,100,0.0,...,,,,,,,,,,
13,LED_gradual_constant_delay_ssl_0.1_0_20000,"{'threshold': '1', 'train_period': '0', 'class...",26-May-23 05:58:19,ssl,0.1,<class 'river.naive_bayes.gaussian.GaussianNB'>,1,50,100,0.650095,...,,,,,,,,,,
14,LED_gradual_constant_delay_ssl_0.2_0_20000,"{'threshold': '1', 'train_period': '0', 'class...",26-May-23 08:00:28,ssl,0.2,<class 'river.naive_bayes.gaussian.GaussianNB'>,1,50,100,0.647374,...,,,,,,,,,,
15,LED_gradual_constant_delay_ssl_0.5_0_20000,"{'threshold': '1', 'train_period': '0', 'class...",26-May-23 10:43:42,ssl,0.5,<class 'river.naive_bayes.gaussian.GaussianNB'>,1,50,100,0.639145,...,,,,,,,,,,
