In [132]:
import os 
import pandas as pd
import re
import numpy as np

In [133]:
name_pattern = '(?s:.*)(ssl|init|lfs|delay)'
probability_pattern  = '\d\.\d{1,2}'

In [134]:
def get_type(x):
    try:
        return re.search(name_pattern,x).group(1)
    except Exception as e:
        return 'init'

def get_probability(x):
    if re.search(probability_pattern,x):
        return re.search(probability_pattern,x).group(0)

In [237]:

def prepare_logs(logs_file,info_from_hyperparameters = []):

    # read data
    res = pd.read_csv(os.path.join('logs',logs_file),header= None)

    # renaming columns
    res = res.rename(columns={0:'stream',1:'hyperparameters',2:'B',3:'frequency'})
    res['hyperparameters'] = res['hyperparameters'].apply(lambda x: dict([i.split(";") for i in re.findall( '\((.*?)\)',x)]))
    B = res['B'].iloc[0]
    cl = res.iloc[:,B+6:].columns
    res = res.rename(columns = dict(zip(res.columns[4:B+6],[f"B_{i}" for i in range(1,B+3)])))
    res = res.rename(columns = dict(zip(cl,[f"m_{i}" for i in range(len(cl))])))

    # adding new columns
    res['date'] = res['stream'].apply(lambda x: x.split(' - ')[0])
    res['stream'] = res['stream'].apply(lambda x: x.split(' - ')[1])
    res['type'] = res['stream'].apply(lambda x :  get_type(x))
    res['probability'] = res['stream'].apply(lambda x :  get_probability(x))
    
    for inf in info_from_hyperparameters:
        res['classifier'] = res['hyperparameters'].apply(lambda x : 'CluStream' if 'classifier' not in list(x.keys()) else x['classifier'])
        res['threshold'] = res['hyperparameters'].apply(lambda x : None if 'threshold' not in list(x.keys()) else x['threshold'])
    # res['classifier'] = 'CluStream'
    # res['threshold'] = 0.1



    #change order
    new_order = list(res.columns[:2])
    new_order.extend(res.columns[-(3+len(info_from_hyperparameters)):])
    new_order.extend(res.columns[2:-(3+len(info_from_hyperparameters))])
    res = res[new_order]

    return res


In [136]:
def save_formatted_logs(res,file):
    res.to_csv(os.path.join("logs_formatted",file),index = False)

In [137]:
def concatenate_res(res,file):
    new_res = pd.read_csv(os.path.join("logs_formatted",file))
    res = pd.concat([new_res,res])
    res.to_csv(os.path.join("logs_formatted",file),index = False)

In [2]:
from river.datasets import synth
dataset = synth.Hyperplane(seed=123, n_features=2)

In [9]:
next(dataset.take(1))[0]

{0: 0.4072417636703983, 1: 0.10770023493843905}

In [165]:
os.listdir('logs')

['12_06_2023_08_40_36.log',
 '12_06_2023_08_55_42.log',
 '12_06_2023_12_24_12.log',
 '13-Jun-23 222504 - LED_abrupt_rando.txt']

In [168]:
new_res = pd.read_csv(os.path.join("logs",logs_file))

In [247]:
logs_file = os.listdir('logs')[4]
res = prepare_logs(logs_file,['classifier', 'threshold'])
res

IndexError: list index out of range

In [216]:
res['probability'].value_counts()

probability
0.1    2
0.2    2
0.5    2
Name: count, dtype: int64

In [239]:
res[[ 'stream','classifier','threshold','type','probability']]

Unnamed: 0,stream,classifier,threshold,type,probability
0,LED_gradual_constant_delay_lfs_0.5_0_20000,CluStream,,lfs,0.5
1,initail_LED_Drift_gradual,CluStream,,init,
2,LED_gradual_delay_0_20000,CluStream,,delay,
3,LED_gradual_constant_delay_ssl_0.5_0_20000,CluStream,,ssl,0.5
4,LED_gradual_random_delay_delay_0_20000,<class 'river.forest.adaptive_random_forest.AR...,0.7,delay,
5,LED_gradual_random_delay_delay_0_20000,<class 'river.naive_bayes.gaussian.GaussianNB'>,0.7,delay,
6,LED_gradual_random_delay_constant_delay_lfs_0....,<class 'river.forest.adaptive_random_forest.AR...,0.7,lfs,0.1
7,LED_gradual_random_delay_constant_delay_lfs_0....,<class 'river.naive_bayes.gaussian.GaussianNB'>,0.7,lfs,0.1
8,initail_LED_Drift_gradual,<class 'river.forest.adaptive_random_forest.AR...,0.7,init,
9,LED_gradual_random_delay_constant_delay_lfs_0....,<class 'river.forest.adaptive_random_forest.AR...,0.7,lfs,0.2


In [186]:
groups = res.groupby('classifier')
mapper = dict(zip(["<class 'river.forest.adaptive_random_forest.ARFClassifier'>",
 "<class 'river.tree.hoeffding_adaptive_tree_classifier.HoeffdingAdaptiveTreeClassifier'>",
 "<class 'river.naive_bayes.gaussian.GaussianNB'>", "<class 'river.dummy.PriorClassifier'>"],['ARF','HT','NB','Majority']))

In [246]:
for gr, gr_idx in groups:
    print(gr)
    save_formatted_logs(gr_idx.iloc[:,:-200],f'LED_abrupt\\{mapper[gr]}_random.csv')

<class 'river.forest.adaptive_random_forest.ARFClassifier'>
<class 'river.naive_bayes.gaussian.GaussianNB'>
<class 'river.tree.hoeffding_adaptive_tree_classifier.HoeffdingAdaptiveTreeClassifier'>


In [243]:
res = res.sort_values(by =[ 'classifier','threshold','type','probability']).reset_index(drop = True)[:-4]
res

Unnamed: 0,stream,hyperparameters,date,type,probability,classifier,threshold,B,frequency,B_1,...,m_390,m_391,m_392,m_393,m_394,m_395,m_396,m_397,m_398,m_399
0,LED_gradual_random_delay_delay_0_20000,"{'n_models': '10', 'max_features': '5', 'lambd...",13-Jun-23 22:23:23,delay,,<class 'river.forest.adaptive_random_forest.AR...,0.7,50,100,0.704952,...,,,,,,,,,,
1,initail_LED_Drift_gradual,"{'n_models': '10', 'max_features': '5', 'lambd...",13-Jun-23 22:33:48,init,,<class 'river.forest.adaptive_random_forest.AR...,0.7,50,100,0.0,...,,,,,,,,,,
2,LED_gradual_random_delay_constant_delay_lfs_0....,"{'n_models': '10', 'max_features': '5', 'lambd...",13-Jun-23 22:30:00,lfs,0.1,<class 'river.forest.adaptive_random_forest.AR...,0.7,50,100,0.0,...,,,,,,,,,,
3,LED_gradual_random_delay_constant_delay_lfs_0....,"{'n_models': '10', 'max_features': '5', 'lambd...",13-Jun-23 22:34:21,lfs,0.2,<class 'river.forest.adaptive_random_forest.AR...,0.7,50,100,0.0,...,,,,,,,,,,
4,LED_gradual_random_delay_constant_delay_lfs_0....,"{'n_models': '10', 'max_features': '5', 'lambd...",13-Jun-23 22:39:34,lfs,0.5,<class 'river.forest.adaptive_random_forest.AR...,0.7,50,100,0.0,...,,,,,,,,,,
5,LED_gradual_random_delay_constant_delay_ssl_0....,"{'n_models': '10', 'max_features': '5', 'lambd...",14-Jun-23 00:14:53,ssl,0.1,<class 'river.forest.adaptive_random_forest.AR...,0.7,50,100,0.688536,...,,,,,,,,,,
6,LED_gradual_random_delay_constant_delay_ssl_0....,"{'n_models': '10', 'max_features': '5', 'lambd...",14-Jun-23 02:15:13,ssl,0.2,<class 'river.forest.adaptive_random_forest.AR...,0.7,50,100,0.684738,...,,,,,,,,,,
7,LED_gradual_random_delay_constant_delay_ssl_0....,"{'n_models': '10', 'max_features': '5', 'lambd...",14-Jun-23 03:41:23,ssl,0.5,<class 'river.forest.adaptive_random_forest.AR...,0.7,50,100,0.681846,...,,,,,,,,,,
8,LED_gradual_random_delay_delay_0_20000,"{'threshold': '0.7', 'train_period': '0', 'cla...",13-Jun-23 22:27:10,delay,,<class 'river.naive_bayes.gaussian.GaussianNB'>,0.7,50,100,0.527764,...,,,,,,,,,,
9,initail_LED_Drift_gradual,"{'threshold': '0.7', 'train_period': '0', 'cla...",13-Jun-23 22:35:10,init,,<class 'river.naive_bayes.gaussian.GaussianNB'>,0.7,50,100,0.0,...,,,,,,,,,,


In [210]:
res['threshold'] = res['threshold'].astype(float)
res['probability'] = res['probability'].astype(float)
res['date'] = pd.to_datetime(res['date'])

  res['date'] = pd.to_datetime(res['date'])


In [241]:
concatenate_res(res.drop(columns='threshold').iloc[:4,:-200],"LED_gradual\\CluStream.csv")

In [131]:
save_formatted_logs(res,'CoverType\\CluStream.csv')

In [None]:
res.iloc[28]['hyperparameters']['classifier']

"<class 'river.naive_bayes.gaussian.GaussianNB'>"

In [116]:
old_res = pd.read_csv(os.path.join("logs_formatted",'CoverType\\CluStream.csv'))

In [117]:
old_res = old_res[old_res['threshold']!=1]

In [115]:
res.iloc[8:16,:-200]

Unnamed: 0,stream,hyperparameters,date,type,probability,classifier,threshold,B,frequency,B_1,...,m_190,m_191,m_192,m_193,m_194,m_195,m_196,m_197,m_198,m_199
8,LED_gradual_delay_0_20000,"{'threshold': '1', 'train_period': '0', 'class...",26-May-23 01:28:34,delay,,<class 'river.naive_bayes.gaussian.GaussianNB'>,1,50,100,0.65084,...,0.73,0.69,0.75,0.77,0.72,0.77,0.77,0.7,0.71,0.78
9,initail_LED_Drift_gradual,"{'threshold': '1', 'train_period': '0', 'class...",25-May-23 23:49:41,init,,<class 'river.naive_bayes.gaussian.GaussianNB'>,1,50,100,0.0,...,0.73,0.69,0.75,0.77,0.72,0.77,0.77,0.7,0.71,0.78
10,LED_gradual_constant_delay_lfs_0.1_0_20000,"{'threshold': '1', 'train_period': '0', 'class...",25-May-23 23:49:35,lfs,0.1,<class 'river.naive_bayes.gaussian.GaussianNB'>,1,50,100,0.0,...,,,,,,,,,,
11,LED_gradual_constant_delay_lfs_0.2_0_20000,"{'threshold': '1', 'train_period': '0', 'class...",26-May-23 00:01:39,lfs,0.2,<class 'river.naive_bayes.gaussian.GaussianNB'>,1,50,100,0.0,...,,,,,,,,,,
12,LED_gradual_constant_delay_lfs_0.5_0_20000,"{'threshold': '1', 'train_period': '0', 'class...",26-May-23 00:08:43,lfs,0.5,<class 'river.naive_bayes.gaussian.GaussianNB'>,1,50,100,0.0,...,,,,,,,,,,
13,LED_gradual_constant_delay_ssl_0.1_0_20000,"{'threshold': '1', 'train_period': '0', 'class...",26-May-23 05:58:19,ssl,0.1,<class 'river.naive_bayes.gaussian.GaussianNB'>,1,50,100,0.650095,...,,,,,,,,,,
14,LED_gradual_constant_delay_ssl_0.2_0_20000,"{'threshold': '1', 'train_period': '0', 'class...",26-May-23 08:00:28,ssl,0.2,<class 'river.naive_bayes.gaussian.GaussianNB'>,1,50,100,0.647374,...,,,,,,,,,,
15,LED_gradual_constant_delay_ssl_0.5_0_20000,"{'threshold': '1', 'train_period': '0', 'class...",26-May-23 10:43:42,ssl,0.5,<class 'river.naive_bayes.gaussian.GaussianNB'>,1,50,100,0.639145,...,,,,,,,,,,
