In [2]:
import pandas as pd
import glob

class data_analysis:
    """Load experiment CSV logs and join them with per-experiment info.

    On construction the object reads ``info.csv`` from ``directory`` and every
    ``*epoch.csv`` / ``*step.csv`` file under ``directory/epoch_step_info/``,
    then merges the experiment info into the epoch and step tables on the
    ``exper_id`` column.

    Attributes:
        directory: Data directory, always ending with a path separator.
        info_df: Contents of ``info.csv``.
        epoch_df: All epoch files concatenated, merged with ``info_df``.
        step_df: All step files concatenated, merged with ``info_df``.
    """

    def __init__(self, directory="../csv_files/latest_expers/"):
        """Read all CSV files under ``directory`` and build the dataframes.

        Args:
            directory: Directory that holds ``info.csv`` and the
                ``epoch_step_info/`` sub-directory. A trailing separator is
                appended when missing.
        """
        # Paths below are built by string concatenation, so make sure the
        # directory ends with a separator.
        if directory and not directory.endswith(("/", "\\")):
            directory += "/"

        # Directory where all CSV files are located
        self.directory = directory

        # Create experiments info dataframe
        info_file = self.directory + "info.csv"
        self.info_df = pd.read_csv(info_file)

        epoch_files = self.directory + "epoch_step_info/*epoch.csv"
        step_files = self.directory + "epoch_step_info/*step.csv"

        # Combine all files for steps and epochs in two dataframes
        self.epoch_df, self.step_df = self.concat_epoch_step_files(epoch_files, step_files)

        # Add for each step and epoch dataframe row all the experiment info
        self.epoch_step_with_info()

    @staticmethod
    def _concat_csv_files(pattern):
        """Read every CSV matching ``pattern`` and stack them into one frame.

        Paths are sorted because ``glob.glob`` returns them in arbitrary
        order; sorting keeps the row order reproducible between runs.

        Raises:
            ValueError: If no file matches ``pattern``.
        """
        paths = sorted(glob.glob(pattern))
        if not paths:
            raise ValueError(f"No CSV files match pattern: {pattern!r}")
        return pd.concat([pd.read_csv(path) for path in paths], ignore_index=True)

    # Combine all files for steps and epochs in two dataframes
    def concat_epoch_step_files(self, epoch_files, step_files):
        """Concatenate all epoch files and all step files.

        The two file sets are read independently, so a different number of
        epoch and step files does not cause any file to be skipped.

        Args:
            epoch_files: Glob pattern matching the epoch CSV files.
            step_files: Glob pattern matching the step CSV files.

        Returns:
            Tuple ``(epoch_df, step_df)`` of concatenated dataframes, each
            with a fresh integer index.

        Raises:
            ValueError: If either pattern matches no files.
        """
        epoch_df = self._concat_csv_files(epoch_files)
        step_df = self._concat_csv_files(step_files)

        return epoch_df, step_df

    # Add for each step and epoch dataframe row all the experiment info
    def epoch_step_with_info(self):
        """Merge ``info_df`` into ``epoch_df`` and ``step_df`` on ``exper_id``.

        Uses ``pd.merge`` with its default join type, so only rows whose
        ``exper_id`` is present in both frames are kept. The attributes are
        overwritten in place; calling this a second time merges the info
        columns again.
        """
        self.epoch_df = pd.merge(self.info_df, self.epoch_df, on='exper_id')
        self.step_df = pd.merge(self.info_df, self.step_df, on='exper_id')
        

# Build the analysis object. Construction reads info.csv and the epoch/step
# CSV files from the directory set in data_analysis.__init__, so this line
# performs file I/O.
da = data_analysis()

In [3]:
# Experiment info table, kept under a short alias for later cells.
info_df = da.info_df

# Highest accuracy reached by each experiment, joined back onto the
# experiment info on exper_id.
grouped_df = info_df.merge(
    da.epoch_df.groupby('exper_id')['accuracy'].max().reset_index(),
    on='exper_id',
)