In [28]:
import os
import re
import pandas as pd

def extract_last_line(file_path):
    """Extract the last line from a file."""
    with open(file_path, 'r') as file:
        lines = file.readlines()
        if lines:
            return lines[-1].strip()
        else:
            return None

def parse_last_line(last_line):
    """Parse the last line of the log file."""
    values = last_line.split(', ')
    data = {
        'Model': values[0],
        'DataType': values[1],
        'Accuracy': float(values[2]),
        'Precision': float(values[3]),
        'Recall': float(values[4]),
        'F1 Score': float(values[5]),
        'AUC': float(values[6])
    }
    return data

def extract_model_name(file_name):
    """Extract the model name and data type from the file name."""
    match = re.search(r'output_(.*)_(.*)_\d+\.log', file_name)
    if match:
        model_name = match.group(1)
        data_type = match.group(2)
        return model_name, data_type
    else:
        return None, None

if __name__ == "__main__":
    # Specify the directory path
    directory_path = 'logs/not_normalized/log_sci_sm_2000_F'

    # Initialize an empty list to store the data
    data = []

    # Iterate over all files in the directory
    for file_name in os.listdir(directory_path):
        if file_name.startswith('output_') and file_name.endswith('.log'):
            model_name, data_type = extract_model_name(file_name)
            print(f"File: {file_name}, Model: {model_name}, DataType: {data_type}")
            if model_name in ['ANN', 'ml_RF', 'ml_LR', 'ml_XGBoost', 'ml_SVM'] and data_type in ['Z', 'F', 'E', 'ZS', 'ZSF', 'ZSE', 'ZSEF']:
                file_path = os.path.join(directory_path, file_name)
                last_line = extract_last_line(file_path)
                if last_line:
                    parsed_data = parse_last_line(last_line)
                    parsed_data['Model'] = model_name
                    parsed_data['FileNameDataType'] = data_type
                    data.append(parsed_data)

    # Create a DataFrame
    df = pd.DataFrame(data)

    # Print the DataFrame
    print(df)

    # Save the DataFrame to a CSV file
    df.to_csv('output.csv', index=False)

File: output_ml_XGBoost_Z_889428.log, Model: ml_XGBoost, DataType: Z
File: output_ml_RF_E_889423.log, Model: ml_RF, DataType: E
File: output_ml_LR_ZSE_889419.log, Model: ml_LR, DataType: ZSE
File: output_ml_RF_ZSE_889426.log, Model: ml_RF, DataType: ZSE
File: output_ANN_ZSF_889439.log, Model: ANN, DataType: ZSF
File: output_ml_XGBoost_E_889430.log, Model: ml_XGBoost, DataType: E
File: output_ml_RF_ZS_889424.log, Model: ml_RF, DataType: ZS
File: output_ml_LR_F_889415.log, Model: ml_LR, DataType: F
File: output_ml_LR_ZS_889417.log, Model: ml_LR, DataType: ZS
File: output_ANN_F_889436.log, Model: ANN, DataType: F
File: output_ml_RF_ZSF_889425.log, Model: ml_RF, DataType: ZSF
File: output_ml_RF_ZSEF_889427.log, Model: ml_RF, DataType: ZSEF
File: output_ANN_ZSE_889440.log, Model: ANN, DataType: ZSE
File: output_ml_XGBoost_ZSF_889432.log, Model: ml_XGBoost, DataType: ZSF
File: output_ml_XGBoost_ZSEF_889434.log, Model: ml_XGBoost, DataType: ZSEF
File: output_ANN_ZSEF_889441.log, Model: ANN, D