In [3]:
import pandas as pd
from pathlib import Path

# Single source of truth: every input CSV mapped to the status label that is
# attached to its rows in the merged table. Dict insertion order is the order
# in which the files are processed.
status_map = {
    "model_results_Step3_T1_20_variables.csv": "Step3_1D_20_variables",
    "model_results_Step4_T1.csv": "Step4_1D_All_variables",
    "model_results_Step5_T8_20_variables.csv": "Step5_8D_20_variables",
    "model_results_Step5_T8_AllVariables.csv": "Step5_8D_All_Variables",
    "model_results_Step6_ARIMA.csv": "Step6_8D_incl_ARIMA"
}

# Input files, derived from the mapping so the two can never drift apart
# (a filename listed without a label would raise KeyError on status_map lookup).
files = list(status_map)

# Folder where the files are stored (relative to the notebook's working directory)
folder = Path("../results")
print(folder)

# Collects one DataFrame per input file before they are concatenated
df_list = []

..\results


In [4]:
# Start from an empty list on every run. The list used to be created only in
# the configuration cell, so re-executing this cell appended every file a
# second time and silently duplicated rows in the merged output.
df_list = []

for filename in files:
    file_path = folder / filename
    df = pd.read_csv(file_path)

    # Tag each row with its origin so results stay traceable after merging
    df["SourceFile"] = filename
    df["ModelStatus"] = status_map[filename]

    df_list.append(df)

# Merge vertically; ignore_index gives the combined frame a fresh 0..n-1 index
df_final = pd.concat(df_list, ignore_index=True)

# Save merged output next to the input files
output_path = folder / "FinalOutcomes.csv"
df_final.to_csv(output_path, index=False)

print("Final merged file saved to:", output_path)
df_final.head()

Final merged file saved to: ..\results\FinalOutcomes.csv


Unnamed: 0,Dataset,Model,Accuracy,Precision (class 1),Recall (class 1),F1-score (class 1),F2-score,ROC-AUC,Computation Time (sec),TN,FP,FN,TP,SourceFile,ModelStatus
0,APPLE_DIRECTION_8D,LR,0.517748,0.520807,0.967213,0.677049,0.82567,0.51924,0.131266,10,380,14,413,model_results_Step3_T1_20_variables.csv,Step3_1D_20_variables
1,APPLE_DIRECTION_8D,KNN,0.487148,0.512048,0.398126,0.447958,0.416667,0.477845,0.010747,228,162,257,170,model_results_Step3_T1_20_variables.csv,Step3_1D_20_variables
2,APPLE_DIRECTION_8D,CART,0.485924,0.508516,0.489461,0.498807,0.493157,0.485111,0.040912,188,202,218,209,model_results_Step3_T1_20_variables.csv,Step3_1D_20_variables
3,APPLE_DIRECTION_8D,SVC,0.522644,0.522644,1.0,0.686495,0.845545,0.518315,2.36606,0,390,0,427,model_results_Step3_T1_20_variables.csv,Step3_1D_20_variables
4,APPLE_DIRECTION_8D,MLP,0.506732,0.522642,0.648712,0.578892,0.618856,0.485012,7.647316,137,253,150,277,model_results_Step3_T1_20_variables.csv,Step3_1D_20_variables


In [5]:
# Identifier columns first, followed by the evaluation metrics to compare
selected_cols = ["Dataset", "Model", "ModelStatus"] + [
    "Precision (class 1)",
    "Recall (class 1)",
    "F1-score (class 1)",
    "ROC-AUC",
]

# Independent copy restricted to those columns, so later changes to df_clean
# never touch df_final
df_clean = df_final.loc[:, selected_cols].copy()

# Preview the first five rows
df_clean.head()

Unnamed: 0,Dataset,Model,ModelStatus,Precision (class 1),Recall (class 1),F1-score (class 1),ROC-AUC
0,APPLE_DIRECTION_8D,LR,Step3_1D_20_variables,0.520807,0.967213,0.677049,0.51924
1,APPLE_DIRECTION_8D,KNN,Step3_1D_20_variables,0.512048,0.398126,0.447958,0.477845
2,APPLE_DIRECTION_8D,CART,Step3_1D_20_variables,0.508516,0.489461,0.498807,0.485111
3,APPLE_DIRECTION_8D,SVC,Step3_1D_20_variables,0.522644,1.0,0.686495,0.518315
4,APPLE_DIRECTION_8D,MLP,Step3_1D_20_variables,0.522642,0.648712,0.578892,0.485012


In [7]:
# Print a summary of df_clean: index range, column names, non-null counts and dtypes
df_clean.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 63 entries, 0 to 62
Data columns (total 7 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   Dataset              63 non-null     object 
 1   Model                63 non-null     object 
 2   ModelStatus          63 non-null     object 
 3   Precision (class 1)  63 non-null     float64
 4   Recall (class 1)     63 non-null     float64
 5   F1-score (class 1)   63 non-null     float64
 6   ROC-AUC              63 non-null     float64
dtypes: float64(4), object(3)
memory usage: 3.6+ KB


In [15]:
# Rank all rows by ROC-AUC, best first, and renumber them 0..n-1
df_clean = df_clean.sort_values("ROC-AUC", ascending=False, ignore_index=True)

# Preview the ten highest-ranked rows
df_clean.head(10)

Unnamed: 0,Dataset,Model,ModelStatus,Precision (class 1),Recall (class 1),F1-score (class 1),ROC-AUC
0,APPLE_TREND_8D_ARIMA,GBR,Step6_8D_incl_ARIMA,0.923077,0.560748,0.697674,0.925392
1,APPLE_TREND_8D_ARIMA,RFR,Step6_8D_incl_ARIMA,1.0,0.009346,0.018519,0.923174
2,APPLE_TREND_8D_ARIMA,ETR,Step6_8D_incl_ARIMA,0.9,0.084112,0.153846,0.917868
3,APPLE_TREND_8D_ARIMA,STACKING,Step6_8D_incl_ARIMA,0.875,0.065421,0.121739,0.913195
4,APPLE_TREND_8D_ARIMA,BAGGING_CART,Step6_8D_incl_ARIMA,0.818182,0.084112,0.152542,0.908601
5,APPLE_TREND_8D,BAGGING_CART,Step5_8D_All_Variables,0.910405,0.907781,0.909091,0.901516
6,APPLE_TREND_8D,ETR,Step5_8D_All_Variables,0.910405,0.907781,0.909091,0.901002
7,APPLE_TREND_8D,RFR,Step5_8D_All_Variables,0.91716,0.893372,0.905109,0.900366
8,APPLE_TREND_8D,VOTING_SOFT,Step5_8D_All_Variables,0.910405,0.907781,0.909091,0.897817
9,APPLE_TREND_8D,ETR,Step5_8D_20_variables,0.910405,0.907781,0.909091,0.897368


In [14]:
# Show the last ten rows of df_clean.
# NOTE(review): these are the lowest ROC-AUC rows only if the sorting cell
# above has already been run; confirm by executing the notebook top to bottom.
df_clean.tail(10)

Unnamed: 0,Dataset,Model,ModelStatus,Precision (class 1),Recall (class 1),F1-score (class 1),ROC-AUC
53,Cleaned_Features_for_ML,ABR,Step4_1D_All_variables,0.516456,0.955504,0.670501,0.500426
54,Cleaned_Features_for_ML,Bagging SVM,Step4_1D_All_variables,0.522644,1.0,0.686495,0.497406
55,Cleaned_Features_for_ML,SVC,Step4_1D_All_variables,0.522644,1.0,0.686495,0.494701
56,Cleaned_Features_for_ML,Voting (SVM + DT),Step4_1D_All_variables,0.523691,0.491803,0.507246,0.49304
57,APPLE_DIRECTION_8D,CART,Step3_1D_20_variables,0.508516,0.489461,0.498807,0.485111
58,APPLE_DIRECTION_8D,MLP,Step3_1D_20_variables,0.522642,0.648712,0.578892,0.485012
59,Cleaned_Features_for_ML,RFR,Step4_1D_All_variables,0.504902,0.482436,0.493413,0.482877
60,Cleaned_Features_for_ML,Bagging Decision Tree,Step4_1D_All_variables,0.496471,0.494145,0.495305,0.481892
61,APPLE_DIRECTION_8D,KNN,Step3_1D_20_variables,0.512048,0.398126,0.447958,0.477845
62,APPLE_TREND_8D_ARIMA,CART,Step6_8D_incl_ARIMA,0.333333,0.009346,0.018182,0.093458
