# Create Tables

In [38]:
# Packages
import pandas as pd

## List of Models

In [39]:
# Identifiers of the fitted models handled by this notebook
models = [
    'rating_model_1',
    'rating_model_2',
    'rating_model_3',
    'rating_model_4',
    'change_model',
]
# The model whose saved outputs are turned into tables in the cells below.
# NOTE(review): assumes the first list entry is the most complex model - confirm.
most_complex_model = models[0]

## Model Comparison

In [40]:
# Read the pickled metrics dict saved for rating_model_1
# (keys seen in the output: accuracy, f1_score, majority_baseline)
acc_f1_majority = pd.read_pickle('../../../../Output/Modelling/Logistic Regression/rating_model_1/rating_model_1_acc_f1_majority.pkl')
# Build a display copy with every metric rounded to 2 decimal places
acc_f1_majority_rounded = dict(
    (metric_name, round(metric_value, 2))
    for metric_name, metric_value in acc_f1_majority.items()
)
print(acc_f1_majority_rounded)

{'accuracy': 0.36, 'f1_score': 0.26, 'majority_baseline': 0.32}


## Most Complex Model Classification Report

In [41]:
# Load the classification report of the most complex model from pickle.
# The pickled object is the plain-text report (it is split on newlines below).
classification_report = pd.read_pickle('../../../../Output/Modelling/Logistic Regression/' + most_complex_model + '/' + most_complex_model + '_classification_report.pkl')
print(classification_report)

# Convert the report text into a table of per-class rows.
classification_report_lines = classification_report.split('\n')
# Tokenise on whitespace; `line.strip()` also skips whitespace-only lines, which
# would otherwise become empty token lists and fail on `row[0]` below.
classification_report_rows = [line.split() for line in classification_report_lines if line.strip()]
# Drop the header row and the summary rows. 'micro avg' / 'samples avg' are
# included because scikit-learn prints them instead of 'accuracy' for some
# label configurations, and they would not fit the 5-column layout.
non_class_row_prefixes = {'precision', 'accuracy', 'macro', 'weighted', 'micro', 'samples'}
classification_report_rows = [row for row in classification_report_rows if row[0] not in non_class_row_prefixes]

# Stack the rows into a dataframe with presentation column names.
# NOTE: this assumes class labels contain no spaces (one token per column).
classification_report_data = pd.DataFrame(
    classification_report_rows,
    columns=['Rating', 'Precision', 'Recall', 'F1-Score', 'Support'],
)
# The tokens are strings; cast the metric columns so that the Excel export
# contains real numbers rather than text cells.
classification_report_data = classification_report_data.astype(
    {'Precision': float, 'Recall': float, 'F1-Score': float, 'Support': int}
)
print(classification_report_data)

# Export to Excel
classification_report_data.to_excel('../../../../Output/Modelling/Logistic Regression/Tables/Most_Complex_Model_Classification_Report.xlsx', index=False)

# Export to Latex (float_format keeps the two-decimal presentation of the report)
classification_report_data.to_latex('../../../../Output/Modelling/Logistic Regression/Tables/Most_Complex_Model_Classification_Report.tex', index=False, float_format='%.2f')

              precision    recall  f1-score   support

         AAA       0.00      0.00      0.00        19
          AA       0.00      0.00      0.00        43
           A       0.00      0.00      0.00       219
         BBB       0.36      0.83      0.51       356
          BB       0.33      0.32      0.32       313
           B       0.75      0.06      0.12       144
         CCC       0.00      0.00      0.00        27
          CC       0.00      0.00      0.00         1
           C       0.00      0.00      0.00         3
           D       0.00      0.00      0.00         2

    accuracy                           0.36      1127
   macro avg       0.14      0.12      0.09      1127
weighted avg       0.30      0.36      0.26      1127

  Rating Precision Recall F1-Score Support
0    AAA      0.00   0.00     0.00      19
1     AA      0.00   0.00     0.00      43
2      A      0.00   0.00     0.00     219
3    BBB      0.36   0.83     0.51     356
4     BB      0.33   0.32 

## Most Complex Model Hyperparameters

In [42]:
# Load the best hyperparameters found for the most complex model.
# The pickled object is a dict keyed by estimator parameter name.
best_params = pd.read_pickle('../../../../Output/Modelling/Logistic Regression/' + most_complex_model + '/' + most_complex_model + '_best_params.pkl')
print(best_params)

# Convert to a one-row dataframe (one column per hyperparameter)
best_params = pd.DataFrame(best_params, index=[0])
# Relabel columns by parameter name rather than by position, so a change in the
# dict's key order can never attach a label to the wrong value.
# errors='raise' makes a missing expected parameter fail loudly.
column_labels = {
    'C': 'C',
    'class_weight': 'Class Weighting Strategy',
    'l1_ratio': 'L1 Ratio',
    'multi_class': 'Multi-Class Strategy',
    'penalty': 'Penalty',
    'solver': 'Solver',
}
best_params = best_params.rename(columns=column_labels, errors='raise')

# Replace raw parameter values with human-readable names for the exported table
best_params['Multi-Class Strategy'] = best_params['Multi-Class Strategy'].replace({'ovr': 'One vs Rest', 'multinomial': 'Multinomial'})
best_params['Penalty'] = best_params['Penalty'].replace({'l1': 'L1', 'l2': 'L2', 'elasticnet': 'Elastic Net', 'none': 'None'})
best_params['Solver'] = best_params['Solver'].replace({'newton-cg': 'Newton Conjugate Gradient', 'lbfgs': 'Limited Memory Broyden–Fletcher–Goldfarb–Shanno', 'liblinear': 'Library for Large Linear Classification', 'sag': 'Stochastic Average Gradient', 'saga': 'SAGA'})
# A missing class weighting (None) is written out as the text 'None'
best_params['Class Weighting Strategy'] = best_params['Class Weighting Strategy'].replace({'balanced': 'Balanced', None: 'None'})
print(best_params)

# Export to Excel
best_params.to_excel('../../../../Output/Modelling/Logistic Regression/Tables/Most_Complex_Model_Best_Params.xlsx', index=False)

# Export to Latex
best_params.to_latex('../../../../Output/Modelling/Logistic Regression/Tables/Most_Complex_Model_Best_Params.tex', index=False)

{'C': 0.01, 'class_weight': None, 'l1_ratio': 0.25, 'multi_class': 'ovr', 'penalty': 'elasticnet', 'solver': 'saga'}
      C Class Weighting Strategy  L1 Ratio Multi-Class Strategy      Penalty  \
0  0.01                     None      0.25          One vs Rest  Elastic Net   

  Solver  
0   SAGA  
