# Produce LaTeX - Autogluon Tabular Only SCF Medium Presets

In [1]:
# Packages
import pandas as pd

## Leaderboard

In [2]:
# Read the leaderboard workbook from the Autogluon output folder into a DataFrame
leaderboard_path = '../../../../Output/Modelling/Autogluon/Autogluon_Tabular_Only_SCF_Medium_Presets_leaderboard.xlsx'
leaderboard = pd.read_excel(leaderboard_path)
leaderboard

Unnamed: 0,model,score_test,score_val,eval_metric,pred_time_test,pred_time_val,fit_time,pred_time_test_marginal,pred_time_val_marginal,fit_time_marginal,stack_level,can_infer,fit_order
0,XGBoost,0.95386,0.955912,accuracy,0.102107,0.008969,15.308995,0.102107,0.008969,15.308995,1,True,11
1,LightGBM,0.952972,0.953908,accuracy,0.011925,0.008402,33.13471,0.011925,0.008402,33.13471,1,True,5
2,LightGBMXT,0.952972,0.955912,accuracy,0.014429,0.008175,30.502607,0.014429,0.008175,30.502607,1,True,4
3,WeightedEnsemble_L2,0.952972,0.955912,accuracy,0.017627,0.008903,31.24153,0.003198,0.000728,0.738924,2,True,14
4,CatBoost,0.952085,0.955912,accuracy,0.514511,0.010341,34.58843,0.514511,0.010341,34.58843,1,True,8
5,NeuralNetTorch,0.950311,0.955912,accuracy,0.086886,0.0815,14.769097,0.086886,0.0815,14.769097,1,True,12
6,LightGBMLarge,0.943212,0.951904,accuracy,0.05716,0.017987,105.922239,0.05716,0.017987,105.922239,1,True,13
7,ExtraTreesGini,0.939663,0.941884,accuracy,0.123295,0.058456,1.084774,0.123295,0.058456,1.084774,1,True,9
8,ExtraTreesEntr,0.938776,0.941884,accuracy,0.154433,0.068413,1.234442,0.154433,0.068413,1.234442,1,True,10
9,RandomForestGini,0.937888,0.93988,accuracy,0.207461,0.069566,2.562939,0.207461,0.069566,2.562939,1,True,6


In [3]:
# Build the table that will be exported to LaTeX:
#   1. keep only the model name and its test score,
#   2. relabel them as 'Model' and 'Test Accuracy',
#   3. strip underscores from the model names (e.g. WeightedEnsemble_L2 -> WeightedEnsembleL2),
#      since the export below is done with escape=False.
lt_lb = (
    leaderboard
    .loc[:, ['model', 'score_test']]
    .rename(columns={'model': 'Model', 'score_test': 'Test Accuracy'})
    .assign(Model=lambda frame: frame['Model'].str.replace('_', ''))
)
lt_lb

Unnamed: 0,Model,Test Accuracy
0,XGBoost,0.95386
1,LightGBM,0.952972
2,LightGBMXT,0.952972
3,WeightedEnsembleL2,0.952972
4,CatBoost,0.952085
5,NeuralNetTorch,0.950311
6,LightGBMLarge,0.943212
7,ExtraTreesGini,0.939663
8,ExtraTreesEntr,0.938776
9,RandomForestGini,0.937888


In [4]:
# Export the leaderboard table to LaTeX.
# The numeric column is rendered as two-decimal strings (with commas for thousands)
# on a separate copy, so `lt_lb` itself stays numeric and this cell can be re-run
# without '{:,.2f}'.format failing on values that were already turned into strings.
lt_lb_formatted = lt_lb.copy()
for col in ['Test Accuracy']:
    lt_lb_formatted[col] = lt_lb_formatted[col].map('{:,.2f}'.format)
# Two centred columns, no index column; escape=False writes cell text as-is
lt_string = lt_lb_formatted.to_latex(index=False, column_format='c' * 2, escape=False)
# Typeset the table in footnotesize and switch back to normalsize afterwards
latex_with_font_size = "\\footnotesize\n" + lt_string + "\n\\normalsize"
with open('../../../../Output/Modelling/Autogluon/Autogluon_Tabular_Only_SCF_Medium_Presets_leaderboard.tex', 'w') as f:
    f.write(latex_with_font_size)


## Feature Importance

In [5]:
# Read the feature-importance workbook from the Autogluon output folder into a DataFrame
feature_importance_path = '../../../../Output/Modelling/Autogluon/Autogluon_Tabular_Only_SCF_Medium_Presets_feature_importance.xlsx'
fi = pd.read_excel(feature_importance_path)
fi

Unnamed: 0.1,Unnamed: 0,importance,stddev,p_value,n,p99_high,p99_low
0,rating_on_previous_fixed_quarter_date,0.714769,0.013384,1.474416e-08,5,0.742326,0.687211
1,weightedAverageShsOutDil,0.003381,0.000975,7.443162e-04,5,0.005387,0.001374
2,totalNonCurrentAssets,0.002669,0.000000,5.000000e-01,5,0.002669,0.002669
3,propertyPlantEquipmentNet,0.001957,0.000744,2.090536e-03,5,0.003490,0.000425
4,marketCap,0.000890,0.000890,4.450467e-02,5,0.002722,-0.000942
...,...,...,...,...,...,...,...
116,taxPayables,0.000000,0.000000,5.000000e-01,5,0.000000,0.000000
117,shortTermDebt,0.000000,0.000000,5.000000e-01,5,0.000000,0.000000
118,accountPayables,0.000000,0.000000,5.000000e-01,5,0.000000,0.000000
119,totalAssets,0.000000,0.000000,5.000000e-01,5,0.000000,0.000000


In [6]:
# Prepare the feature-importance table for LaTeX.
# Takes the first 10 rows of `fi` (assumes the workbook is already sorted by
# importance, as the preview above suggests) and keeps four columns, relabelled:
#   'Unnamed: 0' -> 'Feature'   (the feature names are stored in this column)
#   importance   -> 'Average Drop in Accuracy'
#   stddev       -> 'Standard Deviation'
#   p_value      -> 'P-Value'
# Note: feature names are left unchanged here, so they still contain underscores.
fi_latex = (
    fi.reset_index()
    .loc[:, ['Unnamed: 0', 'importance', 'stddev', 'p_value']]
    .head(10)
    .rename(columns={
        'Unnamed: 0': 'Feature',
        'importance': 'Average Drop in Accuracy',
        'stddev': 'Standard Deviation',
        'p_value': 'P-Value',
    })
)

fi_latex

Unnamed: 0,Feature,Average Drop in Accuracy,Standard Deviation,P-Value
0,rating_on_previous_fixed_quarter_date,0.714769,0.013384,1.474416e-08
1,weightedAverageShsOutDil,0.003381,0.000975,0.0007443162
2,totalNonCurrentAssets,0.002669,0.0,0.5
3,propertyPlantEquipmentNet,0.001957,0.000744,0.002090536
4,marketCap,0.00089,0.00089,0.04450467
5,totalLiabilities,0.000178,0.000398,0.1869505
6,changeInWorkingCapital,0.0,0.0,0.5
7,revenue,0.0,0.0,0.5
8,freeCashFlow,0.0,0.0,0.5
9,capitalExpenditure,0.0,0.0,0.5


In [7]:
# Write the feature-importance table to a .tex file (no index column).
# escape=True is passed explicitly because the 'Feature' values contain underscores
# (e.g. rating_on_previous_fixed_quarter_date), which must be escaped for the table
# to compile in LaTeX; pandas 2.0 changed the to_latex default to no escaping.
fi_latex.to_latex('../../../../Output/Modelling/Autogluon/Autogluon_Tabular_Only_SCF_Medium_Presets_feature_importance.tex', index=False, escape=True)