# Model Comparison
Created by Abdoulaye Sanni B.

In [1]:
import pandas as pd
#import pycaret regression module 
from pycaret.regression import * 

# 1.0 Import the Data 

In [2]:
# Load the FRP-RC column database from the workbook: spreadsheet columns A
# through AA, with the first sheet row used as the column header.
Data = pd.read_excel(
    "FRP-RC_Columns_Database.xlsx",
    usecols="A:AA",
    header=0,
)
# Preview the first rows as the cell output.
Data.head()

Unnamed: 0,No.,Spec.,b,h,D,H,LamdaC,Ag,Circular,TypeCon,fcp,TypeL,RhoEf,Bars,EfrpL,ffuL,efuL,TypeH,Bars.1,Config,SpacPitch,EfrpH,ffuH,efuH,e,EoverD,Pexp
0,1,A-12,610,610,-,3000,17,372100,No,NWC,43.7,GFRP,1.0,8 No. 8,44.2,608,1.38,GFRP,No. 4,Ties,305,-,-,-,0.0,0,15235.0
1,2,B-12,610,610,-,3000,17,372100,No,NWC,40.6,GFRP,1.0,8 No. 8,44.4,712,1.6,GFRP,No. 4,Ties,305,-,-,-,0.0,0,12949.0
2,3,A-3,610,610,-,3000,17,372100,No,NWC,36.1,GFRP,1.0,8 No. 8,44.2,608,1.38,GFRP,No. 4,Ties,76,-,-,-,0.0,0,11926.0
3,4,B-3,610,610,-,3000,17,372100,No,NWC,32.8,GFRP,1.0,8 No. 8,44.4,712,1.6,GFRP,No. 4,Ties,76,-,-,-,0.0,0,10751.0
4,5,C-G-1-120,350,350,-,1400,14,122500,No,NWC,32.6,GFRP,1.9,8 No. 19,47.6,728,1.53,GFRP,No.13 ties,Ties,120,44,640,1.45,0.0,0,3928.575


# 2.0 Convert into Numerical Values

In [3]:
# Cast the numeric modelling columns (features and the `Pexp` target) to float
# in a single call. Passing a column -> dtype mapping to `DataFrame.astype`
# keeps the column list in one place instead of repeating the same assignment
# once per column, and the cell can be re-run safely (casting float to float
# is a no-op).
# `astype` raises if a listed column is missing or holds a value that cannot be
# parsed as a number, so malformed rows surface here rather than during
# modelling.
Data = Data.astype(
    dict.fromkeys(
        [
            'LamdaC', 'SpacPitch', 'EoverD',
            'ffuL', 'Ag', 'fcp',
            'RhoEf', 'Pexp', 'EfrpL',
        ],
        float,
    )
)

In [4]:
# Build the modelling frame: the selected predictor columns followed by the
# `Pexp` column (listed last). Every other spreadsheet column is left out.
Donnees = Data[
    [
        'LamdaC', 'Circular', 'Ag', 'TypeCon', 'fcp',
        'TypeL', 'RhoEf', 'EfrpL', 'ffuL',
        'TypeH', 'Config', 'SpacPitch', 'EoverD',
        'Pexp',
    ]
]


# 3.0 Setting up Environment in PyCaret

In [5]:
# Level order handed to PyCaret for each column declared as an ordinal feature.
# NOTE(review): several of these look like nominal categories (concrete type,
# FRP type, tie configuration) - confirm that an ordinal encoding with this
# particular order is intended.
ordinal_category_levels = {
    'Circular': ['Yes', 'No'],
    'TypeCon': ['NWC', 'LWC', 'GC'],
    'TypeL': ['GFRP', 'CFRP', 'BFRP'],
    'TypeH': ['GFRP', 'CFRP', 'BFRP', 'Steel'],
    'Config': ['Ties', 'Spiral', 'Hoops'],
}

# Initialise the PyCaret regression experiment on `Donnees` with `Pexp` as the
# target. `session_id` fixes the random seed so the run is reproducible.
# NOTE(review): `combine_rare_levels` / `rare_level_threshold` are PyCaret 2.x
# keyword names - verify them against the installed PyCaret version.
exp_reg = setup(
    Donnees,
    target='Pexp',
    session_id=123,
    # Feature scaling plus power transforms of the features and of the target.
    normalize=True,
    transformation=True,
    transform_target=True,
    # Rare categorical levels (threshold 0.05) are merged together.
    combine_rare_levels=True,
    rare_level_threshold=0.05,
    # Highly inter-correlated features (threshold 0.95) are filtered out.
    remove_multicollinearity=True,
    multicollinearity_threshold=0.95,
    ordinal_features=ordinal_category_levels,
)

Unnamed: 0,Description,Value
0,session_id,123
1,Target,Pexp
2,Original Data,"(283, 14)"
3,Missing Values,False
4,Numeric Features,8
5,Categorical Features,5
6,Ordinal Features,True
7,High Cardinality Features,False
8,High Cardinality Method,
9,Transformed Train Set,"(198, 13)"


# 4.0 Comparing All Models

In [6]:
# Train and score the available regressors using the experiment configured by
# setup(). The leaderboard printed below is ordered best-first by R2, and the
# call's return value (the top-ranked model) is echoed as the cell output.
# The returned model is not bound to a name here.
compare_models()

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
et,Extra Trees Regressor,176.967,148646.7775,317.525,0.9616,0.1469,0.1043,0.068
xgboost,Extreme Gradient Boosting,187.4581,151498.7991,339.4472,0.9609,0.1512,0.1052,0.116
gbr,Gradient Boosting Regressor,219.7345,289470.1051,431.8307,0.9462,0.1506,0.1092,0.016
catboost,CatBoost Regressor,224.2344,255150.5013,428.5931,0.9379,0.1822,0.1331,0.246
dt,Decision Tree Regressor,242.5058,234857.5755,437.9817,0.9304,0.2362,0.1491,0.009
rf,Random Forest Regressor,271.5858,388194.7193,523.6403,0.9202,0.2034,0.1462,0.086
lr,Linear Regression,388.7685,460728.7124,604.9384,0.8941,0.2621,0.228,0.442
lar,Least Angle Regression,388.7685,460728.7124,604.9384,0.8941,0.2621,0.228,0.008
ridge,Ridge Regression,389.5288,476226.6189,611.1538,0.8935,0.2613,0.2273,0.009
br,Bayesian Ridge,389.4704,474608.4842,610.5181,0.8935,0.2614,0.2274,0.008


PowerTransformedTargetRegressor(bootstrap=False, ccp_alpha=0.0, criterion='mse',
                                max_depth=None, max_features='auto',
                                max_leaf_nodes=None, max_samples=None,
                                min_impurity_decrease=0.0,
                                min_impurity_split=None, min_samples_leaf=1,
                                min_samples_split=2,
                                min_weight_fraction_leaf=0.0, n_estimators=100,
                                n_jobs=-1, oob_score=False,
                                power_transformer_method='box-cox',
                                powe...
                                regressor=ExtraTreesRegressor(bootstrap=False,
                                                              ccp_alpha=0.0,
                                                              criterion='mse',
                                                              max_depth=None,
                             

# 5.0 Create Extra Trees Model

In [7]:
# Fit an Extra Trees regressor ('et', the top entry of the comparison table)
# and keep the fitted model as `extratree`. The table printed below lists its
# metrics row by row (rows 0-9, presumably one per cross-validation fold).
extratree = create_model('et')

Unnamed: 0,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,73.0724,10596.6704,102.9401,0.9916,0.076,0.056
1,189.2241,59468.0142,243.8606,0.9663,0.17,0.1227
2,198.7195,112721.4629,335.7402,0.9307,0.1616,0.1058
3,419.4316,671196.5203,819.2658,0.9354,0.2819,0.2001
4,155.7435,50543.0392,224.8178,0.9712,0.2156,0.1553
5,154.4005,86030.4098,293.3094,0.992,0.0855,0.0631
6,122.1006,32718.0368,180.8813,0.9813,0.1393,0.1035
7,152.7786,37467.0083,193.564,0.9761,0.1503,0.1149
8,79.9718,20916.0295,144.6238,0.9964,0.053,0.0392
9,224.2276,404810.5839,636.2473,0.8754,0.136,0.0825


# 6.0 Create XGBoost Model

In [8]:
# Fit an Extreme Gradient Boosting regressor ('xgboost', second in the
# comparison table) and keep the fitted model. The table printed below lists
# its metrics row by row (rows 0-9, presumably one per cross-validation fold).
# NOTE(review): the variable name matches the `xgboost` package name and would
# shadow that module if it were ever imported in this notebook.
xgboost = create_model('xgboost')

Unnamed: 0,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,74.7826,11579.7627,107.6093,0.9908,0.0852,0.0605
1,256.7911,113683.2969,337.1695,0.9356,0.1605,0.1203
2,150.0098,79136.1875,281.3116,0.9514,0.1217,0.0764
3,383.3358,540441.8125,735.1475,0.948,0.1964,0.1451
4,108.1521,33451.1953,182.8967,0.9809,0.2701,0.1601
5,265.454,285345.8125,534.1777,0.9734,0.119,0.0831
6,174.9538,65397.5508,255.7294,0.9626,0.2123,0.1654
7,158.352,57930.7109,240.688,0.963,0.1602,0.122
8,94.5239,30317.7246,174.1199,0.9948,0.0754,0.0499
9,208.2264,297703.9375,545.6225,0.9084,0.1114,0.069
