In [17]:
import pandas as pd
import numpy as np
from xgboost import XGBRegressor
import matplotlib.pyplot as plt 
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import KFold
from sklearn.multioutput import MultiOutputRegressor
from sklearn.metrics import mean_absolute_percentage_error

In [33]:
SEED = 42
N_SPLITS = 5
N_ESTIMATORS = 200

In [35]:
data=pd.read_csv('train.csv')

In [37]:
data

Unnamed: 0,Component1_fraction,Component2_fraction,Component3_fraction,Component4_fraction,Component5_fraction,Component1_Property1,Component2_Property1,Component3_Property1,Component4_Property1,Component5_Property1,...,BlendProperty1,BlendProperty2,BlendProperty3,BlendProperty4,BlendProperty5,BlendProperty6,BlendProperty7,BlendProperty8,BlendProperty9,BlendProperty10
0,0.21,0.00,0.42,0.25,0.12,-0.021782,1.981251,0.020036,0.140315,1.032029,...,0.489143,0.607589,0.321670,-1.236055,1.601132,1.384662,0.305850,0.193460,0.580374,-0.762738
1,0.02,0.33,0.19,0.46,0.00,-0.224339,1.148036,-1.107840,0.149533,-0.354000,...,-1.257481,-1.475283,-0.437385,-1.402911,0.147941,-1.143244,-0.439171,-1.379041,-1.280989,-0.503625
2,0.08,0.08,0.18,0.50,0.16,0.457763,0.242591,-0.922492,0.908213,0.972003,...,1.784349,0.450467,0.622687,1.375614,-0.428790,1.161616,0.601289,0.872950,0.660000,2.024576
3,0.25,0.42,0.00,0.07,0.26,-0.577734,-0.930826,0.815284,0.447514,0.455717,...,-0.066422,0.483730,-1.865442,-0.046295,-0.163820,-0.209693,-1.840566,0.300293,-0.351336,-1.551914
4,0.26,0.16,0.08,0.50,0.00,0.120415,0.666268,-0.626934,2.725357,0.392259,...,-0.118913,-1.172398,0.301785,-1.787407,-0.493361,-0.528049,0.286344,-0.265192,0.430513,0.735073
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1995,0.50,0.12,0.00,0.26,0.12,0.279523,-0.054170,-0.391227,0.400222,1.032029,...,-0.028366,-0.327297,-0.316933,-1.294092,-0.530259,-0.421526,-0.320869,0.709627,-0.737244,-0.744289
1996,0.19,0.31,0.00,0.37,0.13,-0.887185,0.610050,0.178606,1.083154,-2.822749,...,-0.449245,0.156778,-0.367445,-0.938615,-0.577451,-0.209996,-0.370505,-0.195531,-0.032834,0.269718
1997,0.38,0.06,0.14,0.31,0.11,0.568978,-0.196759,-0.646318,-0.980070,1.032029,...,0.029135,0.164890,-0.092942,-1.134490,-0.437479,-0.695636,-0.101073,0.063650,0.624368,-0.477053
1998,0.50,0.16,0.00,0.18,0.16,-0.067453,0.321977,-0.137535,0.238507,0.017455,...,-0.232960,-0.464947,0.112536,-0.793522,-0.811272,-1.194914,0.100644,0.760116,-0.751394,-0.857598


In [39]:
(data.isnull().sum())

Component1_fraction    0
Component2_fraction    0
Component3_fraction    0
Component4_fraction    0
Component5_fraction    0
                      ..
BlendProperty6         0
BlendProperty7         0
BlendProperty8         0
BlendProperty9         0
BlendProperty10        0
Length: 65, dtype: int64

In [41]:
col=data.columns

In [43]:
col

Index(['Component1_fraction', 'Component2_fraction', 'Component3_fraction',
       'Component4_fraction', 'Component5_fraction', 'Component1_Property1',
       'Component2_Property1', 'Component3_Property1', 'Component4_Property1',
       'Component5_Property1', 'Component1_Property2', 'Component2_Property2',
       'Component3_Property2', 'Component4_Property2', 'Component5_Property2',
       'Component1_Property3', 'Component2_Property3', 'Component3_Property3',
       'Component4_Property3', 'Component5_Property3', 'Component1_Property4',
       'Component2_Property4', 'Component3_Property4', 'Component4_Property4',
       'Component5_Property4', 'Component1_Property5', 'Component2_Property5',
       'Component3_Property5', 'Component4_Property5', 'Component5_Property5',
       'Component1_Property6', 'Component2_Property6', 'Component3_Property6',
       'Component4_Property6', 'Component5_Property6', 'Component1_Property7',
       'Component2_Property7', 'Component3_Property7', 'C

In [45]:
BLEND_COLS = [f"Component{i}_fraction" for i in range(1, 6)]
COMPONENT_COLS = [f"Component{i}_Property{j}" for i in range(1, 6) for j in range(1, 11)]
FEATURE_COLS = BLEND_COLS + COMPONENT_COLS
TARGET_COLS = [f"BlendProperty{i}" for i in range(1, 11)]

In [47]:
x=data[COMPONENT_COLS].copy()

In [49]:
x

Unnamed: 0,Component1_Property1,Component1_Property2,Component1_Property3,Component1_Property4,Component1_Property5,Component1_Property6,Component1_Property7,Component1_Property8,Component1_Property9,Component1_Property10,...,Component5_Property1,Component5_Property2,Component5_Property3,Component5_Property4,Component5_Property5,Component5_Property6,Component5_Property7,Component5_Property8,Component5_Property9,Component5_Property10
0,-0.021782,-1.229799,-0.393322,-1.763085,0.051779,-1.483333,-0.509380,0.293815,0.480368,-1.244963,...,1.032029,0.216116,-3.082938,-1.410049,0.296713,1.741303,0.221237,0.513326,-0.636394,-2.728928
1,-0.224339,-1.075041,-0.691084,-1.869709,-0.375340,-0.222145,-1.204671,0.948338,-1.958826,-1.160435,...,-0.354000,1.273144,-2.086526,-0.175381,-0.436747,0.988643,0.125072,0.636329,1.439313,0.896222
2,0.457763,0.240404,-0.270626,-3.062889,0.185845,-1.519468,0.020619,-0.671136,-0.798978,0.006829,...,0.972003,-0.678255,1.437899,-0.047104,-3.052837,1.836633,-0.664966,1.039423,0.123834,-0.424314
3,-0.577734,-0.587815,0.912940,0.734976,-0.238266,-1.145807,-0.539061,0.754506,-0.534135,0.052972,...,0.455717,0.207318,-1.337976,-0.970700,1.066737,-0.261213,-0.096509,-1.273487,-0.973779,-0.576430
4,0.120415,0.127614,1.342376,-0.580060,0.638531,0.148538,-0.592914,-0.792930,-0.389350,2.104922,...,0.392259,0.626086,0.115239,0.487384,1.566761,-1.896231,0.507294,0.009196,-0.467038,-2.038341
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1995,0.279523,0.011874,-0.736471,-0.986249,-0.777966,-0.583084,-0.323471,1.918858,1.138839,-0.322096,...,1.032029,1.273144,0.671960,-2.528199,1.247373,-0.063352,1.103905,0.012334,0.766653,0.641193
1996,-0.887185,-0.298955,0.123896,0.328788,-1.211101,-0.744268,-1.032092,0.725666,-0.782418,0.461525,...,-2.822749,0.957581,-0.205213,-0.570903,-0.225686,0.848864,-0.992585,-0.520475,-2.537512,0.397253
1997,0.568978,0.200403,-0.827246,-1.407670,-1.529292,-2.130100,0.093668,-0.715942,-0.813747,-0.694918,...,1.032029,-1.056835,0.556034,0.249002,0.662351,0.771486,-0.593056,1.603266,1.567580,-0.284672
1998,-0.067453,-0.249773,-0.954930,0.085074,-0.764216,-0.488214,0.854897,1.800529,1.262477,0.628131,...,0.017455,0.510039,-0.174300,-1.067977,0.719926,-2.170991,0.678781,-0.724202,-0.161447,0.193507


In [51]:
y=data[TARGET_COLS].copy()

In [53]:
y

Unnamed: 0,BlendProperty1,BlendProperty2,BlendProperty3,BlendProperty4,BlendProperty5,BlendProperty6,BlendProperty7,BlendProperty8,BlendProperty9,BlendProperty10
0,0.489143,0.607589,0.321670,-1.236055,1.601132,1.384662,0.305850,0.193460,0.580374,-0.762738
1,-1.257481,-1.475283,-0.437385,-1.402911,0.147941,-1.143244,-0.439171,-1.379041,-1.280989,-0.503625
2,1.784349,0.450467,0.622687,1.375614,-0.428790,1.161616,0.601289,0.872950,0.660000,2.024576
3,-0.066422,0.483730,-1.865442,-0.046295,-0.163820,-0.209693,-1.840566,0.300293,-0.351336,-1.551914
4,-0.118913,-1.172398,0.301785,-1.787407,-0.493361,-0.528049,0.286344,-0.265192,0.430513,0.735073
...,...,...,...,...,...,...,...,...,...,...
1995,-0.028366,-0.327297,-0.316933,-1.294092,-0.530259,-0.421526,-0.320869,0.709627,-0.737244,-0.744289
1996,-0.449245,0.156778,-0.367445,-0.938615,-0.577451,-0.209996,-0.370505,-0.195531,-0.032834,0.269718
1997,0.029135,0.164890,-0.092942,-1.134490,-0.437479,-0.695636,-0.101073,0.063650,0.624368,-0.477053
1998,-0.232960,-0.464947,0.112536,-0.793522,-0.811272,-1.194914,0.100644,0.760116,-0.751394,-0.857598


In [55]:
model = MultiOutputRegressor(XGBRegressor(
    n_estimators=2500,
    learning_rate=0.05,
    max_depth=4,
    reg_lambda=1,
    random_state=SEED,
    n_jobs=-1
))


In [57]:
scaler = StandardScaler()
x_scaled = scaler.fit_transform(x)

In [59]:
from sklearn.model_selection import train_test_split

In [61]:
x_train,x_test,y_train,y_test=train_test_split(x_scaled,y,test_size=0.3,random_state=42)

In [63]:
print(x_train.shape,x_test.shape,y_train.shape,y_test.shape)

(1400, 50) (600, 50) (1400, 10) (600, 10)


In [65]:
model.fit(x_train,y_train)

In [66]:
y_pred=model.predict(x_test)

In [67]:
y_pred.shape

(600, 10)

In [68]:
from sklearn.metrics import mean_absolute_percentage_error
mean=mean_absolute_percentage_error(y_test,y_pred)

In [69]:
print(mean)

3.856662510441094


In [70]:
from sklearn.metrics import r2_score

In [71]:
s=r2_score(y_test,y_pred)

In [72]:
s

0.06806139308504723

In [73]:
from sklearn.ensemble import GradientBoostingRegressor

In [74]:
model1 = MultiOutputRegressor(GradientBoostingRegressor(n_estimators=1000, learning_rate=0.1, max_depth=3, random_state=42))

In [75]:
model1.fit(x_train,y_train)

In [76]:
y_prediction=model1.predict(x_test)

In [77]:
score_=mean_absolute_percentage_error(y_test,y_pred)

In [78]:
score_

3.856662510441094

In [79]:
pip install lightgbm

Note: you may need to restart the kernel to use updated packages.


In [80]:
import lightgbm as lgb

In [275]:
model2=lgb.LGBMRegressor(n_estimators=500,learning_rate=0.000010000,max_depth=6)

In [277]:
model3=MultiOutputRegressor(model2)

In [279]:
model3.fit(x_train,y_train)

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000628 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12748
[LightGBM] [Info] Number of data points in the train set: 1400, number of used features: 50
[LightGBM] [Info] Start training from score 0.001761
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000969 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12748
[LightGBM] [Info] Number of data points in the train set: 1400, number of used features: 50
[LightGBM] [Info] Start training from score -0.000935
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000783 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12748
[LightGBM] [Info] Number of data points in the train set: 1400, number of used features: 50
[LightGBM] [Info] Start tr

In [280]:
lgbprediction=model3.predict(x_test)

In [281]:
score_of_lgb=mean_absolute_percentage_error(y_test,lgbprediction)

In [282]:
score_of_lgb

1.0783680362671133

In [291]:
pip install catboost

Collecting catboost
  Downloading catboost-1.2.8-cp312-cp312-win_amd64.whl.metadata (1.5 kB)
Collecting graphviz (from catboost)
  Downloading graphviz-0.21-py3-none-any.whl.metadata (12 kB)
Downloading catboost-1.2.8-cp312-cp312-win_amd64.whl (102.4 MB)
   ---------------------------------------- 0.0/102.4 MB ? eta -:--:--
   ---------------------------------------- 0.0/102.4 MB ? eta -:--:--
   ---------------------------------------- 0.1/102.4 MB 1.1 MB/s eta 0:01:34
   ---------------------------------------- 0.1/102.4 MB 1.6 MB/s eta 0:01:05
   ---------------------------------------- 0.1/102.4 MB 1.6 MB/s eta 0:01:05
   ---------------------------------------- 0.1/102.4 MB 1.6 MB/s eta 0:01:05
   ---------------------------------------- 0.3/102.4 MB 1.1 MB/s eta 0:01:32
   ---------------------------------------- 0.3/102.4 MB 1.1 MB/s eta 0:01:32
   ---------------------------------------- 0.3/102.4 MB 1.1 MB/s eta 0:01:32
   ---------------------------------------- 0.3/102.4 MB 

In [292]:
from catboost import CatBoostRegressor

In [309]:
model4=CatBoostRegressor(n_estimators=2000,learning_rate=0.00001)

In [299]:
model5=MultiOutputRegressor(model4)

In [None]:
model5.fit(x_train,y_train)

0:	learn: 1.0038110	total: 4.16ms	remaining: 828ms
1:	learn: 1.0038099	total: 8.09ms	remaining: 800ms
2:	learn: 1.0038090	total: 11.3ms	remaining: 743ms
3:	learn: 1.0038077	total: 14.6ms	remaining: 714ms
4:	learn: 1.0038066	total: 17.8ms	remaining: 693ms
5:	learn: 1.0038054	total: 21.4ms	remaining: 691ms
6:	learn: 1.0038040	total: 24.5ms	remaining: 676ms
7:	learn: 1.0038028	total: 27.9ms	remaining: 669ms
8:	learn: 1.0038017	total: 30.9ms	remaining: 656ms
9:	learn: 1.0038006	total: 34.2ms	remaining: 650ms
10:	learn: 1.0037994	total: 37.4ms	remaining: 643ms
11:	learn: 1.0037983	total: 40.5ms	remaining: 634ms
12:	learn: 1.0037971	total: 43.7ms	remaining: 628ms
13:	learn: 1.0037964	total: 47.2ms	remaining: 627ms
14:	learn: 1.0037953	total: 50.5ms	remaining: 623ms
15:	learn: 1.0037943	total: 53.7ms	remaining: 618ms
16:	learn: 1.0037932	total: 57ms	remaining: 613ms
17:	learn: 1.0037919	total: 60.1ms	remaining: 608ms
18:	learn: 1.0037909	total: 63.3ms	remaining: 603ms
19:	learn: 1.0037897	tot

In [None]:
catprediction=model5.predict(x_test)

In [None]:
score_of_cat=mean_absolute_percentage_error(y_test,catprediction)

In [None]:
score_of_cat