# Weight Predictor using several algorithms with many **Ensemble Techniques**.

## Model Structure
***
`model`<br>
    | <br>
    |----- `v1` (*bagging*)<br>
    |       |<br>
    |       |<br>
    |       |----- `knn`<br>
    |       |<br>
    |       |----- `lr`<br>
    |       |<br>
    |       |----- `dtree`<br>
    |       |<br>
    |       |----- `elasticnet`<br>
    |<br>
    |<br>
    |----- `v2` (*boosting: adaboost*)<br>
    |       |<br>
    |       |<br>
    |       |----- `knn`<br>
    |       |<br>
    |       |----- `lr`<br>
    |       |<br>
    |       |----- `dtree`<br>
    |       |<br>
    |       |----- `elasticnet`<br>
    |<br>
    |<br>
    |----- `v3` (*bagging & boosting*)<br>
    |       |<br>
    |       |<br>
    |       |----- `rf` (*bagging*)<br>
    |       |<br>
    |       |----- `rf` (*boosting*)<br>

In [129]:
import pickle
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.ensemble import AdaBoostRegressor, VotingRegressor, BaggingRegressor, RandomForestRegressor
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import Normalizer
from sklearn.linear_model import LinearRegression, ElasticNet
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import r2_score, root_mean_squared_error, balanced_accuracy_score
from mlxtend.plotting import plot_decision_regions

In [130]:
try:
    df = pd.read_csv(r"/workspaces/ML-Journey/Datasets/Dataset for weight prediction with gender.csv")
except:
    df = pd.read_csv(r"C:\\Users\\Archit\\Documents\\GitHub\\ML-Journey\\Datasets\\Dataset for weight prediction with gender.csv")

X = df.drop(columns=['weight_kg'])
y = df[['weight_kg']]

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.2)

norm = Normalizer()
X_train_trf = norm.fit_transform(X_train)
X_test_trf = norm.transform(X_test)

<br><br><br>
## `v1` - Bagging (L0)
***

### KNN -> Bagging

In [131]:
knn_bag = BaggingRegressor(KNeighborsRegressor(n_neighbors=15))
knn_bag.fit(X_train_trf, y_train)
y_pred = knn_bag.predict(X_test_trf)
r2_score(y_test, y_pred)

  return column_or_1d(y, warn=True)


0.8767212282796998

### Linear Regression -> Bagging

In [132]:
lr_bag = BaggingRegressor(LinearRegression())
lr_bag.fit(X_train_trf, y_train)
y_pred = lr_bag.predict(X_test_trf)
r2_score(y_test, y_pred)

  return column_or_1d(y, warn=True)


0.8739140652027122

### Elasticnet -> Bagging

In [133]:
elasticnet_bag = BaggingRegressor(ElasticNet(alpha=0.001), n_estimators=50)
elasticnet_bag.fit(X_train, y_train)
y_pred = elasticnet_bag.predict(X_test)
r2_score(y_test, y_pred)

  return column_or_1d(y, warn=True)


0.876129443809892

### Decision Tree -> Bagging

In [189]:
dtree_bag = BaggingRegressor(DecisionTreeRegressor(ccp_alpha=0.1), n_estimators=50)
dtree_bag.fit(X_train_trf, y_train)
y_pred = dtree_bag.predict(X_test_trf)
r2_score(y_test, y_pred)

  return column_or_1d(y, warn=True)


0.8768464793563044

<br><br><br>
## `v2` - Boosting (L0)
***

### KNN - Boosting

In [135]:
knn_bos = AdaBoostRegressor(KNeighborsRegressor(n_neighbors=7), random_state=420)
knn_bos.fit(X_train_trf, y_train)
y_pred = knn_bos.predict(X_test_trf)
r2_score(y_test, y_pred)

  y = column_or_1d(y, warn=True)


0.8698867962090915

### Linear Regression - Boosting

In [136]:
lr_bos = AdaBoostRegressor(LinearRegression(), learning_rate=0.01)
lr_bos.fit(X_train_trf, y_train)
y_pred = lr_bos.predict(X_test_trf)
r2_score(y_test, y_pred)

  y = column_or_1d(y, warn=True)


0.8739812848463383

### ElasticNet -> Boosting

In [170]:
elasticnet_bos = AdaBoostRegressor(ElasticNet(alpha=0.25, tol=7),learning_rate=0.1, random_state=4200, n_estimators=500)
elasticnet_bos.fit(X_train, y_train)
y_pred = elasticnet_bos.predict(X_test)
r2_score(y_test, y_pred)

  y = column_or_1d(y, warn=True)


0.8618523709932356

### Decision Tree -> Boosting

In [188]:
dtree_bos = AdaBoostRegressor(DecisionTreeRegressor(ccp_alpha=0.1), n_estimators=50, learning_rate=0.001)
dtree_bos.fit(X_train_trf, y_train)
y_pred = dtree_bos.predict(X_test_trf)
r2_score(y_test, y_pred)

  y = column_or_1d(y, warn=True)


0.8747958805048787

<br><br><br>
## `v3` - Bagging & Boosting (L0)
***

### Random Forest

In [199]:
rf = RandomForestRegressor(ccp_alpha=0.1)
rf.fit(X_train_trf, y_train)
y_pred = rf.predict(X_test_trf)
r2_score(y_test, y_pred)

  return fit_method(estimator, *args, **kwargs)


0.8770416943320881

### Random Forest -> Bagging

In [203]:
rf_bag = BaggingRegressor(RandomForestRegressor(ccp_alpha=0.1), n_estimators=5)
rf_bag.fit(X_train_trf, y_train)
y_pred = rf_bag.predict(X_test_trf)
r2_score(y_test, y_pred)

  return column_or_1d(y, warn=True)


0.877157020277395

### Random Forest -> Boosting

In [207]:
rf_bos = AdaBoostRegressor(RandomForestRegressor(ccp_alpha=0.1), n_estimators=5, random_state=42)
rf_bos.fit(X_train_trf, y_train)
y_pred = rf_bos.predict(X_test_trf)
r2_score(y_test, y_pred)

  y = column_or_1d(y, warn=True)


0.8760689146450875

<br><br><br>
## L1
***

In [215]:
v1 = VotingRegressor( estimators=[
    ('knn', knn_bag),
    ('lr', lr_bag),
    ('dt', dtree_bag),
    ('en', elasticnet_bag)
])
v1

In [216]:
v2 = VotingRegressor( estimators=[
    ('knn', knn_bos),
    ('lr', lr_bos),
    ('dt', dtree_bos),
    ('en', elasticnet_bos)
])
v2

In [217]:
v3 = VotingRegressor(estimators=[
    ('rf', rf),
    ('rf_bag', rf_bag),
    ('rf_bos', rf_bos)
])
v3

<br><br>

## L2
***

In [210]:
model = VotingRegressor(estimators=[
    ('v1',v1),
    ('v2',v2),
    ('v3',v3)
])

In [212]:
model.fit(X_train_trf, y_train)

  y = column_or_1d(y, warn=True)


In [213]:
model_pred = model.predict(X_test_trf)

In [214]:
r2_score(y_test, model_pred)

0.8544922510158113