In [10]:
import pandas as pd
import numpy as np
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_absolute_error,mean_squared_error,r2_score
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import GridSearchCV

spaceship = pd.read_csv("https://raw.githubusercontent.com/data-bootcamp-v4/data/main/spaceship_titanic.csv")
spaceship.head()

spaceship.shape
spaceship.dtypes
spaceship.isnull().sum()
spaceship.dropna(inplace=True)

spaceship['Cabin']=spaceship['Cabin'].str[0]
spaceship=spaceship.drop(['PassengerId','Name'],axis =1)


spaceship_with_dummies = pd.get_dummies(spaceship,drop_first=True)
X = spaceship_with_dummies.drop('Transported',axis=1)
y = spaceship_with_dummies['Transported']

from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size =0.2,random_state=42)

from sklearn.preprocessing import MinMaxScaler
normalizer = MinMaxScaler()
normalizer.fit(X_train)

X_train_norm = normalizer.transform(X_train)
X_test_norm = normalizer.transform(X_test)
X_train_norm = pd.DataFrame(X_train_norm,columns = X_train.columns)
X_test_norm = pd.DataFrame(X_test_norm,columns = X_test.columns)


# # Gradient Boosting
# gb_reg = GradientBoostingRegressor(max_depth=4,
#                                    n_estimators=100)
# gb_reg.fit(X_train_norm, y_train)
# pred = gb_reg.predict(X_test_norm)
# print("MAE", mean_absolute_error(pred, y_test))
# print("RMSE", mean_squared_error(pred, y_test, squared=False))
# print("R2 score", gb_reg.score(X_test_norm, y_test))


# Grid Search

grid = {"n_estimators": [50, 100, 200,500],
        "max_depth": [5, 10, 30,100],
        "learning_rate":[0.01,0.05,0.1],
        "subsample":[0.8,1.0]
       }

gb_reg2 = GradientBoostingRegressor()
model = GridSearchCV(estimator = gb_reg2, param_grid = grid, cv=5,verbose=3)
model.fit(X_train_norm, y_train)

model.best_params_
best_model = model.best_estimator_

pred = best_model.predict(X_test_norm)

print("MAE", mean_absolute_error(pred, y_test))
print("RMSE", mean_squared_error(pred, y_test, squared=False))
print("R2 score", best_model.score(X_test_norm, y_test))
print("Best parameters found is:",model.best_params_)

Fitting 5 folds for each of 96 candidates, totalling 480 fits
[CV 1/5] END learning_rate=0.01, max_depth=5, n_estimators=50, subsample=0.8;, score=0.274 total time=   0.1s
[CV 2/5] END learning_rate=0.01, max_depth=5, n_estimators=50, subsample=0.8;, score=0.256 total time=   0.1s
[CV 3/5] END learning_rate=0.01, max_depth=5, n_estimators=50, subsample=0.8;, score=0.262 total time=   0.1s
[CV 4/5] END learning_rate=0.01, max_depth=5, n_estimators=50, subsample=0.8;, score=0.259 total time=   0.1s
[CV 5/5] END learning_rate=0.01, max_depth=5, n_estimators=50, subsample=0.8;, score=0.277 total time=   0.1s
[CV 1/5] END learning_rate=0.01, max_depth=5, n_estimators=50, subsample=1.0;, score=0.272 total time=   0.1s
[CV 2/5] END learning_rate=0.01, max_depth=5, n_estimators=50, subsample=1.0;, score=0.254 total time=   0.1s
[CV 3/5] END learning_rate=0.01, max_depth=5, n_estimators=50, subsample=1.0;, score=0.261 total time=   0.1s
[CV 4/5] END learning_rate=0.01, max_depth=5, n_estimators

