<a href="https://colab.research.google.com/github/bhushanmandava/Gradient-Boosting-Alogrithms-Regression-/blob/main/lightgbm_regressor.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# LightGBM Regressor

## Part 1 - Data Preprocessing

### Importing the dataset

In [None]:
import pandas as pd
# Load the insurance data; the relative path is resolved against the working directory.
dataset = pd.read_csv('insurance.csv')

In [None]:
# Preview the first rows of the freshly loaded data.
dataset.head()

### Checking for missing data

In [None]:
# Summarise column dtypes and non-null counts to spot missing values.
dataset.info()

### Handling categorical variables

Sex column

In [None]:
# List the distinct labels in 'sex' before encoding them.
dataset['sex'].unique()

In [None]:
# Encode 'sex' as 0 for 'female' and 1 for any other label. Values that are
# already 0/1 are kept as-is, so re-running this cell does not turn every row
# into 1 (after the first run no value equals 'female' any more).
dataset['sex'] = dataset['sex'].apply(
    lambda value: value if value in (0, 1) else (0 if value == 'female' else 1)
)

In [None]:
# Check that 'sex' now holds numeric codes.
dataset.head()

Smoker column

In [None]:
# List the distinct labels in 'smoker' before encoding them.
dataset['smoker'].unique()

In [None]:
# Encode 'smoker' as 0 for 'no' and 1 for any other label. Values that are
# already 0/1 are kept as-is, so re-running this cell does not turn every row
# into 1 (after the first run no value equals 'no' any more).
dataset['smoker'] = dataset['smoker'].apply(
    lambda value: value if value in (0, 1) else (0 if value == 'no' else 1)
)

In [None]:
# Check that 'smoker' now holds numeric codes.
dataset.head()

Region column

In [None]:
# List the distinct labels in 'region' before one-hot encoding them.
dataset['region'].unique()

In [None]:
# One-hot encode 'region', dropping the first category so it acts as the baseline.
# dtype=int keeps the indicators numeric (0/1): pandas >= 2.0 otherwise returns
# bool columns, and a frame mixing bool with numeric columns converts to an
# object-dtype array when `.values` is taken later.
region_dummies = pd.get_dummies(dataset['region'], drop_first=True, dtype=int)

In [None]:
# Inspect the indicator columns created for 'region'.
region_dummies

In [None]:
# Place the region indicator columns in front of the existing columns.
# NOTE: re-running this cell appends the indicator columns a second time.
dataset = pd.concat(objs=[region_dummies, dataset], axis='columns')

In [None]:
# Confirm the indicator columns were added to the frame.
dataset.head()

In [None]:
# The text 'region' column is now represented by its indicator columns; remove it.
dataset.drop(columns=['region'], inplace=True)

In [None]:
# Confirm the original 'region' column is gone.
dataset.head()

### Creating the Training Set and the Test Set

Getting the inputs and output

In [None]:
# Features: every column except the last one. The dtype is requested explicitly
# so the matrix is float even when some columns are bool; a frame mixing bool
# and numeric columns would otherwise come back as an object-dtype array.
X = dataset.iloc[:, :-1].to_numpy(dtype=float)

In [None]:
# Target: the last column of the frame, as a NumPy array.
y = dataset.iloc[:, -1].to_numpy()

In [None]:
# Inspect the feature matrix.
X

In [None]:
# Inspect the target vector.
y

Getting the Training Set and the Test Set

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

## Part 2 - Building and training the model

### Building the model

In [None]:
import lightgbm as lgb
# LightGBM regressor with every hyperparameter left at the library default.
model = lgb.LGBMRegressor()

### Training the model

In [None]:
# Fit the regressor on the training split only.
model.fit(X_train, y_train)

### Inference

In [None]:
# Predict the target for the held-out test features.
y_pred = model.predict(X_test)

## Part 3 - Evaluating the model

### R-Squared

In [None]:
from sklearn.metrics import r2_score
# Coefficient of determination of the predictions against the test targets.
r2 = r2_score(y_true=y_test, y_pred=y_pred)

In [None]:
# Show the test-set R-squared.
r2

### Adjusted R-Squared

In [None]:
# n = number of test rows, k = number of features (X_test is two-dimensional).
n, k = X_test.shape
# Adjusted R-squared: penalises R-squared for the number of predictors.
adj_r2 = 1 - (1 - r2) * (n - 1) / (n - k - 1)

In [None]:
# Show the adjusted R-squared.
adj_r2

### k-Fold Cross Validation

In [None]:
from sklearn.model_selection import cross_val_score
# 10-fold cross-validated R-squared of the model over the full dataset (X, y).
r2s = cross_val_score(model, X, y, scoring='r2', cv=10)
# Report the mean and spread of the per-fold scores as percentages.
print("R-Squared: {:.2f} %".format(100 * r2s.mean()))
print("Standard Deviation: {:.2f} %".format(100 * r2s.std()))

### Grid Search

In [None]:
from sklearn.model_selection import GridSearchCV
# Candidate values for each hyperparameter; every combination is evaluated.
parameters = [{
    'num_leaves': [29, 30, 31, 32, 33],
    'learning_rate': [0.08, 0.09, 0.1, 0.11, 0.12],
    'n_estimators': [80, 90, 100, 110, 120],
}]
# Exhaustive search scored by 10-fold cross-validated R-squared on the full dataset.
grid_search = GridSearchCV(model, parameters, scoring='r2', cv=10)
grid_search.fit(X, y)
best_r2, best_parameters = grid_search.best_score_, grid_search.best_params_
print("Best R-Squared: {:.2f} %".format(100 * best_r2))
print("Best Parameters:", best_parameters)