# Training

## Importing Libraries

In [1]:
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import r2_score
import pandas as pd
import numpy as np
import pickle

## Loading Preprocessed Dataset

In [2]:
# Load the preprocessed dataset into a DataFrame.
# The path is a raw string so every backslash is taken literally; in a normal
# string literal sequences such as "\A", "\d" and "\p" are invalid escapes,
# which newer Python versions warn about.
# NOTE(review): this is an absolute, machine-specific path — consider a
# configurable data directory so the notebook runs on other machines.
df = pd.read_csv(r'G:\Ashish Yadav\Backup Google Drive\Documents-Lecture\Github\ML-Mini-Project\data\processed\preprocessed.csv')

In [3]:
# Split the frame positionally: every column except the last forms the
# feature matrix, and the last column is the prediction target.
features_frame, target_series = df.iloc[:, :-1], df.iloc[:, -1]
X, y = features_frame.values, target_series.values

## Training using Random Forest Regressor

In [4]:
# Hold out 25% of the rows for evaluation; the fixed seed keeps the split repeatable.
# NOTE(review): the original trailing note "3-15" presumably lists other seeds
# that were tried — confirm with the author.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=3,
)

# One-hot encode the columns at positions 0, 1, 4, 5 and 6 (presumably the
# categorical features — verify against the dataset); every other column is
# passed through untouched.
# NOTE(review): drop='first' combined with handle_unknown='ignore' means an
# unseen category and the dropped first category should both encode as all
# zeros — confirm this is acceptable for inference.
encoding = (
    'encoding',
    OneHotEncoder(sparse_output=False, drop='first', handle_unknown='ignore'),
    [0, 1, 4, 5, 6],
)
preprocessing = ColumnTransformer(transformers=[encoding], remainder='passthrough')

# NOTE(review): the original trailing note "6-8-10" presumably lists seeds
# that were tried for the forest — confirm with the author.
model = RandomForestRegressor(random_state=10)

# Chain encoding and the regressor so both are fitted (and later saved) together.
pipe = Pipeline(steps=[('preprocessing', preprocessing), ('model', model)])

pipe.fit(X_train, y_train)

## Evaluating the Model

In [5]:
# Predict on the held-out rows, then report each score as a percentage
# rounded to two decimals.
y_pred = pipe.predict(X_test)

train_pct = round(pipe.score(X_train, y_train) * 100, 2)
print(f'Train Score: {train_pct}%')

test_pct = round(pipe.score(X_test, y_test) * 100, 2)
print(f'Test Score: {test_pct}%')

# NOTE(review): in the recorded output this value matches the test score
# above, so the two lines appear to report the same metric — confirm whether
# both are needed.
r2_pct = round(r2_score(y_test, y_pred) * 100, 2)
print(f'R2 Score: {r2_pct}%')



Train Score: 97.93%
Test Score: 92.25%
R2 Score: 92.25%




## Saving the model

In [6]:
# Serialize the fitted pipeline to disk with pickle.
# The path is a raw string so every backslash is literal: no invalid escape
# sequences ("\A", "\d", ...) and no need to double the backslashes before
# "t" and "r" to avoid tab / carriage-return escapes.
# NOTE(review): this is an absolute, machine-specific path — consider a
# configurable output directory so the notebook runs on other machines.
with open(r'G:\Ashish Yadav\Backup Google Drive\Documents-Lecture\Github\ML-Mini-Project\data\trained_models\rfr_model.pkl', 'wb') as file:
    pickle.dump(pipe, file)