<a href="https://colab.research.google.com/github/chayan-mann/BYTE-ML-CHALLENGES/blob/main/challenge_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn import metrics
import seaborn as sns

In [None]:
# Read the housing CSV and split it positionally: every column except the last
# becomes the feature matrix X, and the last column becomes the target vector y.
# NOTE(review): column meanings are not visible here — confirm against the CSV header.
dataset = pd.read_csv('housing_price_dataset.csv')
X, y = (
    dataset.iloc[:, :-1].values,
    dataset.iloc[:, -1].values,
)

In [None]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

# One-hot encode the feature at column index 3; all remaining columns are
# passed through unchanged (remainder='passthrough').
# NOTE(review): this cell rebinds X from itself, so running it twice would
# re-encode an already-encoded matrix — re-run the loading cell first.
ct = ColumnTransformer(
    transformers=[('encoder', OneHotEncoder(), [3])],
    remainder='passthrough',
)
X = np.array(ct.fit_transform(X))


In [None]:
from sklearn.model_selection import train_test_split

# Hold out 25% of the rows for testing; random_state is fixed so the split
# is reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=0,
)

In [None]:
# Inspect the training feature matrix produced by the split.
print(X_train)

[[0.0 1.0 0.0 ... 3 1 2007]
 [1.0 0.0 0.0 ... 4 2 1985]
 [0.0 0.0 1.0 ... 4 3 1987]
 ...
 [0.0 1.0 0.0 ... 5 1 1987]
 [0.0 0.0 1.0 ... 4 1 1955]
 [1.0 0.0 0.0 ... 5 3 1966]]


In [None]:
# Inspect the training target values produced by the split.
print(y_train)

[282781.3288234  323552.36041975 344057.85454064 ... 317759.36720766
 295820.86614543 232085.49021619]


In [None]:
from sklearn.preprocessing import StandardScaler

# Standardise the columns from index 3 onward; columns 0-2 are left untouched
# (presumably the one-hot indicator columns — confirm the category count).
# The scaler is fitted on the training rows only and then applied to both
# splits, so no test-set statistics leak into the transformation.
sc = StandardScaler().fit(X_train[:, 3:])
X_train[:, 3:] = sc.transform(X_train[:, 3:])
X_test[:, 3:] = sc.transform(X_test[:, 3:])

In [None]:
# Inspect the training feature matrix again (columns 3+ were rescaled in place above).
print(X_train)

[[0.0 1.0 0.0 ... -0.44966556600439833 -1.2195932523151334
  1.0425611782272732]
 [1.0 0.0 0.0 ... 0.44608257344659474 0.0053899259845184185
  -0.01997831838972238]
 [0.0 0.0 1.0 ... 0.44608257344659474 1.2303731042841701
  0.07661618130273176]
 ...
 [0.0 1.0 0.0 ... 1.341830712897588 -1.2195932523151334
  0.07661618130273176]
 [0.0 0.0 1.0 ... 0.44608257344659474 -1.2195932523151334
  -1.4688958137765344]
 [1.0 0.0 0.0 ... 1.341830712897588 1.2303731042841701
  -0.9376260654680367]]


In [None]:
from sklearn.linear_model import LinearRegression

# Fit an ordinary least-squares linear model on the training split.
# The bare fit(...) call is kept as the final expression so the notebook
# still displays the fitted estimator.
regressor = LinearRegression()
regressor.fit(X=X_train, y=y_train)

In [None]:
# Predict on the held-out rows and show predictions (left column) next to the
# true targets (right column), rounded to two decimals for readability.
y_pred = regressor.predict(X_test)
np.set_printoptions(precision=2)
print(np.column_stack((y_pred, y_test)))

[[272072.   273143.34]
 [127928.   202150.47]
 [279896.   313958.04]
 ...
 [148852.   190117.63]
 [138500.   184728.92]
 [131600.   138914.73]]


In [None]:

# Coefficient of determination (R^2) of the predictions on the test split.
from sklearn.metrics import r2_score

r2_score(y_true=y_test, y_pred=y_pred)

0.5706578890401939

In [None]:
# Summary report: the estimator's score on both splits, followed by the
# error metrics computed from the test-set predictions.
test_mse = metrics.mean_squared_error(y_test, y_pred)
report_rows = [
    ('Train Score: ', regressor.score(X_train, y_train)),
    ('Test Score: ', regressor.score(X_test, y_test)),
    ('MAE:', metrics.mean_absolute_error(y_test, y_pred)),
    ('MSE:', test_mse),
    ('RMSE:', np.sqrt(test_mse)),  # root of the MSE, in the target's own units
    ('Var score:', metrics.explained_variance_score(y_test, y_pred)),
]
for label, value in report_rows:
    print(label, value)

Train Score:  0.5700463415966224
Test Score:  0.5706578890401939
MAE: 40237.82516690098
MSE: 2506523618.427335
RMSE: 50065.19368211147
Var score: 0.5706579900665179


In [None]:
from sklearn.model_selection import cross_val_score

# 10-fold cross-validation on the training split. No `scoring` is passed, so
# each fold is scored with the estimator's own score method; the printed
# "Accuracy" is therefore that score expressed as a percentage.
accuracies = cross_val_score(regressor, X_train, y_train, cv=10)
cv_mean_pct = accuracies.mean() * 100
cv_std_pct = accuracies.std() * 100
print("Accuracy: {:.2f} %".format(cv_mean_pct))
print("Standard Deviation: {:.2f} %".format(cv_std_pct))

Accuracy: 56.97 %
Standard Deviation: 1.17 %


In [None]:
from sklearn.model_selection import GridSearchCV

# Hyper-parameter search over the linear regressor with 10-fold CV.
#
# Two defects are fixed here:
#   * the old grid listed SVM hyper-parameters ('C', 'kernel', 'gamma'), which
#     the linear regressor does not accept;
#   * scoring='accuracy' is a classification metric and is undefined for a
#     continuous target, so every candidate scored NaN. R^2 is used instead.
candidate_grid = {
    'fit_intercept': [True, False],
    'positive': [False, True],
}
# Keep only options the current estimator actually exposes, so the search
# never fails on an unknown parameter name.
supported_params = regressor.get_params()
parameters = [{name: values
               for name, values in candidate_grid.items()
               if name in supported_params}]
grid_search = GridSearchCV(estimator = regressor,
                           param_grid = parameters,
                           scoring = 'r2',
                           cv = 10,
                           n_jobs = -1)
grid_search.fit(X_train, y_train)
# Variable name kept for backward compatibility; it holds the best mean CV R^2.
best_accuracy = grid_search.best_score_
best_parameters = grid_search.best_params_
print("Best R^2: {:.2f} %".format(best_accuracy*100))
print("Best Parameters:", best_parameters)

 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan]
Parameters: { "C", "kernel" } are not used.



Best Accuracy: nan %
Best Parameters: {'C': 0.25, 'kernel': 'linear'}



```

