<a href="https://colab.research.google.com/github/ananddiv/datascience/blob/Linear-Regression/model_selection_scaffold.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Import necessary libraries
import operator
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from prettytable import PrettyTable
from sklearn.metrics import mean_squared_error
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures


In [2]:
# Read the dataset from the CSV file into a DataFrame
df = pd.read_csv(filepath_or_buffer="dataset.csv")

# Preview the first five rows as a sanity check on the load
df.head(n=5)


Unnamed: 0,x,y
0,4.98,24.0
1,9.14,21.6
2,4.03,34.7
3,2.94,33.4
4,5.33,36.2


In [3]:
# Predictor: select column x with a list so the result stays a
# one-column DataFrame rather than a Series
x = df.loc[:, ['x']]

# Response: the underlying values of column y
y = df.loc[:, 'y'].values


In [4]:
### edTest(test_split) ###

# Hold out 25% of the observations as the test set; the fixed
# random state of 1 keeps the split reproducible across runs
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.25,
    random_state=1,
)


In [5]:
# Accumulator for the test MSEs; the model cells below append to it
test_error = list()


### Linear Regression

In [6]:
### edTest(test_linear_mse) ###

# Create a simple Linear Regression model and fit it on the train data
# (fit returns the fitted estimator itself)
model = LinearRegression().fit(x_train, y_train)

# Generate predictions for the test predictors
y_pred = model.predict(x_test)

# Score the test predictions against the true test responses
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)

# Record the linear model's test MSE in the list initialized above
test_error.append(mse)


### Polynomial Regression

In [7]:
### edTest(test_poly_mse) ###

# Initialize a list of degree values to create polynomial features
degree_list = [2,5]

# Fit and score one polynomial regression model per degree
for d in degree_list:

    # Create the polynomial feature transformer for the current degree
    poly = PolynomialFeatures(degree=d)

    # Fit the transformer on the train data and compute its polynomial features
    X_train = poly.fit_transform(x_train)

    # Compute the polynomial features for the test data with the transformer
    # fitted on the train data: the test split is only transformed, never
    # fitted on, so preprocessing is learned from the train data alone
    X_test = poly.transform(x_test)

    # Initialize a linear regression model
    lreg = LinearRegression()

    # Fit the linear model on the transformed train data
    lreg.fit(X_train, y_train)

    # Predict using the trained model on the transformed test data
    y_pred = lreg.predict(X_test)

    # Compute the MSE of the test predictions
    mse = mean_squared_error(y_test,y_pred)

    # Append the error to the list initialized above
    # (one entry per degree, in the order of degree_list)
    test_error.append(mse)


In [8]:
# Display the test MSEs collected so far
test_error

[13.581688275061781, 5.011790297235239, 92.27168055690166]

### kNN Regression

In [11]:
### edTest(test_knn_mse) ###

# Initialize a list of k values to specify the number of neighbors
knn_list = [1,20]

# Fit and score one kNN regression model per k value
for i in knn_list:

    # Initialize a kNN model with the current k value
    model = KNeighborsRegressor(n_neighbors = i)

    # Fit the model on the train data
    model.fit(x_train,y_train)

    # Predict using the trained model on the test data
    y_pred = model.predict(x_test)

    # Compute the MSE of the test predictions
    mse = mean_squared_error(y_test, y_pred)

    # Append the error to the list initialized above
    # (one entry per k, in the order of knn_list)
    test_error.append(mse)


In [12]:
# Helper code to tabulate the MSE of the 5 models.
# The labels are listed in the order the errors were appended to
# test_error: linear, polynomial (degree 2, 5), then kNN (k=1, 20).
model_names = [
    "Linear Regression",
    "Polynomial Model with Degree 2",
    "Polynomial Model with Degree 5",
    "KNN Model with k=1",
    "KNN Model with k=20",
]

pt = PrettyTable()
pt.field_names = ["Model","MSE"]

# Add one row per model, pairing each label with the error stored at the
# same position and rounding it to two decimals
for position, name in enumerate(model_names):
    pt.add_row([name, round(test_error[position], 2)])

print(pt)


+--------------------------------+-------+
|             Model              |  MSE  |
+--------------------------------+-------+
|       Linear Regression        | 13.58 |
| Polynomial Model with Degree 2 |  5.01 |
| Polynomial Model with Degree 5 | 92.27 |
|       KNN Model with k=1       | 70.56 |
|      KNN Model with k=20       | 47.26 |
+--------------------------------+-------+
