# Assignment 4 — Linear & Polynomial Regression (Auto MPG)
This notebook explores linear and polynomial regression.

## Import Libraries

In [None]:

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split, learning_curve, validation_curve
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error, r2_score


## Load Dataset

In [None]:

# Fetch seaborn's 'mpg' example dataset and discard every row that has a
# missing value in any column, then preview the first rows.
df = sns.load_dataset('mpg').dropna()
df.head()


## Feature Selection

In [None]:

# Predictors: four numeric columns of the frame; target: the 'mpg' column.
X = df.loc[:, [
    'cylinders',
    'horsepower',
    'weight',
    'displacement',
]]
y = df.loc[:, 'mpg']


## Train-Test Split

In [None]:

# Hold out 20% of the rows for testing; the fixed random_state makes the
# split identical on every run.
(X_train, X_test,
 y_train, y_test) = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


## Linear Regression

In [None]:

# Fit a plain linear model on the training split (fit() returns the
# estimator itself, so construction and fitting can be chained).
lin = LinearRegression().fit(X_train, y_train)

# Score the model on the held-out split: RMSE and R^2.
pred_lin = lin.predict(X_test)
r2_lin = r2_score(y_test, pred_lin)
rmse_lin = np.sqrt(mean_squared_error(y_test, pred_lin))

(rmse_lin, r2_lin)


## Polynomial Regression

In [None]:

# --- Degree-2 polynomial model: expand features, then fit linear regression.
poly2 = Pipeline(steps=[
    ('poly', PolynomialFeatures(degree=2)),
    ('model', LinearRegression()),
])
poly2.fit(X_train, y_train)
pred2 = poly2.predict(X_test)
rmse2 = np.sqrt(mean_squared_error(y_test, pred2))
r2_2 = r2_score(y_test, pred2)

# --- Degree-3 polynomial model: same pipeline with a cubic expansion.
poly3 = Pipeline(steps=[
    ('poly', PolynomialFeatures(degree=3)),
    ('model', LinearRegression()),
])
poly3.fit(X_train, y_train)
pred3 = poly3.predict(X_test)
rmse3 = np.sqrt(mean_squared_error(y_test, pred3))
r2_3 = r2_score(y_test, pred3)

# Show test-set RMSE for both degrees, followed by test-set R^2 for both.
(rmse2, rmse3, r2_2, r2_3)


## Learning Curve

In [None]:

# Compute training and cross-validation scores of a linear model for a series
# of increasing training-set sizes, using 5-fold cross-validation.
train_sizes, train_scores, val_scores = learning_curve(
    LinearRegression(), X, y, cv=5)

# The score arrays hold one row per training size and one column per CV fold.
# Average across the folds (axis=1) so each training size gets one score;
# calling .mean() without an axis would collapse everything to a single scalar
# and plt.plot would fail because x and y would have different lengths.
plt.plot(train_sizes, train_scores.mean(axis=1), label='Training')
plt.plot(train_sizes, val_scores.mean(axis=1), label='Validation')
plt.xlabel('Training set size')
plt.ylabel('Score')
plt.legend()
plt.title('Learning Curve')
plt.show()


## Validation Curve

In [None]:

# Polynomial degrees to sweep over.
degrees = [1, 2, 3, 4, 5]

# Cross-validate (5 folds) a polynomial-expansion + linear-regression pipeline
# once per degree, varying the 'degree' parameter of the 'poly' step.
train_scores, val_scores = validation_curve(
    Pipeline(steps=[
        ('poly', PolynomialFeatures()),
        ('model', LinearRegression()),
    ]),
    X,
    y,
    param_name='poly__degree',
    param_range=degrees,
    cv=5,
)

# Average the per-fold scores (axis=1) to get one score per degree.
plt.plot(degrees, np.mean(train_scores, axis=1), label='Training')
plt.plot(degrees, np.mean(val_scores, axis=1), label='Validation')
plt.title('Validation Curve')
plt.xlabel('Degree')
plt.ylabel('Score')
plt.legend()
plt.show()


## Discussion
Write your conclusions here.