In [5]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn import metrics

# Content

## I. [Regression](#I.-Regression)
* 1. [Linear Regression](#1.-Linear-Regression)
* 2. [Lasso Regression](#2.-Lasso-Regression)
* 3. [Ridge Regression](#3.-Ridge-Regression)
* 4. [ElasticNet Regression](#4.-ElasticNet-Regression)
* 5. [Polynomial Regression(Feature Expansion)](#5.-Polynomial-Regression(Feature-Expansion))

## II. [Classification](#Classification)

# I. Regression

The regression sections use the [Superconductivity](https://archive.ics.uci.edu/ml/datasets/Superconductivty+Data) dataset and several different regressors to predict the critical temperature.

Regression Process:

* Import regressor from library
* Define regressor
* Data standardization if needed
* Fit the model with training data
* Predict result using testing data

In [3]:
# Load the dataset, then separate the regression target from the feature columns.
df = pd.read_csv('train.csv')
y = df['critical_temp'].copy()
X = df.drop(columns=['critical_temp'])

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=1,
)

## 1. Linear Regression 

In [7]:
from sklearn.linear_model import LinearRegression

# Standardize the features: the scaling statistics are learned from the training
# split only and then applied unchanged to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Create the linear regressor and fit it on the scaled training data.
linear_reg = LinearRegression().fit(X_train_scaled, y_train)

# Predict the target for the held-out test split.
linear_yhat = linear_reg.predict(X_test_scaled)

# Report R-squared and mean squared error on the test split, reusing the
# predictions computed above for both metrics.
print("R-Squared = ", metrics.r2_score(y_test, linear_yhat))
print("MSE = ", metrics.mean_squared_error(y_test, linear_yhat))

R-Squared =  0.7260922962253209
MSE =  318.27775059069256


## 2. Lasso Regression

* The `alpha` parameter normally needs to be tuned.

In [11]:
from sklearn.linear_model import Lasso

# Standardize the features: the scaling statistics are learned from the training
# split only and then applied unchanged to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Create the Lasso regressor with its default hyperparameters and fit it on the
# scaled training data.
lasso_reg = Lasso().fit(X_train_scaled, y_train)

# Predict the target for the held-out test split.
lasso_yhat = lasso_reg.predict(X_test_scaled)

# Report R-squared and mean squared error on the test split, reusing the
# predictions computed above for both metrics.
print("R-Squared = ", metrics.r2_score(y_test, lasso_yhat))
print("MSE = ", metrics.mean_squared_error(y_test, lasso_yhat))

R-Squared =  0.6488466780033888
MSE =  408.0363125875046


## 3. Ridge Regression

* The `alpha` parameter normally needs to be tuned.

In [13]:
from sklearn.linear_model import Ridge

# Standardize the features: the scaling statistics are learned from the training
# split only and then applied unchanged to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Create the Ridge regressor with its default hyperparameters and fit it on the
# scaled training data.
ridge_reg = Ridge().fit(X_train_scaled, y_train)

# Predict the target for the held-out test split.
ridge_yhat = ridge_reg.predict(X_test_scaled)

# Report R-squared and mean squared error on the test split, reusing the
# predictions computed above for both metrics.
print("R-Squared = ", metrics.r2_score(y_test, ridge_yhat))
print("MSE = ", metrics.mean_squared_error(y_test, ridge_yhat))

R-Squared =  0.726568618021026
MSE =  317.72426988312174


## 4. ElasticNet Regression

* The `alpha` parameter normally needs to be tuned.

In [14]:
from sklearn.linear_model import ElasticNet

# Standardize the features: the scaling statistics are learned from the training
# split only and then applied unchanged to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Create the ElasticNet regressor with its default hyperparameters and fit it on
# the scaled training data.
elastic_net_reg = ElasticNet().fit(X_train_scaled, y_train)

# Predict the target for the held-out test split.
elastic_yhat = elastic_net_reg.predict(X_test_scaled)

# Report R-squared and mean squared error on the test split, reusing the
# predictions computed above for both metrics.
print("R-Squared = ", metrics.r2_score(y_test, elastic_yhat))
print("MSE = ", metrics.mean_squared_error(y_test, elastic_yhat))

R-Squared =  0.6237710507637774
MSE =  437.1739166873132


## 5. Polynomial Regression(Feature Expansion)

* The expansion degree normally needs to be tuned.

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

# Expand the full feature matrix with polynomial terms up to degree 3.
# NOTE(review): this cell has no recorded execution count; the expanded matrix
# may be very large for wide inputs -- confirm it fits in memory before running.
feature_exp = PolynomialFeatures(degree=3).fit(X)
X_expand = feature_exp.transform(X)

# Split the expanded features using the same test_size and random_state as the
# split of the raw features.
# NOTE(review): this rebinds X_train / X_test / y_train / y_test, so any cell
# executed after this one sees the expanded features instead of the raw ones.
X_train, X_test, y_train, y_test = train_test_split(
    X_expand,
    y,
    test_size=0.2,
    random_state=1,
)

# Standardize the expanded features: the scaling statistics are learned from the
# training split only and then applied unchanged to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Create a linear regressor and fit it on the scaled, expanded training data.
poly_reg = LinearRegression().fit(X_train_scaled, y_train)

# Predict the target for the held-out test split.
poly_yhat = poly_reg.predict(X_test_scaled)

# Report R-squared and mean squared error on the test split, reusing the
# predictions computed above for both metrics.
print("R-Squared = ", metrics.r2_score(y_test, poly_yhat))
print("MSE = ", metrics.mean_squared_error(y_test, poly_yhat))

## 6. 