# Playing with different regression algorithms

In [25]:
import numpy as np
import pandas as pd

%matplotlib inline

In [None]:
data = pd.read_csv("http://www-bcf.usc.edu/~gareth/ISL/Advertising.csv", index_col=0)

x = data[['TV', 'Radio', 'Newspaper']]
y = data.Sales

In [None]:
from sklearn.cross_validation import train_test_split
xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size=0.3, random_state=4)

In [70]:
from sklearn.metrics import explained_variance_score, mean_absolute_error, mean_squared_error, r2_score

def show_metrics(ytrue, ypred):
    # Best => 1.0; Higher value is better
    print("Explained Variance Score: %.3f" % explained_variance_score(ytrue, ypred))
    
    # Lower value is better
    print("Mean Absolute Error: %.3f" % mean_absolute_error(ytrue, ypred))
    print("Mean Squared Error: %.3f" % mean_squared_error(ytrue, ypred))
    
    # Likelihood that future predictions are correct
    print("R2 Score: %.3f" % r2_score(ytrue, ypred))

## Linear Regression

In [99]:
from sklearn.linear_model import LinearRegression

clf = LinearRegression().fit(xtrain, ytrain)
show_metrics(ytest, clf.predict(xtest))

Explained Variance Score: 0.913
Mean Absolute Error: 1.211
Mean Squared Error: 2.224
R2 Score: 0.913


## Ridge Regression

In [100]:
from sklearn.linear_model import Ridge

clf1 = Ridge().fit(xtrain, ytrain)
show_metrics(ytest, clf1.predict(xtest))

Explained Variance Score: 0.913
Mean Absolute Error: 1.211
Mean Squared Error: 2.224
R2 Score: 0.913


## Stocastic Gradient Descent

In [101]:
from sklearn.linear_model import SGDRegressor

clf2 = SGDRegressor().fit(xtrain, ytrain)
show_metrics(ytest, clf2.predict(xtest))

Explained Variance Score: -5033547110624233506471936.000
Mean Absolute Error: 25605343110074.039
Mean Squared Error: 783884517110536767669272576.000
R2 Score: -30765624179320029343383552.000


## Random Forest Regressor

In [102]:
from sklearn.ensemble import RandomForestRegressor

clf3 = RandomForestRegressor(max_depth=4).fit(xtrain, ytrain)
show_metrics(ytest, clf3.predict(xtest))

Explained Variance Score: 0.959
Mean Absolute Error: 0.810
Mean Squared Error: 1.135
R2 Score: 0.955
