In [1]:
import numpy as np
import matplotlib.pyplot as plt

In [2]:
X = []
Y = []

with open('data_multivar.txt', 'r') as f:
    for line in f.readlines():
        data = [float(i) for i in line.split(',')]
        X.append(data[:-1])
        Y.append(data[-1])
        
X = np.array(X)
Y = np.array(Y)

In [3]:
num_training = int(0.8*len(X))
num_test = len(X) - num_training

# Training Data
X_train = np.array(X[:num_training, :])
Y_train = np.array(Y[:num_training])

# Test data
X_test = np.array(X[num_training:, :])
Y_test = np.array(Y[num_training:])

## Building a Ridge Regressor

In [5]:
from sklearn.linear_model import Ridge

In [6]:
ridge_regressor = Ridge(alpha=0.01, fit_intercept=True, max_iter=10000)

In [7]:
ridge_regressor.fit(X_train, Y_train)
y_test_pred_ridge = ridge_regressor.predict(X_test)

In [8]:
import sklearn.metrics as sm

In [9]:
print("Mean absolute error = ", round(sm.mean_absolute_error(Y_test, y_test_pred_ridge), 2))

Mean absolute error =  3.95


In [10]:
print("Mean squared error = ", round(sm.mean_squared_error(Y_test, y_test_pred_ridge), 2))

Mean squared error =  23.15


In [11]:
print("Median absolute error = ", round(sm.median_absolute_error(Y_test, y_test_pred_ridge), 2))

Median absolute error =  3.69


In [12]:
print("Explain variance score = ", round(sm.explained_variance_score(Y_test, y_test_pred_ridge), 2))

Explain variance score =  0.84


In [13]:
print("R2 score = ", round(sm.r2_score(Y_test, y_test_pred_ridge), 2))

R2 score =  0.83


## Linear Regression

In [15]:
from sklearn.linear_model import LinearRegression

In [16]:
linear_regression = LinearRegression()

In [17]:
linear_regression.fit(X_train, Y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)

In [21]:
y_test_pred_linear = linear_regression.predict(X_test)

In [22]:
print("Mean absolute error = ", 
      round(sm.mean_absolute_error(Y_test, y_test_pred_linear), 2))

Mean absolute error =  3.95


In [23]:
print("Mean squared error = ", 
      round(sm.mean_squared_error(Y_test, y_test_pred_linear), 2))

Mean squared error =  23.15


In [24]:
print("Median absolute error = ", 
      round(sm.median_absolute_error(Y_test, y_test_pred_linear), 2))

Median absolute error =  3.69


In [25]:
print("Explain variance score = ", 
      round(sm.explained_variance_score(Y_test, y_test_pred_linear), 2))

Explain variance score =  0.84


In [26]:
print("R2 score = ", 
      round(sm.r2_score(Y_test, y_test_pred_linear), 2))

R2 score =  0.83


## Polynomial Regression

In [27]:
from sklearn.preprocessing import PolynomialFeatures

In [34]:
polynomial = PolynomialFeatures(degree=10)
X_train_transformed = polynomial.fit_transform(X_train)
datapoint = [0.39, 2.78, 7.11]
poly_datapoint = polynomial.fit_transform(datapoint)



In [35]:
poly_linear_model = LinearRegression()
poly_linear_model.fit(X_train_transformed, Y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)

In [36]:
print("Linear Regression: ", linear_regression.predict(datapoint))
print("\nPolynomial Regression: ", poly_linear_model.predict(poly_datapoint))

Linear Regression:  [-11.0587295]

Polynomial Regression:  [-8.14899154]




## Stochastic Gradient Descent Regressor

In [37]:
from sklearn.linear_model import SGDRegressor

In [39]:
sgd_regressor = SGDRegressor(loss='huber', n_iter=50)
sgd_regressor.fit(X_train, Y_train)

SGDRegressor(alpha=0.0001, average=False, epsilon=0.1, eta0=0.01,
       fit_intercept=True, l1_ratio=0.15, learning_rate='invscaling',
       loss='huber', n_iter=50, penalty='l2', power_t=0.25,
       random_state=None, shuffle=True, verbose=0, warm_start=False)