In [1]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets, linear_model
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from sklearn.model_selection import train_test_split, cross_validate, cross_val_score

# Importing and splitting the data 

In [2]:
data = np.genfromtxt("fuel_data.csv", delimiter = ',') 

X = data.T[0:7].T
Y = data.T[-1].T

#Splitting data into test and train
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.25, random_state=20)

print np.shape(X_train)
print np.shape(X_test)
print np.shape(Y_train)
print np.shape(Y_test)

(294L, 7L)
(98L, 7L)
(294L,)
(98L,)


# Creating the regression model

In [3]:
regr = linear_model.LinearRegression() 

In [4]:
regr.fit(X_train,Y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)

# Getting the predictions

In [5]:
Y_pred = regr.predict(X_test)
Y_pred_train = regr.predict(X_train) 

# Cross Validation

In [6]:
scores = cross_val_score(regr, X, Y, cv = 4)
print np.mean(scores) 

0.805639410302


# MSE and R2

In [7]:
print("Mean squared error (train): %.2f"
      % mean_squared_error(Y_train,Y_pred_train))
print("R2(train): %.2f"
      % r2_score(Y_train,Y_pred_train))  

Mean squared error (train): 10.26
R2(train): 0.83


In [8]:
print("Mean squared error (test): %.2f"
      % mean_squared_error(Y_test, Y_pred)) 
print("R2(test): %.2f"
      % r2_score(Y_test,Y_pred))  

Mean squared error (test): 13.00
R2(test): 0.79


# Mean Absolute Error

In [9]:
print("mean_absolute_error(test): %.2f"
      % mean_absolute_error(Y_test, Y_pred))

print("mean_absolute_error(train): %.2f"
      % mean_absolute_error(Y_train, Y_pred_train))

mean_absolute_error(test): 2.77
mean_absolute_error(train): 2.43


# Maximum Absolute Error

In [10]:
def max_abs_error(data1,data2):
    Y_error = []
    for i in range(len(Y_test)):
        Y_error.append(100*(abs(data1[i] - data2[i])/data1[i]))
    return Y_error

In [11]:
MAE_test = max_abs_error(Y_test, Y_pred)
MAE_train = max_abs_error(Y_train, Y_pred_train)
print max(MAE_test) 
print max(MAE_train) 

53.4973265952
51.3445415501


# Cofficients of regression

In [12]:
print('Coefficients: \n', regr.coef_)

('Coefficients: \n', array([-0.54312507,  0.02686998, -0.01002096, -0.00722013,  0.13258509,
        0.77755046,  1.59068595]))
