In [4]:
import sys
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression, Lasso, Ridge, RidgeCV, LassoCV
from sklearn.model_selection import cross_val_score

##accessing columns| ndarray[:,col_i]
##accessing rows   | ndarray[row_i,:]
## Usage: python q2.py continuous.csv secrettest.csv
# The script then
#   (1) estimates the parameters of the regression model from the data in
#       continuous.csv, and
#   (2) outputs the predicted Y value for each data point in secrettest.csv,
#       together with the R^2 value of the fitted regression model when
#       evaluated on secrettest.csv.

# File names used when no usable command-line arguments are available
# (e.g. when the cells are executed inside a Jupyter kernel).
DEFAULT_TRAIN_CSV = 'continuous.csv'
DEFAULT_SECRET_CSV = 'secrettest.csv'


def resolve_data_paths(argv):
    """Return ``(train_csv, secret_csv)`` taken from an argv-style list.

    Parameters
    ----------
    argv : list of str
        Argument vector in ``sys.argv`` layout: program name first, then the
        training CSV path, then the secret-test CSV path.

    Returns
    -------
    tuple of (str, str)
        ``(argv[1], argv[2])`` when both are present and ``argv[1]`` is not an
        option flag; otherwise the default file names.

    Notes
    -----
    A Jupyter kernel is started as ``ipykernel_launcher.py -f <connection
    file>``, so there ``argv[1]`` is ``'-f'`` rather than a data file. A
    leading ``-`` is therefore treated as "no data paths supplied".
    """
    if len(argv) >= 3 and not argv[1].startswith('-'):
        return argv[1], argv[2]
    # Make the fallback visible instead of silently reading other files.
    print("No data files given on the command line; using %s and %s"
          % (DEFAULT_TRAIN_CSV, DEFAULT_SECRET_CSV), file=sys.stderr)
    return DEFAULT_TRAIN_CSV, DEFAULT_SECRET_CSV


raw_train_data, raw_secret_data = resolve_data_paths(sys.argv)

In [None]:
def data_loader(data, n_features=10):
    """Split a data matrix into its feature columns and its target column.

    Parameters
    ----------
    data : numpy.ndarray
        Matrix with one sample per row. A 1-D array is treated as a single
        row; ``np.genfromtxt`` returns that shape for a one-line file.
    n_features : int, optional
        Number of leading columns that hold features. The column right after
        them (index ``n_features``) is used as the target. Defaults to 10.

    Returns
    -------
    tuple of (numpy.ndarray, numpy.ndarray)
        ``(features, target)`` where ``features`` has ``n_features`` columns
        and ``target`` is kept two-dimensional with a single column.

    Raises
    ------
    IndexError
        If ``data`` has no column at index ``n_features``.
    """
    # Promote a single-row (1-D) input so the column slicing below works for
    # any number of rows.
    data = np.atleast_2d(data)
    f = data[:, 0:n_features]
    # Indexing with a list keeps the target as a column (n_samples, 1).
    t = data[:, [n_features]]
    return f, t

In [6]:
# Parse the training file and the secret-test file (comma separated) into
# arrays, in that order. Inside a notebook the two path variables can be
# replaced by literal names such as 'continuous.csv' and 'secrettest.csv'.
data, secret = (
    np.genfromtxt(csv_path, delimiter=',')
    for csv_path in (raw_train_data, raw_secret_data)
)
# Separate each array into its feature columns and its target column.
(features, target), (f_secret, t_secret) = map(data_loader, (data, secret))

In [7]:
def print_exam(model, t_features, t_target):
    """Print the score and the predictions of a fitted model on a test set.

    Parameters
    ----------
    model : object
        Fitted estimator exposing ``score(X, y)`` and ``predict(X)``.
    t_features : array-like
        Test-set features handed to both ``score`` and ``predict``.
    t_target : array-like
        Test-set targets handed to ``score``.

    The score is printed with three decimals, followed by a header line and
    the raw output of ``model.predict``. Nothing is returned.
    """
    secret_r2 = model.score(t_features, t_target)
    print("R^2 value for the secret test %.3f" % secret_r2)

    print("Predictions for the secret test:")
    secret_predictions = model.predict(t_features)
    print(secret_predictions)

def evaluate(model, Xt, Yt, has_plot = False):
    """Print fit diagnostics for a fitted linear model and optionally plot it.

    Parameters
    ----------
    model : object
        Fitted estimator exposing ``score``, ``predict``, ``coef_`` and
        ``intercept_``.
    Xt : numpy.ndarray
        Two-dimensional feature matrix, one sample per row.
    Yt : numpy.ndarray
        Targets aligned row by row with ``Xt``.
    has_plot : bool, optional
        When true, draw one figure per feature column showing the targets and
        the model predictions against that feature.

    Printed output: the estimator class name, ``model.score(Xt, Yt)``, the
    3-fold cross-validated R^2 scores from ``cross_val_score``, the
    coefficients and the intercept, framed by two separator lines.
    """
    print("="*50)
    name = type(model).__name__
    print("Metrics for", name)
    print("R^2 value %.3f" % model.score(Xt,Yt))
    print("CrossValidated R^2 values at 3 folds:", cross_val_score(model, Xt, Yt,scoring='r2', cv=3))
    print("Coefficients:", model.coef_)
    print("Intercept i.e. beta0", model.intercept_)
    if has_plot:
        # Predictions do not depend on which feature is plotted, so compute
        # them once instead of once per column.
        predictions = model.predict(Xt)
        for i in range(Xt.shape[1]):
            sorted_index = np.argsort(Xt[:,i])
            sX = Xt[sorted_index, i]
            sY = Yt[sorted_index]
            # Reorder the predictions with the same permutation as X and Y;
            # otherwise the line is drawn against mismatched x positions.
            sPred = predictions[sorted_index]
            plot(sX, sY, sPred, ("X"+str(i+1),"Target"))
    print("="*50)

In [8]:
def plot(X, Y, pred = None, labels = ("Feature","Target")):
    """Show the data points and, if given, a prediction line in one figure.

    Parameters
    ----------
    X, Y : array-like
        Coordinates of the data points, drawn as round markers.
    pred : array-like, optional
        Values drawn against ``X`` as a thick blue line. Skipped when ``None``.
    labels : sequence of str, optional
        ``labels[0]`` names the x axis and ``labels[1]`` the y axis.

    The figure is displayed with ``plt.show()``; nothing is returned.
    """
    # Name the x axis from labels[0], then the y axis from labels[1].
    for axis_setter, position in ((plt.xlabel, 0), (plt.ylabel, 1)):
        axis_setter(labels[position])

    plt.plot(X, Y, 'o', label='data')

    has_prediction = pred is not None
    if has_prediction:
        plt.plot(X, pred, color='blue', linewidth=3)

    plt.show()

In [12]:
print("CVLasso Regression")
# Zero-based feature columns kept for this model; per the original note they
# were derived from an earlier LassoCV run.
selected_columns = [6, 7, 9]
r2_features = features[:, selected_columns]
# NOTE(review): alpha presumably also comes from that LassoCV run; confirm.
r2_lassoCV = Lasso(alpha=0.2627152448819272).fit(r2_features,target)
evaluate(r2_lassoCV, r2_features, target, False)

# The model was fitted on the selected columns only, so the secret test set
# must be reduced to the same columns before scoring and predicting. Passing
# all feature columns does not match the number of fitted coefficients.
r2_secret_features = f_secret[:, selected_columns]
print_exam(r2_lassoCV, r2_secret_features, t_secret)

CVLasso Regression
Metrics for Lasso
R^2 value 0.984
CrossValidated R^2 values at 3 folds: [  0.85716983   0.77093571 -81.43584189]
Coefficients: [ 9.13699415 -1.47329235  4.20002768]
Intercept i.e. beta0 [4.99751925]
R^2 value for the secret test 0.984
Predictions for the secret test:
[ -7.79119694 -56.56395855  40.51831255 -13.41397411  -1.43206414
 -34.34271089  25.67996117  26.70745749]
