# Predicción de Utilidades de Películas

In [32]:
import pandas as pd
import numpy as np
from scipy.sparse import csr_matrix
from scipy.io import mmread

# Directory holding the preprocessed feature matrices and target vectors.
# The commented-out path is an alternative feature set kept for reference;
# swap it in to rerun the notebook on that data.
# data_dir = "../movies-preproc/ngrams-deprels-fp1-origin.runtime.budget.numscreen.ratings.seasons.stars/"
data_dir = "../movies-preproc/ngrams-deprels-origin.runtime.budget.numscreen.ratings.seasons/"

# Training split: features are read from a Matrix Market file and stored as a
# CSR matrix; targets are read from a plain-text file.
train_features_file = data_dir + 'train.x.mm'
train_targets_file = data_dir + 'train.y.dat'
X = csr_matrix(mmread(train_features_file))
y = np.loadtxt(train_targets_file)

# Test split, loaded the same way.
test_features_file = data_dir + 'test.x.mm'
test_targets_file = data_dir + 'test.y.dat'
X_test = csr_matrix(mmread(test_features_file))
y_test = np.loadtxt(test_targets_file)

## Least Squares Regression

In [33]:
import sklearn.linear_model as lm

# Ordinary least squares baseline, fitted on the training split without an
# intercept term (fit_intercept=False).
model = lm.LinearRegression(fit_intercept=False)
model.fit(X, y)

# Report the model's score() (R^2) on the test split.
# print(...) with a single argument behaves identically on Python 2 and 3,
# whereas the bare `print "..."` statement is a SyntaxError on Python 3.
print("Least Squares Regression score: {}".format(model.score(X_test, y_test)))

Linear regression score: 0.206191672241


* **Least Squares Regression score: 0.59031488104** (stars)
* **Least Squares Regression score: 0.206191672241**

## Ridge Regression

In [34]:
# Sweep the Ridge penalty over a log-spaced grid, refitting on the training
# split for each value and recording the score (R^2) on the test split.
#
# NOTE(review): the best lambda is picked with the same test split the score
# is reported on, so the reported score is optimistically biased. Consider
# selecting lambda on a separate validation split or by cross-validation.
# NOTE(review): the recorded run picked 1000.0, the upper edge of this grid,
# so the optimum may lie beyond 1e3 -- consider widening the range.
test_R2 = []
lambdas = np.logspace(-3, 3, num=250)  # from 1e-3 to 1e3
ridge_model = lm.Ridge(fit_intercept=False)

for lam in lambdas:
    # Reuse one estimator; only the penalty strength changes between fits.
    ridge_model.set_params(alpha=lam)
    ridge_model.fit(X, y)
    test_R2.append(ridge_model.score(X_test, y_test))

# Index of the grid value with the highest test R^2.
i_best_lambda = np.argmax(test_R2)
print("Best lambda Ridge Regression: {}, score: {}".format(lambdas[i_best_lambda], test_R2[i_best_lambda]))

Best lambda Ridge Regression: 1000.0, score: 0.223675528483


* **Best lambda Ridge Regression: 606.921510641, score: 0.593263816723** (stars)
* **Best lambda Ridge Regression: 1000.0, score: 0.223675528483**

## Lasso Regression

In [35]:
# Sweep the Lasso penalty over a linear grid from 1000 to 5000 (100 values),
# refitting on the training split for each value and recording the score
# (R^2) on the test split.
#
# NOTE(review): the best lambda is picked with the same test split the score
# is reported on, so the reported score is optimistically biased. Consider
# selecting lambda on a separate validation split or by cross-validation.
test_R2 = []
lambdas = np.linspace(1000, 5000, num=100)
lasso_model = lm.Lasso(fit_intercept=False)

for lam in lambdas:
    # Reuse one estimator; only the penalty strength changes between fits.
    lasso_model.set_params(alpha=lam)
    lasso_model.fit(X, y)
    test_R2.append(lasso_model.score(X_test, y_test))

# Index of the grid value with the highest test R^2.
i_best_lambda = np.argmax(test_R2)
print("Best lambda Lasso Regression: {}, score: {}".format(lambdas[i_best_lambda], test_R2[i_best_lambda]))

Best lambda Lasso Regression: 1767.67676768, score: 0.276066535177


* **Best lambda Lasso Regression: 1848.48484848, score: 0.546971190802** (stars)
* **Best lambda Lasso Regression: 1767.67676768, score: 0.276066535177**

## ElasticNet Regression

In [None]:
from operator import itemgetter

# Grid search over the two ElasticNet hyper-parameters, refitting on the
# training split at every grid point and scoring (R^2) on the test split.
#
# alpha : float
#     Constant that multiplies the penalty terms. alpha = 0 is equivalent to
#     ordinary least squares, solved by the LinearRegression object; for
#     numerical reasons alpha = 0 is not advised with the Lasso object and
#     LinearRegression should be preferred.
# l1_ratio : float
#     The ElasticNet mixing parameter, with 0 <= l1_ratio <= 1.
#     l1_ratio = 0 is an L2 penalty, l1_ratio = 1 is an L1 penalty, and
#     0 < l1_ratio < 1 is a combination of L1 and L2.
#
# NOTE(review): the best grid point is picked with the same test split the
# score is reported on, so the reported score is optimistically biased.
# Consider selecting on a separate validation split or by cross-validation.
test_R2 = []
l1_ratios = np.linspace(0, 1, 11)
alphas = np.logspace(-2, 2, 50)

enet_model = lm.ElasticNet(fit_intercept=False)

for l1r in l1_ratios:
    enet_model.set_params(l1_ratio=l1r)
    for alpha in alphas:
        enet_model.set_params(alpha=alpha)
        enet_model.fit(X, y)
        # One (l1_ratio, alpha, test R^2) record per grid point.
        test_R2.append((l1r, alpha, enet_model.score(X_test, y_test)))

# Record with the highest test R^2 (third field of each tuple).
best_parameters = max(test_R2, key=itemgetter(2))
print("Best parameters ElasticNet Regression:")
print("l1_ratio: {}, lambda: {}, score: {}".format(best_parameters[0], best_parameters[1], best_parameters[2]))

* **Best parameters ElasticNet Regression: l1_ratio: 0.8, lambda: 0.429193426013, score: 0.607253193504** (stars)