In [39]:
import pandas as pd
import numpy as np

from scipy.stats import uniform

from sklearn.linear_model import ElasticNet, PassiveAggressiveRegressor, LinearRegression, ARDRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import KFold, cross_val_score, RandomizedSearchCV
from sklearn.metrics import r2_score

Uploading competition data

In [2]:
X_train = pd.read_csv("competition_data/trainX.csv", index_col = 'Id').to_numpy()
Y_train = pd.read_csv("competition_data/trainY.csv", index_col = 'Id').Value.to_numpy()
X_test = pd.read_csv("competition_data/testX.csv", index_col = 'Id').to_numpy()

Definition of PolynomialFeatures

In [3]:
poly = PolynomialFeatures(2)

Using RandomizedSearchCV to fit hyperparameters

In [7]:
X = poly.fit_transform(X_train)
elastic = ElasticNet(max_iter=10000)

distributions = {
    'alpha': uniform(),
    'l1_ratio': uniform(),
}

rand_search = RandomizedSearchCV(elastic, distributions, n_iter=100, cv=5)
search_result = rand_search.fit(X, Y_train)

print('Best parameters:', search_result.best_params_)
print('Best parameters:', search_result.best_score_)

elastic_rand_search_prm = search_result.best_params_

Best parameters: {'alpha': 0.012378390195448508, 'l1_ratio': 0.6553692385314868}
Best parameters: 0.3871080602337083


In [8]:
X = poly.fit_transform(X_train)
passive_aggressive = PassiveAggressiveRegressor(max_iter=5000, random_state=0)

distributions = {
    'C': uniform(loc=0, scale=0.1),
    'epsilon': uniform(loc=0, scale=0.1),
    'tol': uniform(1e-3, 1e-10),
}

rand_search = RandomizedSearchCV(passive_aggressive, distributions, n_iter=100, cv=5)
search_result = rand_search.fit(X, Y_train)

print('Best parameters:', search_result.best_params_)
print('Best parameters:', search_result.best_score_)

passive_aggressive_rand_search_prm = search_result.best_params_

Best parameters: {'C': 0.008766832563621152, 'epsilon': 0.03312366582829205, 'tol': 0.0010000000266718165}
Best parameters: 0.3151735931348718


In [44]:
X = poly.fit_transform(X_train)
ard = ARDRegression(n_iter=300)

distributions = {
    'tol': uniform(1e-2, 1e-5),
    'alpha_1': uniform(1e-1, 1e-11),
    'alpha_2': uniform(1e-1, 1e-11),
    'lambda_1': uniform(1e-1, 1e-11),
    'lambda_2': uniform(1e-1, 1e-11),
    'lambda_1': uniform(1e-1, 1e-11),
}

rand_search = RandomizedSearchCV(ard, distributions, n_iter=2, cv=5)
search_result = rand_search.fit(X, Y_train)

print('Best parameters:', search_result.best_params_)
print('Best parameters:', search_result.best_score_)

ard_rand_search_prm = search_result.best_params_

Best parameters: {'alpha_1': 0.10000000000097219, 'alpha_2': 0.1000000000021046, 'lambda_1': 0.1000000000053792, 'lambda_2': 0.1000000000082386, 'tol': 0.010004695408376181}
Best parameters: 0.3252779132921292


PassiveAggressiveRegressor, Kaggle public test score: 0.64698 👇

In [31]:
X = poly.fit_transform(X_train)
testX = poly.fit_transform(X_test)

passive_aggressive = PassiveAggressiveRegressor(max_iter=100, random_state=0)
passive_aggressive.fit(X, Y_train)
y_pred = passive_aggressive.predict(testX).reshape(2000,)

print(cross_val_score(passive_aggressive, X, Y_train, cv=5).mean())
y_pred

-0.9277521496759714


array([19.72246023, 34.83825565, 29.16675255, ..., 26.77971688,
       32.18134777, 29.75120905])

LinearRegression, Kaggle public test score: 0.43746 👇

In [32]:
X = poly.fit_transform(X_train)
testX = poly.fit_transform(X_test)

linear = LinearRegression()
linear.fit(X, Y_train)
y_pred = linear.predict(testX).reshape(2000,)

print(cross_val_score(linear, X, Y_train, cv=5).mean())
y_pred

0.30151623990826026


array([23.95507812, 32.609375  , 27.62890625, ..., 23.5703125 ,
       28.68359375, 27.41992188])

Elastic Net, Kaggle public test score: 0.46244 👇

In [37]:
X = poly.fit_transform(X_train)
testX = poly.fit_transform(X_test)

elastic = ElasticNet(max_iter=5000, **elastic_rand_search_prm)
elastic.fit(X, Y_train)
y_pred = elastic.predict(testX).reshape(2000,)

print(cross_val_score(elastic, X, Y_train, cv=5).mean())
y_pred

0.3871080602337083


array([26.85105463, 32.00113497, 29.36732264, ..., 26.85124476,
       28.5933084 , 28.28644156])

ARDRegression

In [45]:
X = poly.fit_transform(X_train)
testX = poly.fit_transform(X_test)

ard = ARDRegression(n_iter=5000)
ard.fit(X, Y_train)
y_pred = ard.predict(testX).reshape(2000,)

print(cross_val_score(ard, X, Y_train, cv=5).mean())
y_pred

KeyboardInterrupt: 

Writing to file

In [None]:
Y_predicted = pd.DataFrame({'Value': y_pred, 'Id': range(len(y_pred))})
Y_predicted.to_csv('mullagaliev_amir.csv', index=False)
print(Y_predicted)