In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression
from sklearn.cross_validation import cross_val_score, train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn import manifold, decomposition, linear_model, ensemble, neighbors, cross_validation, metrics




In [2]:
train = pd.read_csv("train_without_oversampling.csv")
test = pd.read_csv("test.csv")

y = train.target
train.drop("target", axis=1, inplace=True)

train.drop("client_id", axis=1, inplace=True)
ans = test[["client_id"]]
test.drop("client_id", axis=1, inplace=True)


In [4]:
train_data, test_data, y_train, y_test = train_test_split(train, y, test_size = 0.3, stratify = y)

In [6]:
lin_cl = linear_model.LogisticRegression(class_weight="balanced", verbose=1, n_jobs=-1, C=0.8)
lin_cl.fit(train_data, y_train)
preds = lin_cl.predict(test_data)
res = metrics.roc_auc_score(y_test, preds)
print res

[LibLinear]0.593169310382


In [7]:
from hyperopt import hp
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials

In [12]:
def objective(space):
    print space
    c = space['C']
    pen = space['pen']
 
    cv = cross_validation.StratifiedShuffleSplit(y, n_iter=5, test_size=0.3)
    lin_cl = linear_model.LogisticRegression(C=c, penalty=pen, n_jobs=-1)
    res = cross_validation.cross_val_score(estimator=lin_cl, X=train, y=y, scoring="roc_auc", \
                                    cv = cv, n_jobs=-1)
    print res.mean()
    return{'loss': -res.mean(), 'status': STATUS_OK}


space ={
        'C' : hp.uniform('C', 0.1, 2.0),
        'pen' : hp.choice('pen', ["l1", "l2"])
}


trials = Trials()
best = fmin(fn=objective,
            space=space,
            algo=tpe.suggest,
            max_evals=75,
            trials=trials)

print best

{'C': 1.9508027752562256, 'pen': 'l2'}
0.562614701779
{'C': 0.8918126242838899, 'pen': 'l2'}
0.563390654002
{'C': 1.4699565766384561, 'pen': 'l2'}
0.565460954663
{'C': 1.5616597934958711, 'pen': 'l1'}
0.647029622922
{'C': 1.2218897179310575, 'pen': 'l2'}
0.566820613065
{'C': 0.4441597610176703, 'pen': 'l1'}
0.646232462308
{'C': 0.6530024636414052, 'pen': 'l1'}
0.64663708943
{'C': 0.37772049789413753, 'pen': 'l2'}
0.557924265955
{'C': 1.5790280068348914, 'pen': 'l1'}
0.644806178059
{'C': 0.5087638589751203, 'pen': 'l1'}
0.644981632652
{'C': 0.6829193273262597, 'pen': 'l1'}
0.645496524745
{'C': 1.5978045462657848, 'pen': 'l1'}
0.645833655768
{'C': 0.6346704882828127, 'pen': 'l2'}
0.564367423389
{'C': 1.887468478235327, 'pen': 'l1'}
0.645712478783
{'C': 0.40929551715701906, 'pen': 'l1'}
0.646462692432
{'C': 1.5932914255125816, 'pen': 'l1'}
0.64430449053
{'C': 1.7525566863157127, 'pen': 'l1'}
0.647709759679
{'C': 1.140963658331123, 'pen': 'l1'}
0.647479130691
{'C': 0.8446954816661926, 'pen

KeyboardInterrupt: 

best 
{'C': 0.34394279435077646, 'pen': 'l1'}
0.605996210639

In [None]:
print best