### 데이터 불러오기

In [None]:
# setup
import pandas as pd
import numpy as np
import matplotlib as plt
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score
from sklearn.metrics import f1_score, roc_auc_score
import random
import functools
random.seed(1117)

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
file1='/content/drive/Shareddrives/Spotify Recommendation/Train.csv'
file2='/content/drive/Shareddrives/Spotify Recommendation/Test.csv'

In [None]:
train=pd.read_csv(file1)
test=pd.read_csv(file2)

In [None]:
print("Train datasets shape:", train.shape)
print("Test datasets shape:", test.shape)

Train datasets shape: (1815, 11)
Test datasets shape: (202, 11)


In [None]:
train_x = train.drop(train.columns[[0,10]], axis=1)
x_test = test.drop(test.columns[[0,10]], axis=1) 

In [None]:
train_y = train.iloc[:,10]
y_test = test.iloc[:,10]

In [None]:
train_y[train_y==1] # 1815개 중 918개
y_test[y_test==1] # 202개 중 102개
# over-sampling 진행 안 해도 되겠다고 판단함.

0      1
1      1
2      1
4      1
7      1
      ..
191    1
192    1
194    1
197    1
200    1
Name: target, Length: 102, dtype: int64

### Validation set 만들기 (0.2)


In [None]:
from sklearn.model_selection import train_test_split

x_train, x_valid, y_train, y_valid = train_test_split(train_x, train_y, test_size=0.2, shuffle=True , random_state=42)

In [None]:
print("Train_x datasets shape:", x_train.shape)
print("Valid_x datasets shape:", x_valid.shape)

Train_x datasets shape: (1452, 9)
Valid_x datasets shape: (363, 9)


### 함수 정의

In [None]:
def get_clf_eval(y_test, y_pred):
    confusion = confusion_matrix(y_test, y_pred)
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    F1 = f1_score(y_test, y_pred)
    AUC = roc_auc_score(y_test, y_pred)
    print('오차행렬:\n', confusion)
    print('\n정확도: {:.4f}'.format(accuracy))
    print('정밀도: {:.4f}'.format(precision))
    print('재현율: {:.4f}'.format(recall))
    print('F1: {:.4f}'.format(F1))
    print('AUC: {:.4f}'.format(AUC))

### Logistic Regression model(LASSO) + GridSearchCV

: sigmoid 함수 사용. 결과 값 0,1로 반환.

: 가중치의 제곱의 합이 아닌 가중치의 합을 더한 값에 규제 강도를 곱하여 오차에 더한다.

: 어떤 가중치는 실제로 0이 된다. 모델에서 완전히 제외되는 특성이 생기는 것.

: F1: 0.6341



In [None]:
from sklearn.linear_model import Lasso
from sklearn.model_selection import GridSearchCV

In [None]:
lasso=Lasso()
print(lasso.get_params().keys())

dict_keys(['alpha', 'copy_X', 'fit_intercept', 'max_iter', 'normalize', 'positive', 'precompute', 'random_state', 'selection', 'tol', 'warm_start'])


In [None]:
lasso = Lasso(random_state=1, max_iter=10000)
alphas = np.logspace(-4, -0.5, 30)

tuned_parameters = [{'alpha': alphas}]
n_folds = 5

clf = GridSearchCV(lasso, tuned_parameters, cv=n_folds)
clf.fit(x_train, y_train)

GridSearchCV(cv=5, error_score=nan,
             estimator=Lasso(alpha=1.0, copy_X=True, fit_intercept=True,
                             max_iter=10000, normalize=False, positive=False,
                             precompute=False, random_state=1,
                             selection='cyclic', tol=0.0001, warm_start=False),
             iid='deprecated', n_jobs=None,
             param_grid=[{'alpha': array([1.00000000e-04, 1.32035178e-04, 1.74332882e-04, 2.30180731e-04,
       3.03919538e-04, 4...
       2.80721620e-03, 3.70651291e-03, 4.89390092e-03, 6.46167079e-03,
       8.53167852e-03, 1.12648169e-02, 1.48735211e-02, 1.96382800e-02,
       2.59294380e-02, 3.42359796e-02, 4.52035366e-02, 5.96845700e-02,
       7.88046282e-02, 1.04049831e-01, 1.37382380e-01, 1.81393069e-01,
       2.39502662e-01, 3.16227766e-01])}],
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring=None, verbose=0)

In [None]:
Lasso_best = clf.best_estimator_

In [None]:
print(Lasso_best)

Lasso(alpha=0.0005298316906283707, copy_X=True, fit_intercept=True,
      max_iter=10000, normalize=False, positive=False, precompute=False,
      random_state=1, selection='cyclic', tol=0.0001, warm_start=False)


In [None]:
y_pred = Lasso_best.predict(x_valid)
y_pred1=pd.Series(y_pred)
print(accuracy_score(y_valid, y_pred1.round()))

0.6528925619834711


In [None]:
y_pred = Lasso_best.predict(x_test)
y_pred1=pd.Series(y_pred)

In [None]:
print(accuracy_score(y_test, y_pred1.round()))
pd.crosstab(y_test, y_pred1, rownames = ["True"], colnames = ["Predicted"], margins = True)

0.6287128712871287


Predicted,0.17674827014595942,0.17825957532733208,0.21668126601935034,0.2465026762928309,0.2526444803818896,0.2583418961256696,0.2603727765176539,0.2671011971278656,0.26847605131168595,0.2759801972241263,0.2842971596985691,0.2874333954227102,0.29921032805680553,0.3287018532745194,0.3305688851540079,0.3331764654129464,0.3353459379663826,0.3404971396897381,0.34943017122810915,0.353793575399173,0.3546061491283876,0.35901248176853745,0.36308442246251976,0.3692549209985558,0.37055670913257754,0.37290706531013074,0.38012737764985155,0.38279985748591955,0.3850373942230948,0.387277448626743,0.3883327836761736,0.39166701717537383,0.39763092890191504,0.399015247942674,0.4022379978753306,0.4064295667941427,0.40852295229490726,0.40954874755815096,0.40991738897216096,0.41214300390640646,...,0.661227504576303,0.6649099729021166,0.6666327938997618,0.6701030996307829,0.6720900211646685,0.6779229694358859,0.6892508987239645,0.6906555555817913,0.6989462341667727,0.7026519538081268,0.7153052866400065,0.7214024701732722,0.7263250602073146,0.7266768158155905,0.7296456210749294,0.7567307926712166,0.757763782863708,0.7625211990595813,0.766452069235998,0.7667243509584385,0.7672877179693294,0.7735149504204069,0.7917255685935669,0.7945899601093969,0.7957588543726721,0.7992104489220149,0.8142251171470398,0.817128995985364,0.8303250883442128,0.8341855212363027,0.8357467768776587,0.8367161598437083,0.8699081853208426,0.8961338101123775,0.9063881545342869,0.9175373355687424,0.9600180910145613,1.066082926269668,1.1283978703484312,All
True,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
0,1,1,1,1,1,1,1,0,0,0,1,1,1,0,1,0,1,0,0,1,1,0,1,1,1,1,0,0,0,0,1,1,1,1,1,1,0,0,1,1,...,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,100
1,0,0,0,0,0,0,0,1,1,1,0,0,0,1,0,1,0,1,1,0,0,1,0,0,0,0,1,1,1,1,0,0,0,0,0,0,1,1,0,0,...,1,1,0,1,1,0,1,0,1,1,1,1,1,0,1,1,1,1,0,1,1,1,0,1,1,1,1,1,1,1,1,1,1,0,1,1,0,1,1,102
All,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,...,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,202


In [None]:
get_clf_eval(y_test,y_pred1.round())

오차행렬:
 [[62 38]
 [37 65]]

정확도: 0.6287
정밀도: 0.6311
재현율: 0.6373
F1: 0.6341
AUC: 0.6286


### Logistic Regression model(RIDGE) + GridSearchCV
https://wikidocs.net/16594

:sigmoid 함수 사용. 결과 값 0,1로 반환.

: 각 가중치 제곱의 합에 규제 강도 (labmda) 곱함.

: lambda를 크게 하면 가중치가 더 많이 감소하고, lambda를 작게 하면 가중치가 증가한다.

: F1: 0.6381

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import KFold

In [None]:
LR=LogisticRegression()

In [None]:
print(LR.get_params().keys())

dict_keys(['C', 'class_weight', 'dual', 'fit_intercept', 'intercept_scaling', 'l1_ratio', 'max_iter', 'multi_class', 'n_jobs', 'penalty', 'random_state', 'solver', 'tol', 'verbose', 'warm_start'])


In [None]:
from sklearn.model_selection import GridSearchCV

# 파라메터 후보
param_grid = {'C': [9.5,9.6,9.7,9.8,9.9,10,10.1,10.2,10.3,10.4,10.5],
              'penalty': ['l2']}

# 그리드 서치 진행

grid_search = GridSearchCV(LR, param_grid, cv=KFold(n_splits=10))

# 최종 모델 성능 점검

grid_search.fit(x_train,y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

GridSearchCV(cv=KFold(n_splits=10, random_state=None, shuffle=False),
             error_score=nan,
             estimator=LogisticRegression(C=1.0, class_weight=None, dual=False,
                                          fit_intercept=True,
                                          intercept_scaling=1, l1_ratio=None,
                                          max_iter=100, multi_class='auto',
                                          n_jobs=None, penalty='l2',
                                          random_state=None, solver='lbfgs',
                                          tol=0.0001, verbose=0,
                                          warm_start=False),
             iid='deprecated', n_jobs=None,
             param_grid={'C': [9.5, 9.6, 9.7, 9.8, 9.9, 10, 10.1, 10.2, 10.3,
                               10.4, 10.5],
                         'penalty': ['l2']},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring=None, verbose=0)

In [None]:
Ridge_best = grid_search.best_estimator_

In [None]:
print(Ridge_best)

LogisticRegression(C=9.7, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)


In [None]:
y_pred = Ridge_best.predict(x_valid)
y_pred1=pd.Series(y_pred)
print(accuracy_score(y_valid, y_pred1.round()))

0.6584022038567493


In [None]:
y_pred = Ridge_best.predict(x_test)
y_pred1=pd.Series(y_pred)

In [None]:
print(accuracy_score(y_test, y_pred1.round()))
pd.crosstab(y_test, y_pred1, rownames = ["True"], colnames = ["Predicted"], margins = True)

0.6237623762376238


Predicted,0,1,All
True,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,59,41,100
1,35,67,102
All,94,108,202


In [None]:
get_clf_eval(y_test,y_pred1.round())

오차행렬:
 [[59 41]
 [35 67]]

정확도: 0.6238
정밀도: 0.6204
재현율: 0.6569
F1: 0.6381
AUC: 0.6234


### Logistic Regression model(Elastic Net) + GridSearchCV

: F1: 0.6286

In [None]:
from sklearn.linear_model import ElasticNet

In [None]:
model=ElasticNet()

In [None]:
print(model.get_params().keys())

dict_keys(['alpha', 'copy_X', 'fit_intercept', 'l1_ratio', 'max_iter', 'normalize', 'positive', 'precompute', 'random_state', 'selection', 'tol', 'warm_start'])


In [None]:
# 파라메터 후보
param_grid = {'alpha': [0.0098,0.0099,0.001,0.002,0.003,0.004,0.005],'l1_ratio':[0.001,0.01,0.1,1]}

# 그리드 서치 진행

grid_search = GridSearchCV(model, param_grid, cv=KFold(n_splits=10))

In [None]:
grid_search.fit(x_train,y_train)

GridSearchCV(cv=KFold(n_splits=10, random_state=None, shuffle=False),
             error_score=nan,
             estimator=ElasticNet(alpha=1.0, copy_X=True, fit_intercept=True,
                                  l1_ratio=0.5, max_iter=1000, normalize=False,
                                  positive=False, precompute=False,
                                  random_state=None, selection='cyclic',
                                  tol=0.0001, warm_start=False),
             iid='deprecated', n_jobs=None,
             param_grid={'alpha': [0.0098, 0.0099, 0.001, 0.002, 0.003, 0.004,
                                   0.005],
                         'l1_ratio': [0.001, 0.01, 0.1, 1]},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring=None, verbose=0)

In [None]:
ElasticNet_best = grid_search.best_estimator_

In [None]:
print(ElasticNet_best)

ElasticNet(alpha=0.001, copy_X=True, fit_intercept=True, l1_ratio=0.001,
           max_iter=1000, normalize=False, positive=False, precompute=False,
           random_state=None, selection='cyclic', tol=0.0001, warm_start=False)


In [None]:
y_pred = ElasticNet_best.predict(x_valid)
y_pred1=pd.Series(y_pred)
print(accuracy_score(y_valid, y_pred1.round()))

0.6584022038567493


In [None]:
print(ElasticNet_best)

ElasticNet(alpha=0.001, copy_X=True, fit_intercept=True, l1_ratio=0.001,
           max_iter=1000, normalize=False, positive=False, precompute=False,
           random_state=None, selection='cyclic', tol=0.0001, warm_start=False)


In [None]:
y_pred = ElasticNet_best.predict(x_test)
y_pred1=pd.Series(y_pred)

In [None]:
print(accuracy_score(y_test, y_pred1.round()))
pd.crosstab(y_test, y_pred1, rownames = ["True"], colnames = ["Predicted"], margins = True)

0.6138613861386139


Predicted,0.17457986592920038,0.17527187873593955,0.2197653056536498,0.2460130291056398,0.24987266763745591,0.2585238847156601,0.25858116598937014,0.2675060105894861,0.27379139966098004,0.27885591415499555,0.2812598391496162,0.28409327939919377,0.2983728951492046,0.32668873887997896,0.3311255996130193,0.3334755488319684,0.3365493208367675,0.34205077931117,0.3477257790964883,0.3527740819014718,0.35678186807084317,0.36183719502828826,0.36337628341385975,0.36848100530923245,0.37706318785044945,0.3781083451128153,0.3811555196696711,0.382131468486258,0.3860282683564715,0.38693017003627395,0.38894973902032376,0.3901632955072692,0.39682605964967443,0.39959514826566866,0.4007903849368868,0.40432893385653434,0.406658873467712,0.4069006461033948,0.4094746632366464,0.41191110762123273,...,0.6571083593850363,0.6645583714070553,0.667239467589678,0.6741739829361724,0.6799015942039225,0.6817143001661377,0.688095448088761,0.6929189413997495,0.6935631840163962,0.7054990386100662,0.7122534027168618,0.7147293052053054,0.7229426008559475,0.7256106606339126,0.7281378955420327,0.7518396101408563,0.752471952624304,0.7561650975100426,0.766626897459526,0.7671753195104866,0.770163230041492,0.7748794790600766,0.7812919081612278,0.7882379465399616,0.7922387658297835,0.7923725091016591,0.8101252687857933,0.8159665786713746,0.832481400990651,0.8329659922078102,0.8366296964021399,0.8434953813640326,0.8599100657815191,0.8834318064586025,0.9025839294051248,0.9205980015834577,0.9437858358294832,1.0580975375068467,1.1449962557462294,All
True,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
0,1,1,1,1,1,1,1,0,0,0,1,1,1,0,1,1,0,0,0,1,0,1,1,1,1,0,1,0,0,0,1,1,1,1,1,1,0,0,1,1,...,0,0,1,0,0,1,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,100
1,0,0,0,0,0,0,0,1,1,1,0,0,0,1,0,0,1,1,1,0,1,0,0,0,0,1,0,1,1,1,0,0,0,0,0,0,1,1,0,0,...,1,1,0,1,1,0,1,1,0,1,1,1,1,0,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,1,1,1,1,0,1,1,0,1,1,102
All,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,...,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,202


In [None]:
get_clf_eval(y_test,y_pred1.round())

오차행렬:
 [[58 42]
 [36 66]]

정확도: 0.6139
정밀도: 0.6111
재현율: 0.6471
F1: 0.6286
AUC: 0.6135


### Logistic Regreesion + Feature Selection (9개 중 5개) + grid_search(L2)

In [None]:
from sklearn.feature_selection import SelectKBest, f_classif

selectK = SelectKBest(score_func=f_classif, k=5)
train_xK = selectK.fit_transform(train_x, train_y)
test_xK = selectK.fit_transform(x_test, y_test)

In [None]:
grid_search.fit(train_xK,train_y)

GridSearchCV(cv=KFold(n_splits=10, random_state=None, shuffle=False),
             error_score=nan,
             estimator=ElasticNet(alpha=1.0, copy_X=True, fit_intercept=True,
                                  l1_ratio=0.5, max_iter=1000, normalize=False,
                                  positive=False, precompute=False,
                                  random_state=None, selection='cyclic',
                                  tol=0.0001, warm_start=False),
             iid='deprecated', n_jobs=None,
             param_grid={'alpha': [0.001, 0.01, 0.1, 1, 10, 100],
                         'l1_ratio': [0.001, 0.01, 0.1, 1]},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring=None, verbose=0)

In [None]:
Featureselection_best = grid_search.best_estimator_

In [None]:
y_pred = Featureselection_best.predict(test_xK)
print(accuracy_score(y_test, y_pred.round()))
pd.crosstab(y_test, y_pred, rownames = ["True"], colnames = ["Predicted"], margins = True)

0.04950495049504951


Predicted,-8.232559259935083,-6.89926787596032,-5.607326042742262,-5.397500014971659,-5.244646871709646,-5.161033544691659,-4.933289009070276,-4.466030272788162,-4.457735272201784,-4.45309475155321,-4.10459071789912,-3.9572126163025008,-3.882416261733937,-3.873852291651879,-3.754739989054294,-3.681951288731484,-3.625685874835261,-3.56382685550003,-3.493017671946995,-3.437219393698414,-3.1999081302391263,-3.1348149920050505,-3.070684679547532,-2.9736365349944847,-2.930422223589953,-2.9103682757341502,-2.6402288481279044,-2.5359488118772555,-2.495084972475126,-2.449387711011991,-2.370776946105164,-2.3664701163703787,-2.274048744594025,-2.2435387922203844,-2.174395089214247,-2.154161847146352,-2.1530080064865116,-2.128978039275785,-2.122748925504345,-2.118497139824452,...,-0.8191908561277619,-0.811185572478458,-0.8076597866140747,-0.7904157019153957,-0.7675003600420778,-0.7642570509860547,-0.7626652612203719,-0.7501789891961457,-0.7328659856431023,-0.708403607817718,-0.6909875429694441,-0.6641770010066568,-0.647405558275348,-0.639629285668118,-0.6063765004544468,-0.6042850435186113,-0.5983462824660821,-0.5820631585922373,-0.5729968666239864,-0.5578077932860408,-0.535339113545348,-0.5296346611645624,-0.495536758846251,-0.49272012654417346,-0.48945240805441054,-0.4662108691456948,-0.4274309627571898,-0.4245728395980371,-0.4179128683927193,-0.4115525598533661,-0.31831184921116334,-0.31473682825226745,-0.2928552945620239,-0.2829552288140538,-0.2656952324638515,0.06669775622081064,0.08897117162758927,0.304718335086932,0.31289060017258025,All
True,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
0,1,0,1,1,1,0,0,1,1,0,0,0,1,1,1,1,0,0,1,1,1,0,1,1,0,0,0,1,0,0,1,0,0,1,1,1,0,0,0,0,...,1,1,0,1,0,0,1,1,0,0,1,1,0,1,0,1,0,1,1,0,1,1,1,1,0,0,1,1,1,0,1,1,0,0,1,1,0,0,1,100
1,0,1,0,0,0,1,1,0,0,1,1,1,0,0,0,0,1,1,0,0,0,1,0,0,1,1,1,0,1,1,0,1,1,0,0,0,1,1,1,1,...,0,0,1,0,1,1,0,0,1,1,0,0,1,0,1,0,1,0,0,1,0,0,0,0,1,1,0,0,0,1,0,0,1,1,0,0,1,1,0,102
All,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,...,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,202
