In [1]:
%pylab inline
%matplotlib inline

# Global Imports
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn import datasets
import os
import sys
import pickle
from pprint import pprint
from time import time
import datetime
from time import gmtime, strftime
import statsmodels.api as sm
from patsy import dmatrices

# Scikit-Learn imports
from sklearn.linear_model import LogisticRegression

from sklearn.pipeline import Pipeline
from sklearn.grid_search import GridSearchCV
from sklearn.cross_validation import StratifiedKFold
from sklearn.cross_validation import train_test_split

from sklearn.cross_validation import cross_val_score
from sklearn import metrics

# Local Imports
# Append the project's config directory (located under the current user's
# home directory) to sys.path so that the `paths` module can be imported.
path = str(os.path.expanduser('~')) + '/git/predictEPL/config'
sys.path.append(path)
import paths

# paths.UTILS is appended to sys.path before importing `useful_methods`;
# presumably that is the directory the module lives in -- TODO confirm.
sys.path.append(paths.UTILS)
import useful_methods

Populating the interactive namespace from numpy and matplotlib


In [2]:
# UTC timestamp of this run, formatted as YYYY-MM-DD_HH:MM:SS
# (the space between date and time is swapped for an underscore).
date_now = "_".join(strftime("%Y-%m-%d %H:%M:%S", gmtime()).split(" "))

# ***************************************************
# [Step 1]: Data Load
# ***************************************************

# Load the per-match CSV through the project's helper.
df = useful_methods.csv_dic_df(
    paths.DATA_HOME + "EPL/all_game_emolex_counted.csv"
)


# Integer class codes stored in the `result` column.
dic_res = {
    'home_win': 1,
    'away_win': 0,
    'draw': 2
}


def Labeling(goal_diff):
    """Return the class code for a goal difference (home goals minus away goals).

    A positive difference is a home win, a negative one an away win; any
    other value (zero, or something that compares neither above nor below
    zero such as NaN) is coded as a draw. The codes are read from `dic_res`
    so the mapping is defined in exactly one place.
    """
    if goal_diff > 0:
        return dic_res['home_win']
    if goal_diff < 0:
        return dic_res['away_win']
    return dic_res['draw']

def _parse_float_list(text):
    """Convert a bracketed, comma-separated string like "[1.0, 2.0]" to a float array.

    Surrounding whitespace is stripped, the first and last remaining
    characters (the brackets) are discarded, and the rest is split on commas.
    Blank tokens are skipped, so "[]" gives an empty array instead of raising.
    """
    tokens = text.strip()[1:-1].split(',')
    return np.array([float(token) for token in tokens if token.strip()])


# Full-time scores become integers. Bracket assignment is used because
# attribute-style assignment (df.col = ...) only works for columns that
# already exist and otherwise silently sets a plain Python attribute.
for score_col in ('score_ft_home', 'score_ft_away'):
    df[score_col] = df[score_col].map(int)

# The pn_* and emolex_* columns hold stringified lists of counts; turn each
# cell into a numpy float array.
for vector_col in ('pn_home', 'pn_away', 'emolex_home', 'emolex_away'):
    df[vector_col] = df[vector_col].map(_parse_float_list)

# Goal difference is home minus away; Labeling turns it into the class code.
# Draws (code 2) are still present in df after this cell.
df['goal_diff'] = df['score_ft_home'] - df['score_ft_away']
df['result'] = df['goal_diff'].map(Labeling)

# Show a preview only, not the whole frame.
df.head(10)

Unnamed: 0,GW,away_team,date,emolex_away,emolex_home,home_team,pn_away,pn_home,score_ft_away,score_ft_home,score_ht_away,score_ht_home,time,goal_diff,result
0,5,Chelsea,Sat. 12 Sep.,"[1732.0, 1670.0, 1436.0, 1569.0, 1079.0, 2217....","[405.0, 287.0, 192.0, 322.0, 291.0, 484.0, 530...",Everton,"[3901.0, 3699.0]","[1009.0, 647.0]",1,3,1,2,11:45,2,1
1,5,Bournemouth,Sat. 12 Sep.,"[36.0, 36.0, 25.0, 22.0, 27.0, 50.0, 37.0, 37.0]","[67.0, 64.0, 22.0, 54.0, 122.0, 194.0, 122.0, ...",Norwich,"[112.0, 53.0]","[331.0, 100.0]",1,3,0,1,14:00,2,1
2,5,Swansea,Sat. 12 Sep.,"[24.0, 23.0, 10.0, 20.0, 22.0, 51.0, 33.0, 46.0]","[38.0, 22.0, 18.0, 20.0, 57.0, 66.0, 52.0, 58.0]",Watford,"[78.0, 36.0]","[98.0, 55.0]",0,1,0,0,14:00,1,1
3,5,Southampton,Sat. 12 Sep.,"[23.0, 28.0, 17.0, 23.0, 75.0, 65.0, 68.0, 67.0]","[56.0, 30.0, 38.0, 32.0, 43.0, 80.0, 48.0, 94.0]",WestBromwich,"[154.0, 62.0]","[116.0, 116.0]",0,0,0,0,14:00,0,2
4,5,Stoke,Sat. 12 Sep.,"[82.0, 59.0, 40.0, 47.0, 75.0, 130.0, 107.0, 8...","[603.0, 445.0, 421.0, 432.0, 1039.0, 1681.0, 1...",Arsenal,"[255.0, 113.0]","[2529.0, 938.0]",0,2,0,1,14:00,2,1
5,5,City,Sat. 12 Sep.,"[1029.0, 1358.0, 538.0, 805.0, 441.0, 378.0, 3...","[146.0, 113.0, 60.0, 80.0, 100.0, 190.0, 145.0...",Crystal,"[975.0, 1708.0]","[299.0, 225.0]",1,0,0,0,14:00,-1,0
6,5,Liverpool,Sat. 12 Sep.,"[1743.0, 1955.0, 1644.0, 1838.0, 1207.0, 2875....","[5228.0, 3026.0, 2403.0, 2447.0, 2705.0, 7167....",United,"[4881.0, 4464.0]","[11807.0, 9999.0]",1,3,0,0,16:30,2,1
7,5,Tottenham,Sun. 13 Sep.,"[379.0, 365.0, 248.0, 315.0, 297.0, 511.0, 421...","[158.0, 152.0, 87.0, 120.0, 205.0, 329.0, 218....",Sunderland,"[1117.0, 815.0]","[498.0, 330.0]",1,0,0,0,12:30,-1,0
8,5,Villa,Sun. 13 Sep.,"[195.0, 233.0, 203.0, 191.0, 384.0, 725.0, 881...","[70.0, 75.0, 74.0, 65.0, 99.0, 122.0, 99.0, 10...",Leicester,"[1550.0, 480.0]","[276.0, 163.0]",2,3,1,0,15:00,1,1
9,5,Newcastle,Mon. 14 Sep.,"[589.0, 697.0, 473.0, 643.0, 418.0, 919.0, 574...","[96.0, 92.0, 64.0, 68.0, 147.0, 401.0, 297.0, ...",WestHam,"[1560.0, 1563.0]","[655.0, 202.0]",0,2,0,1,19:00,2,1


In [6]:
# Keep only decisive matches: rows labelled 2 (draw) are dropped and the index
# is renumbered, leaving a binary target (1 = home win, 0 = away win).
df = df[df.result != 2].copy().reset_index(drop=True)

# Positional meaning of the entries inside each pn_* / emolex_* count vector.
POLARITY_LABELS = ['pos', 'neg']
EMOTION_LABELS = ['anger', 'fear', 'disgust', 'sadness',
                  'surprise', 'trust', 'joy', 'anticipation']


def _add_share_columns(target, vectors, labels, side):
    """Add one '<label>_<side>' column per label holding that entry's share of its row total.

    target  -- DataFrame that receives the new columns (modified in place).
    vectors -- iterable of 1-D count arrays, one per match.
    labels  -- names of the leading entries of each array, in positional order.
    side    -- column-name suffix ('home' or 'away').

    Every array is divided by its own sum. np.sum is called explicitly rather
    than relying on whichever `sum` the interactive namespace happens to
    provide. A row whose counts add up to zero produces NaN shares.
    """
    shares = [np.asarray(vec, dtype=float) / np.sum(vec) for vec in vectors]
    for position, label in enumerate(labels):
        target['{}_{}'.format(label, side)] = [row[position] for row in shares]


dta = pd.DataFrame()

dta['team_home'] = df.home_team
dta['team_away'] = df.away_team

# Positive / negative shares, home columns first and then away.
for side in ('home', 'away'):
    _add_share_columns(dta, df['pn_' + side], POLARITY_LABELS, side)

# Half-time scores as integers.
for side in ('home', 'away'):
    dta['score_ht_' + side] = df['score_ht_' + side].map(int)

# Emotion shares, home columns first and then away.
for side in ('home', 'away'):
    _add_share_columns(dta, df['emolex_' + side], EMOTION_LABELS, side)

# 'home_win': 1, 'away_win': 0, 'draw': 2
dta['result'] = df.result

# Show a preview only, not the whole frame.
dta.head(10)

Unnamed: 0,team_home,team_away,pos_home,neg_home,pos_away,neg_away,score_ht_home,score_ht_away,anger_home,fear_home,...,anticipation_home,anger_away,fear_away,disgust_away,sadness_away,surprise_away,trust_away,joy_away,anticipation_away,result
0,Everton,Chelsea,0.609300,0.390700,0.513289,0.486711,2,1,0.136824,0.096959,...,0.151689,0.130648,0.125971,0.108320,0.118353,0.081391,0.167232,0.124085,0.143999,1
1,Norwich,Bournemouth,0.767981,0.232019,0.678788,0.321212,1,0,0.082209,0.078528,...,0.208589,0.133333,0.133333,0.092593,0.081481,0.100000,0.185185,0.137037,0.137037,1
2,Watford,Swansea,0.640523,0.359477,0.684211,0.315789,0,0,0.114804,0.066465,...,0.175227,0.104803,0.100437,0.043668,0.087336,0.096070,0.222707,0.144105,0.200873,1
3,Arsenal,Stoke,0.729449,0.270551,0.692935,0.307065,1,0,0.085133,0.062826,...,0.175491,0.130366,0.093800,0.063593,0.074722,0.119237,0.206677,0.170111,0.141494,1
4,Crystal,City,0.570611,0.429389,0.363399,0.636601,0,0,0.145563,0.112662,...,0.168495,0.173232,0.228620,0.090572,0.135522,0.074242,0.063636,0.057239,0.176936,0
5,United,Liverpool,0.541456,0.458544,0.522311,0.477689,0,0,0.150100,0.086879,...,0.175423,0.110031,0.123414,0.103781,0.116028,0.076195,0.181491,0.136039,0.153021,1
6,Sunderland,Tottenham,0.601449,0.398551,0.578157,0.421843,0,0,0.104153,0.100198,...,0.163481,0.128518,0.123771,0.084096,0.106816,0.100712,0.173279,0.142760,0.140047,0
7,Leicester,Villa,0.628702,0.371298,0.763547,0.236453,0,1,0.099010,0.106082,...,0.145686,0.054561,0.065193,0.056799,0.053442,0.107443,0.202854,0.246503,0.213206,1
8,WestHam,Newcastle,0.764294,0.235706,0.499520,0.500480,1,0,0.066992,0.064201,...,0.187020,0.115422,0.136586,0.092691,0.126004,0.081913,0.180090,0.112483,0.154811,1
9,Chelsea,Arsenal,0.563569,0.436431,0.410524,0.589476,0,0,0.117257,0.098728,...,0.176162,0.176181,0.120058,0.174138,0.128831,0.077995,0.142050,0.088811,0.091936,1


In [7]:
# 'home_win': 1, 'away_win': 0, 'draw': 2
# Per-class feature means. numeric_only=True leaves out the string columns
# (team_home / team_away); without it pandas >= 2.0 raises a TypeError rather
# than silently dropping them.
dta.groupby('result').mean(numeric_only=True)

Unnamed: 0_level_0,pos_home,neg_home,pos_away,neg_away,score_ht_home,score_ht_away,anger_home,fear_home,disgust_home,sadness_home,...,joy_home,anticipation_home,anger_away,fear_away,disgust_away,sadness_away,surprise_away,trust_away,joy_away,anticipation_away
result,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,0.594051,0.405949,0.626635,0.373365,0.375,0.833333,0.121734,0.110601,0.074734,0.095972,...,0.14644,0.155577,0.114128,0.102344,0.067767,0.100756,0.102807,0.195737,0.153609,0.162851
1,0.656051,0.343949,0.557368,0.442632,1.121622,0.202703,0.104693,0.09181,0.058235,0.084646,...,0.16945,0.171276,0.125709,0.115874,0.085068,0.112246,0.100829,0.171509,0.135504,0.153261


In [8]:
# Feature means per home team. numeric_only=True leaves out the remaining
# string column (team_away); without it pandas >= 2.0 raises a TypeError
# rather than silently dropping it.
dta.groupby('team_home').mean(numeric_only=True)

Unnamed: 0_level_0,pos_home,neg_home,pos_away,neg_away,score_ht_home,score_ht_away,anger_home,fear_home,disgust_home,sadness_home,...,anticipation_home,anger_away,fear_away,disgust_away,sadness_away,surprise_away,trust_away,joy_away,anticipation_away,result
team_home,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Arsenal,0.674028,0.325972,0.604427,0.395573,1.428571,0.285714,0.087535,0.07936,0.065175,0.080684,...,0.173979,0.131546,0.112774,0.073552,0.091902,0.112734,0.184227,0.142611,0.150654,1.0
Bournemouth,0.633675,0.366325,0.534074,0.465926,1.2,0.8,0.140131,0.099696,0.038559,0.100068,...,0.180735,0.127971,0.136527,0.070705,0.125924,0.091703,0.168595,0.144962,0.133613,0.6
Chelsea,0.597954,0.402046,0.55391,0.44609,0.714286,0.285714,0.120468,0.1002,0.088472,0.080178,...,0.162971,0.12716,0.104714,0.096272,0.099196,0.094776,0.191785,0.138215,0.147881,0.571429
City,0.659909,0.340091,0.61881,0.38119,1.666667,0.777778,0.102162,0.101931,0.058762,0.081475,...,0.151192,0.104725,0.097585,0.07754,0.105992,0.106878,0.186939,0.161171,0.15917,0.777778
Crystal,0.581266,0.418734,0.499885,0.500115,0.714286,0.428571,0.133895,0.105629,0.076673,0.09122,...,0.151753,0.130982,0.140204,0.09227,0.114605,0.096597,0.157136,0.122931,0.145274,0.428571
Everton,0.658215,0.341785,0.606495,0.393505,1.5,1.166667,0.10576,0.082907,0.056092,0.093567,...,0.162335,0.128271,0.121605,0.100767,0.117768,0.096262,0.162941,0.133345,0.13904,0.5
Leicester,0.6986,0.3014,0.600017,0.399983,0.333333,1.0,0.097692,0.099803,0.079255,0.080781,...,0.163679,0.077329,0.103551,0.072591,0.098639,0.127637,0.176681,0.16579,0.177781,0.666667
Liverpool,0.648134,0.351866,0.624777,0.375223,0.25,0.0,0.098068,0.094398,0.052681,0.07952,...,0.183667,0.10771,0.098057,0.066533,0.08895,0.093255,0.191695,0.1682,0.1856,0.75
Newcastle,0.546035,0.453965,0.648265,0.351735,0.833333,0.833333,0.123191,0.119758,0.096341,0.106333,...,0.152155,0.111121,0.093293,0.069103,0.081851,0.107633,0.219039,0.129807,0.188153,0.5
Norwich,0.638402,0.361598,0.649245,0.350755,0.4,0.2,0.097326,0.098345,0.056719,0.089629,...,0.182115,0.127074,0.097301,0.069334,0.085976,0.125694,0.162663,0.15261,0.179346,0.6


In [9]:
# Feature means per away team. numeric_only=True leaves out the remaining
# string column (team_home); without it pandas >= 2.0 raises a TypeError
# rather than silently dropping it.
dta.groupby('team_away').mean(numeric_only=True)

Unnamed: 0_level_0,pos_home,neg_home,pos_away,neg_away,score_ht_home,score_ht_away,anger_home,fear_home,disgust_home,sadness_home,...,anticipation_home,anger_away,fear_away,disgust_away,sadness_away,surprise_away,trust_away,joy_away,anticipation_away,result
team_away,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Arsenal,0.618798,0.381202,0.593907,0.406093,0.571429,0.714286,0.11339,0.094906,0.073109,0.088128,...,0.189755,0.125838,0.11691,0.075553,0.096254,0.096913,0.206699,0.114889,0.166943,0.428571
Bournemouth,0.656144,0.343856,0.559445,0.440555,1.285714,0.142857,0.099751,0.096012,0.067514,0.080255,...,0.175305,0.146399,0.139675,0.06321,0.128519,0.084103,0.168802,0.128449,0.140842,0.714286
Chelsea,0.628736,0.371264,0.51369,0.48631,1.0,0.666667,0.119413,0.100288,0.067191,0.093805,...,0.155675,0.131022,0.127351,0.121731,0.102861,0.087322,0.166386,0.131456,0.13187,0.666667
City,0.681625,0.318375,0.55888,0.44112,1.0,0.2,0.098911,0.094902,0.051783,0.061647,...,0.180261,0.12854,0.139269,0.07836,0.130484,0.088628,0.168231,0.121634,0.144854,0.6
Crystal,0.70547,0.29453,0.593682,0.406318,0.333333,0.166667,0.099127,0.105947,0.056331,0.064999,...,0.172992,0.103731,0.094753,0.071116,0.105444,0.122688,0.172448,0.159811,0.170009,0.666667
Everton,0.634775,0.365225,0.53889,0.46111,1.0,0.333333,0.106366,0.089691,0.066559,0.0867,...,0.190916,0.112193,0.129356,0.085811,0.109879,0.118459,0.172337,0.132078,0.139886,0.333333
Leicester,0.624161,0.375839,0.678776,0.321224,0.285714,0.714286,0.109882,0.110937,0.067975,0.09107,...,0.175068,0.114641,0.086767,0.079269,0.072354,0.101272,0.201101,0.159842,0.184755,0.142857
Liverpool,0.624103,0.375897,0.539161,0.460839,0.714286,0.571429,0.110691,0.102518,0.068887,0.097099,...,0.151993,0.128385,0.118729,0.095694,0.107925,0.097075,0.181686,0.127498,0.143008,0.571429
Newcastle,0.628853,0.371147,0.522571,0.477429,1.0,0.285714,0.10443,0.102145,0.063481,0.096201,...,0.154208,0.131937,0.129423,0.100075,0.116894,0.098965,0.160772,0.129819,0.132114,0.857143
Norwich,0.587576,0.412424,0.550352,0.449648,0.875,0.375,0.146765,0.11708,0.073627,0.112586,...,0.119126,0.132852,0.113231,0.080606,0.109669,0.104347,0.168216,0.139889,0.151189,0.875


## Prepare Data for Logistic Regression

In [54]:
# Build the response (y) and design matrix (X) from `dta` with a patsy formula:
# `result` is the response; the sentiment shares, half-time scores and emotion
# shares enter as numeric terms; C(team_home) / C(team_away) expand the team
# names into dummy columns. patsy also adds a constant 'Intercept' column.
# NOTE(review): pos_* + neg_* and the eight emotion shares each appear to sum
# to 1 per side, which would make those regressors linearly dependent with the
# intercept -- confirm this is intended.
y, X = dmatrices('result ~ \
    pos_home + neg_home + pos_away + neg_away + \
    score_ht_home + score_ht_away + \
    anger_home + fear_home + disgust_home + sadness_home + \
    surprise_home + trust_home + joy_home + anticipation_home + \
    anger_away + fear_away + disgust_away + sadness_away + \
    surprise_away + trust_away + joy_away + anticipation_away + \
    C(team_home) + C(team_away)',
    dta, return_type="dataframe")

# flatten y into a 1-D array
# (dmatrices returns y as a single-column DataFrame when return_type="dataframe")
y = np.ravel(y)

# Print the generated column names, then display the first rows of X.
print(X.columns)
X.head()

Index(['Intercept', 'C(team_home)[T.Bournemouth]', 'C(team_home)[T.Chelsea]',
       'C(team_home)[T.City]', 'C(team_home)[T.Crystal]',
       'C(team_home)[T.Everton]', 'C(team_home)[T.Leicester]',
       'C(team_home)[T.Liverpool]', 'C(team_home)[T.Newcastle]',
       'C(team_home)[T.Norwich]', 'C(team_home)[T.Southampton]',
       'C(team_home)[T.Stoke]', 'C(team_home)[T.Sunderland]',
       'C(team_home)[T.Swansea]', 'C(team_home)[T.Tottenham]',
       'C(team_home)[T.United]', 'C(team_home)[T.Villa]',
       'C(team_home)[T.Watford]', 'C(team_home)[T.WestBromwich]',
       'C(team_home)[T.WestHam]', 'C(team_away)[T.Bournemouth]',
       'C(team_away)[T.Chelsea]', 'C(team_away)[T.City]',
       'C(team_away)[T.Crystal]', 'C(team_away)[T.Everton]',
       'C(team_away)[T.Leicester]', 'C(team_away)[T.Liverpool]',
       'C(team_away)[T.Newcastle]', 'C(team_away)[T.Norwich]',
       'C(team_away)[T.Southampton]', 'C(team_away)[T.Stoke]',
       'C(team_away)[T.Sunderland]', 'C(team_aw

Unnamed: 0,Intercept,C(team_home)[T.Bournemouth],C(team_home)[T.Chelsea],C(team_home)[T.City],C(team_home)[T.Crystal],C(team_home)[T.Everton],C(team_home)[T.Leicester],C(team_home)[T.Liverpool],C(team_home)[T.Newcastle],C(team_home)[T.Norwich],...,joy_home,anticipation_home,anger_away,fear_away,disgust_away,sadness_away,surprise_away,trust_away,joy_away,anticipation_away
0,1,0,0,0,0,1,0,0,0,0,...,0.179054,0.151689,0.130648,0.125971,0.10832,0.118353,0.081391,0.167232,0.124085,0.143999
1,1,0,0,0,0,0,0,0,0,1,...,0.149693,0.208589,0.133333,0.133333,0.092593,0.081481,0.1,0.185185,0.137037,0.137037
2,1,0,0,0,0,0,0,0,0,0,...,0.1571,0.175227,0.104803,0.100437,0.043668,0.087336,0.09607,0.222707,0.144105,0.200873
3,1,0,0,0,0,0,0,0,0,0,...,0.172102,0.175491,0.130366,0.0938,0.063593,0.074722,0.119237,0.206677,0.170111,0.141494
4,1,0,0,0,1,0,0,0,0,0,...,0.144566,0.168495,0.173232,0.22862,0.090572,0.135522,0.074242,0.063636,0.057239,0.176936


In [55]:
# Fit a default-configured logistic regression on the complete design matrix.
# NOTE(review): X already carries patsy's constant 'Intercept' column while the
# estimator fits its own intercept by default -- confirm the duplication is
# acceptable.
model = LogisticRegression().fit(X, y)

# Accuracy on the very rows the model was fitted on (an optimistic figure).
print(model.score(X, y))


# 10-fold cross-validated accuracy of a fresh, unfitted estimator.
scores = cross_val_score(
    estimator=LogisticRegression(),
    X=X,
    y=y,
    scoring='accuracy',
    cv=10,
)
print(scores)
print(scores.mean())

0.867768595041
[ 0.69230769  0.61538462  0.76923077  0.84615385  0.83333333  0.66666667
  0.83333333  0.63636364  0.63636364  0.72727273]
0.725641025641


In [50]:
# Pair every design-matrix column with its fitted coefficient. model.coef_ has
# one row for this binary problem, so it is flattened to line up with the
# column names.
# NOTE(review): the execution counter suggests this cell was last run before
# the current `model` was fitted -- re-run the notebook top to bottom.
coef = pd.DataFrame(
    {'var': X.columns, 'score': model.coef_.ravel()},
    columns=['var', 'score'],
)

# Rows from position 39 onwards are displayed; in the recorded output row 39
# is `pos_home`, the first feature after the intercept and team dummies.
coef[39:]

Unnamed: 0,var,score
39,pos_home,0.093153
40,neg_home,-0.000341
41,pos_away,-0.050268
42,neg_away,0.14308
43,score_ht_home,1.456251
44,score_ht_away,-1.747991
45,anger_home,0.008061
46,fear_home,-0.003984
47,disgust_home,-0.057545
48,sadness_home,0.126706


## Model Evaluation Using a Validation Set

In [30]:
# Hold out 30% of the rows as a test set (seeded, so the split is repeatable)
# and fit a second default logistic regression on the remaining rows.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=0, test_size=0.3
)
model2 = LogisticRegression().fit(X_train, y_train)
model2

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr',
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0)

In [31]:
# predict class labels for the test set
# (hard 0/1 predictions from the model fitted on the training split)
predicted = model2.predict(X_test)
print(predicted)

[ 1.  1.  0.  1.  0.  1.  0.  1.  1.  1.  1.  1.  0.  1.  1.  1.  1.  1.
  1.  0.  1.  1.  0.  0.  1.  0.  1.  0.  0.  1.  1.  0.  0.  0.  1.  0.
  0.]


In [32]:
# generate class probabilities
# (one row per test sample, one column per class; the later ROC AUC call uses
# column 1 as the score for class 1)
probs = model2.predict_proba(X_test)
print(probs)

[[ 0.46095175  0.53904825]
 [ 0.14858166  0.85141834]
 [ 0.66331607  0.33668393]
 [ 0.13269906  0.86730094]
 [ 0.8608485   0.1391515 ]
 [ 0.21941747  0.78058253]
 [ 0.77886447  0.22113553]
 [ 0.01450591  0.98549409]
 [ 0.42324699  0.57675301]
 [ 0.00147993  0.99852007]
 [ 0.00550222  0.99449778]
 [ 0.08038558  0.91961442]
 [ 0.54797909  0.45202091]
 [ 0.32690864  0.67309136]
 [ 0.31757799  0.68242201]
 [ 0.28261236  0.71738764]
 [ 0.12384658  0.87615342]
 [ 0.20704192  0.79295808]
 [ 0.09086022  0.90913978]
 [ 0.98333418  0.01666582]
 [ 0.04392126  0.95607874]
 [ 0.10166496  0.89833504]
 [ 0.68073728  0.31926272]
 [ 0.9335452   0.0664548 ]
 [ 0.2814178   0.7185822 ]
 [ 0.87222308  0.12777692]
 [ 0.00643372  0.99356628]
 [ 0.83277918  0.16722082]
 [ 0.64833495  0.35166505]
 [ 0.14007145  0.85992855]
 [ 0.05526444  0.94473556]
 [ 0.65033055  0.34966945]
 [ 0.55808716  0.44191284]
 [ 0.55711246  0.44288754]
 [ 0.14941533  0.85058467]
 [ 0.51488937  0.48511063]
 [ 0.61355786  0.38644214]]


In [33]:
# generate evaluation metrics
# First line printed: accuracy of the hard predictions on the test set.
# Second line printed: ROC AUC computed from the second probability column
# (probs[:, 1]), used here as the score for class 1.
print(metrics.accuracy_score(y_test, predicted))
print(metrics.roc_auc_score(y_test, probs[:, 1]))

0.675675675676
0.786713286713


In [34]:
# Confusion matrix for the test-set predictions (rows are true classes,
# columns are predicted classes), followed by per-class precision, recall,
# F1 and support.
print(metrics.confusion_matrix(y_test, predicted))
print(metrics.classification_report(y_test, predicted))

[[ 7  4]
 [ 8 18]]
             precision    recall  f1-score   support

        0.0       0.47      0.64      0.54        11
        1.0       0.82      0.69      0.75        26

avg / total       0.71      0.68      0.69        37



## Model Evaluation Using Cross-Validation

In [35]:
# evaluate the model using 10-fold cross-validation
# A fresh LogisticRegression is fitted and scored on each of the 10 folds of
# the full (X, y); the per-fold accuracies and their mean are printed.
# NOTE(review): this repeats the cross-validation call already made in the
# earlier model-fitting cell and overwrites `scores` with the same computation.
scores = cross_val_score(LogisticRegression(), X, y, scoring='accuracy', cv=10)
print(scores)
print(scores.mean())

[ 0.69230769  0.61538462  0.76923077  0.84615385  0.83333333  0.66666667
  0.83333333  0.63636364  0.63636364  0.72727273]
0.725641025641
