In [5]:
import numpy as np
import pandas as pd
from bokeh.plotting import figure, output_file, show
from bokeh.io import output_notebook, curdoc
from bokeh.themes import Theme
from bokeh.models import HoverTool
from bokeh.palettes import Spectral11
from bokeh.layouts import gridplot
from bokeh.models import ColumnDataSource
from bokeh.charts import Histogram, Donut, Bar
from bokeh.io import output_notebook
from bokeh.models.formatters import DatetimeTickFormatter as dttf
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
# from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import cross_val_score
from sklearn.metrics import confusion_matrix

In [6]:
df_clean = pd.read_json('messy_with_countries.json')
df_clean.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 19969 entries, 0 to 9999
Data columns (total 43 columns):
Age_at_win               14248 non-null float64
AverageWinsToDate        19969 non-null float64
CountryCode              19969 non-null object
CountryName              19969 non-null object
CumulativePrizeToDate    19969 non-null int64
CurrencyCode             19969 non-null object
CurrentHandle            19969 non-null object
DOB                      14248 non-null float64
EndDate                  19969 non-null int64
ExchangeRate             19969 non-null float64
Finals                   19969 non-null int64
GameId                   19969 non-null int64
NameFirst                19969 non-null object
NameLast                 19969 non-null object
Note                     105 non-null object
PlayerId                 19969 non-null int64
PriorWins                19969 non-null int64
Prize                    19969 non-null float64
PrizeToDate              19969 non-null int64
Pri

In [7]:
to_drop = [ 'rankplace_Cash Only', 'rankplace_Runners Up', 'rankplace_Winners',
            'CountryCode','CurrencyCode','CurrentHandle','ExchangeRate','GameId','NameFirst','NameLast','Note',
            'PlayerId','Prize','RankText','TeamPlayers','Team_On','TournamentName','team_history',
            'Team_On_Backup','DOB','EndDate','TotalUSDPrize', 'Prize_USD',
            'num_teams','date','totalsc2', 'CumulativePrizeToDate',
            'TournLength','Finals', 'WinsToDate','TTtoDate','sub-region', 'country']
test_here = ['SecondWins','AverageWinsToDate','PriorWins',]
df_dropped = df_clean.drop(to_drop, axis=1)
output_notebook()

In [8]:
df_dropped.head()

Unnamed: 0,Age_at_win,AverageWinsToDate,CountryName,PriorWins,PrizeToDate,Rank,SecondWins,currency,region,teams
0,23.0,0.0,Ukraine,0,0,Winners,0,EUR,Europe,Unaffiliated
1,23.0,136.0,Ukraine,1,136,Winners,0,EUR,Europe,Unaffiliated
10,24.0,638.4,Ukraine,8,6384,Winners,1,EUR,Europe,Other
100,20.0,11777.4,Korea (Republic of),6,176661,Runners Up,3,USD,Asia,Old Generations
1000,17.0,816.25,Korea (Republic of),3,9795,Runners Up,1,KRW,Asia,Other


In [9]:
#prep for classification
df_dums = pd.get_dummies(data=df_dropped, columns = ['Rank','currency','teams','CountryName','region'])
df_dums['Age_at_win'] = df_dums['Age_at_win'].fillna(df_dums['Age_at_win'].mean())
df_dums['y']= df_dums['Rank_Runners Up'] + df_dums['Rank_Winners']
df_class = df_dums.drop(['Rank_Runners Up','Rank_Cash Only','Rank_Winners'], axis=1)
y_out = df_class.pop('y')
df_class.head()

Unnamed: 0,Age_at_win,AverageWinsToDate,PriorWins,PrizeToDate,SecondWins,currency_AUD,currency_CNY,currency_EUR,currency_KRW,currency_Other,...,CountryName_United Arab Emirates,CountryName_United Kingdom of Great Britain and Northern Ireland,CountryName_United States of America,CountryName_Uruguay,CountryName_Venezuela (Bolivarian Republic of),CountryName_Viet Nam,region_Americas,region_Asia,region_Europe,region_Other
0,23.0,0.0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,1,0
1,23.0,136.0,1,136,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,1,0
10,24.0,638.4,8,6384,1,0,0,1,0,0,...,0,0,0,0,0,0,0,0,1,0
100,20.0,11777.4,6,176661,3,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
1000,17.0,816.25,3,9795,1,0,0,0,1,0,...,0,0,0,0,0,0,0,1,0,0


In [10]:
X, X_finaltest, y, y_final_test = train_test_split(df_class,y_out, random_state=314)

In [11]:
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [12]:
def feature_importance(clf, X):
    importances = clf.feature_importances_
    indices = np.argsort(importances)[::-1]
    j = list(X.columns)
    # Print the feature ranking
    print("Feature ranking:")
    l = []
    for f in range(X.shape[1]):
        l.append( [j[indices[f]], importances[indices[f]]])
    return l

def standard_confusion_matrix(y_true, y_pred):
    """Make confusion matrix with format:
                  -----------
                  | TP | FP |
                  -----------
                  | FN | TN |
                  -----------
    Parameters
    ----------
    y_true : ndarray - 1D
    y_pred : ndarray - 1D

    Returns
    -------
    ndarray - 2D
    """
    [[tn, fp], [fn, tp]] = confusion_matrix(y_true, y_pred)
    return np.array([[tp, fp], [fn, tn]])

def roc_curve(probabilities, labels):
    '''
    INPUT: numpy array, numpy array
    OUTPUT: list, list, list

    Take a numpy array of the predicted probabilities and a numpy array of the
    true labels.
    Return the True Positive Rates, False Positive Rates and Thresholds for the
    ROC curve.
    '''
    
    thresholds = np.sort(probabilities)

    tprs = []
    fprs = []

    num_positive_cases = sum(labels)
    num_negative_cases = len(labels) - num_positive_cases

    for threshold in thresholds:
        # With this threshold, give the prediction of each instance
        predicted_positive = probabilities >= threshold
        # Calculate the number of correctly predicted positive cases
        true_positives = np.sum(predicted_positive * labels)
        # Calculate the number of incorrectly predicted positive cases
        false_positives = np.sum(predicted_positive) - true_positives
        # Calculate the True Positive Rate
        tpr = true_positives / float(num_positive_cases)
        # Calculate the False Positive Rate
        fpr = false_positives / float(num_negative_cases)

        fprs.append(fpr)
        tprs.append(tpr)

    return tprs, fprs, thresholds.tolist()

In [13]:
clf = LogisticRegression()
clf.fit(X_train, y_train)

probabilities = clf.predict_proba(X_test)[:, 1]
tprLR, fprLR, thresholds = roc_curve(probabilities, y_test)

#Cross-Validation
scores = cross_val_score(clf, X_train, y_train, cv=5)
print("Train Crossval Accuracy: " + str(scores))
print("Predicted Accuracy: "+ str(sum(scores)/5))
preds = clf.predict(X_test)
score = clf.score(X_test,y_test)
print('Test Accuracy: ' + str(score))
print(standard_confusion_matrix(y_test,preds))

Train Crossval Accuracy: [ 0.64546263  0.6571683   0.64737311  0.66028495  0.64692787]
Predicted Accuracy: 0.651443372639
Test Accuracy: 0.65811965812
[[1286  730]
 [ 550 1178]]


In [14]:
clf = GradientBoostingClassifier(learning_rate=.01, max_features=20,n_estimators=1000)
clf.fit(X_train, y_train)

#Cross-Validation
scores = cross_val_score(clf, X_train, y_train, cv=5)
print("Train Crossval Accuracy: " + str(scores))
print("Predicted Accuracy: "+ str(sum(scores)/5))

probabilities = clf.predict_proba(X_test)[:, 1]
tprGB, fprGB, thresholds = roc_curve(probabilities, y_test)

preds = clf.predict(X_test)
score = clf.score(X_test,y_test)
print('Test Accuracy: ' + str(score))
print(standard_confusion_matrix(y_test,preds))


feature_importance(clf, X)
    

Train Crossval Accuracy: [ 0.64679715  0.67497774  0.65004452  0.6571683   0.63624221]
Predicted Accuracy: 0.653045984479
Test Accuracy: 0.671741452991
[[1299  692]
 [ 537 1216]]
Feature ranking:


[['PriorWins', 0.1091055000021561],
 ['currency_EUR', 0.10727832722784594],
 ['currency_KRW', 0.10310529961392077],
 ['AverageWinsToDate', 0.074622802707560054],
 ['PrizeToDate', 0.072539948715396152],
 ['Age_at_win', 0.057772514458246507],
 ['SecondWins', 0.05149775387852816],
 ['CountryName_Germany', 0.045266629425705147],
 ['currency_Other', 0.03204249800046996],
 ['CountryName_Korea (Republic of)', 0.023701347504040817],
 ['teams_KT Rolster', 0.020155314963090924],
 ['currency_AUD', 0.020123584879650543],
 ['currency_USD', 0.019447554202173839],
 ['CountryName_Taiwan, Province of China', 0.018731001111759436],
 ['CountryName_Malaysia', 0.014022272612837693],
 ['teams_Jin Air Green Wings', 0.012957872026157405],
 ['teams_Other', 0.012274054650598073],
 ['CountryName_Spain', 0.012038009406390439],
 ['teams_Evil Geniuses', 0.010762687715709297],
 ['CountryName_Mexico', 0.0099896253629884694],
 ['region_Europe', 0.0096914578928335385],
 ['CountryName_Ukraine', 0.0096502389756266266],
 

In [15]:
clf = RandomForestClassifier(max_depth=10, n_jobs=750, max_features=30)
clf.fit(X_train, y_train)

#Cross-Validation
scores = cross_val_score(clf, X_train, y_train, cv=5)
print("Train Crossval Accuracy: " + str(scores))
print("Predicted Accuracy: "+ str(sum(scores)/5))

probabilities = clf.predict_proba(X_test)[:, 1]
tprRF, fprRF, thresholds = roc_curve(probabilities, y_test)

preds = clf.predict(X_test)
score = clf.score(X_test,y_test)
print('Test Accuracy: ' + str(score))
print(standard_confusion_matrix(y_test,preds))

feature_importance(clf, X)
fi = feature_importance(clf, X)
for j in fi:
    print(j)

Train Crossval Accuracy: [ 0.64323843  0.67898486  0.66162066  0.67631345  0.64024933]
Predicted Accuracy: 0.660081346672
Test Accuracy: 0.676014957265
[[1217  594]
 [ 619 1314]]
Feature ranking:
Feature ranking:
['currency_KRW', 0.15915293612611153]
['currency_EUR', 0.13078969490538478]
['AverageWinsToDate', 0.10882282051043282]
['PrizeToDate', 0.10003660186485927]
['PriorWins', 0.098628137967880461]
['SecondWins', 0.060684379941434129]
['currency_USD', 0.059145569190234101]
['Age_at_win', 0.051229156550758118]
['CountryName_Korea (Republic of)', 0.022189314278892983]
['CountryName_Germany', 0.020386586743846608]
['region_Europe', 0.011901825923216144]
['teams_Other', 0.011419930628598401]
['currency_Other', 0.011381723998970176]
['teams_Unaffiliated', 0.010872316303528414]
['CountryName_Taiwan, Province of China', 0.010183753450435812]
['currency_CNY', 0.0084205554062598519]
['region_Asia', 0.0081569565619390317]
['teams_KT Rolster', 0.0074796160915575043]
['CountryName_China', 0.006

In [56]:
j = fi[0:8]
k = []
l= []
v = 1
for i in j:
    k.append(str(v) +"-"+i[0])
    l.append(i[1])
    v+=1
print(k)
print(l)
dffi = pd.DataFrame({})
dffi['Feature']=k
dffi['Importance']=l
dffi

['1-currency_KRW', '2-currency_EUR', '3-AverageWinsToDate', '4-PrizeToDate', '5-PriorWins', '6-SecondWins', '7-currency_USD', '8-Age_at_win']
[0.15915293612611153, 0.13078969490538478, 0.10882282051043282, 0.10003660186485927, 0.098628137967880461, 0.060684379941434129, 0.059145569190234101, 0.051229156550758118]


Unnamed: 0,Feature,Importance
0,1-currency_KRW,0.159153
1,2-currency_EUR,0.13079
2,3-AverageWinsToDate,0.108823
3,4-PrizeToDate,0.100037
4,5-PriorWins,0.098628
5,6-SecondWins,0.060684
6,7-currency_USD,0.059146
7,8-Age_at_win,0.051229


In [97]:

theme = Theme(json={
    'attrs': {
        'Figure': {
            'background_fill_color': '#2F2F2F',
            'border_fill_color': '#2F2F2F',
            'outline_line_color': '#444444'
            },
        'Axis': {
            'axis_line_color': "white",
            'axis_label_text_color': "white",
            'major_label_text_color': "white",
            'major_tick_line_color': "white",
            'minor_tick_line_color': "white",
            'minor_tick_line_color': "white"
            },
        'Grid': {
            'grid_line_dash': [6, 4],
            'grid_line_alpha': .3
            },
        'Circle': {
            'fill_color': 'lightblue',
            'size': 10,
            },
        'Title': {
            'text_color': "white"
            }
        }
    })
curdoc().theme = theme
# output_notebook()

LR = zip(fprLR,tprLR)
GB = zip(fprGB,tprGB)
RF = zip(fprRF,tprRF)

cv=['fprLR','tprLR','fprGB','tprGB','fprRF','tprRF']
vc=[fprLR, tprLR, fprGB, tprGB, fprRF, tprRF]
g = pd.DataFrame({})
for i in range(len(cv)):
    g[cv[i]]=vc[i]

g.head()

def roc_print(df):
    LR = list(roc['LR'])
    GB = list(roc['GB'])
    RF = list(roc['RF'])
    fprLR = [i[0] for i in LR]
    tprLR = [i[1] for i in LR]
    fprGB = [i[0] for i in GB]
    tprGB = [i[1] for i in GB]
    fprRF = [i[0] for i in RF]
    tprRF = [i[1] for i in RF]
    
    TOOLS = [HoverTool(tooltips=[('FPR:','@x'),('TPR','@y')])]
    pal = Spectral11
    p = figure(x_axis_label='False Positive Rate', y_axis_label='True Positive Rate', tools=TOOLS)
    xvals = np.arange(0,1.01,.01)
    yvals = np.arange(0,1.01,.01)

    p.line(fprLR, tprLR, legend = 'Logistic Regression', line_width=3, line_color=pal[0])
    p.line(fprGB, tprGB, legend = 'Gradient Boost', line_width=3, line_color=pal[1])
    p.line(fprRF, tprRF, legend = 'Random Forest', line_width=3, line_color=pal[2])
    p.line(xvals,yvals, legend = 'Random Chance', line_width=3, line_color=pal[3])
    p.legend.location = 'bottom_right'
    return p

TOOLS = [HoverTool(tooltips=[('FPR:','@x'),('TPR','@y')])]
pal = Spectral11
p = figure(x_axis_label='False Positive Rate', y_axis_label='True Positive Rate', tools=TOOLS)
xvals = np.arange(0,1.01,.01)
yvals = np.arange(0,1.01,.01)

p.line(fprLR, tprLR, legend = 'Logistic Regression', line_width=3, line_color=pal[0])
p.line(fprGB, tprGB, legend = 'Gradient Boost', line_width=3, line_color=pal[1])
p.line(fprRF, tprRF, legend = 'Random Forest', line_width=3, line_color=pal[2])
p.line(xvals,yvals, legend = 'Random Chance', line_width=3, line_color=pal[3])
p.legend.location = 'bottom_right'
show(p)

In [34]:
print(list(X.columns))

['Age_at_win', 'AverageWinsToDate', 'PriorWins', 'PrizeToDate', 'SecondWins', 'currency_AUD', 'currency_CNY', 'currency_EUR', 'currency_KRW', 'currency_Other', 'currency_USD', 'teams_CJ Entus', 'teams_Dead Pixels', 'teams_ESC Gaming', 'teams_Evil Geniuses', 'teams_FXOpen e-Sports', 'teams_Incredible Miracle', 'teams_Invictus Gaming', 'teams_Jin Air Green Wings', 'teams_KT Rolster', 'teams_MVP', 'teams_Millenium', 'teams_Old Generations', 'teams_Other', 'teams_PSISTORM Gaming', 'teams_Prime', 'teams_ROOT Gaming', 'teams_SK Telecom T1', 'teams_SlayerS', 'teams_StarCraft II (Samsung Galaxy)', 'teams_StarTale', 'teams_Team Acer', 'teams_Team Empire', 'teams_Team Liquid', 'teams_Team SCV Life', 'teams_Unaffiliated', 'teams_mYinsanity', 'teams_mousesports', 'CountryName_Argentina', 'CountryName_Australia', 'CountryName_Austria', 'CountryName_Belarus', 'CountryName_Belgium', 'CountryName_Bolivia (Plurinational State of)', 'CountryName_Brazil', 'CountryName_Bulgaria', 'CountryName_Canada', 'Co

In [70]:
g.to_json('roc.json')

In [35]:
print(feature_importance(clf,X))

Feature ranking:
[['currency_KRW', 0.15915293612611153], ['currency_EUR', 0.13078969490538478], ['AverageWinsToDate', 0.10882282051043282], ['PrizeToDate', 0.10003660186485927], ['PriorWins', 0.098628137967880461], ['SecondWins', 0.060684379941434129], ['currency_USD', 0.059145569190234101], ['Age_at_win', 0.051229156550758118], ['CountryName_Korea (Republic of)', 0.022189314278892983], ['CountryName_Germany', 0.020386586743846608], ['region_Europe', 0.011901825923216144], ['teams_Other', 0.011419930628598401], ['currency_Other', 0.011381723998970176], ['teams_Unaffiliated', 0.010872316303528414], ['CountryName_Taiwan, Province of China', 0.010183753450435812], ['currency_CNY', 0.0084205554062598519], ['region_Asia', 0.0081569565619390317], ['teams_KT Rolster', 0.0074796160915575043], ['CountryName_China', 0.0060801744295917422], ['currency_AUD', 0.0060191304657434257], ['teams_SK Telecom T1', 0.0048785726049644807], ['teams_Team Empire', 0.004354310742365774], ['CountryName_France', 0

In [36]:
df = df_clean

In [37]:
df.date = pd.to_datetime(df.date)
df.date

0       2010-02-28
1       2010-03-14
10      2011-01-23
100     2011-07-10
1000    2013-11-23
10000   2016-01-10
10001   2015-04-05
10002   2016-03-26
10003   2012-07-15
10004   2015-05-03
10005   2016-04-03
10006   2012-05-06
10007   2011-04-15
10008   2011-08-16
10009   2012-07-22
1001    2014-05-14
10010   2012-06-03
10011   2013-03-29
10012   2013-07-28
10013   2014-02-17
10014   2011-05-27
10015   2011-05-29
10016   2011-09-17
10017   2011-10-09
10018   2012-02-23
10019   2012-02-24
1002    2014-05-26
10020   2012-07-21
10021   2013-06-29
10022   2014-08-31
           ...    
9972    2012-07-29
9973    2013-06-29
9974    2012-07-08
9975    2012-07-22
9976    2012-07-28
9977    2011-09-22
9978    2013-10-11
9979    2015-06-28
998     2013-08-10
9980    2015-06-28
9981    2015-06-28
9982    2011-08-31
9983    2014-03-10
9984    2014-04-21
9985    2014-09-28
9986    2015-11-14
9987    2014-08-23
9988    2011-04-24
9989    2011-05-05
999     2013-10-19
9990    2010-09-21
9991    2010

In [38]:
Country = df.groupby(df['CountryName'])['Prize_USD'].apply(lambda grp: grp.nlargest(5).sum())

In [39]:
ct = list(Country.index)
ctval = list(C)

NameError: name 'C' is not defined

In [None]:
top5 = Country.nlargest(5)
ct = list(zip(top5.index,top5))

In [None]:
def nlargest(df, col1, col2, n):
    x = df.groupby(df[col1])[col2].apply(lambda i: i.sum())
    tn = x.nlargest(n)
    df2 = pd.DataFrame({'year': tn.index, col2: tn})
    return df2

In [40]:
nlargest(df,'CountryName','Prize_USD',5)

NameError: name 'nlargest' is not defined

In [41]:
def bydate(df, col, date):
    x = df.groupby(df.date.dt.year)[col].apply(lambda i: i.sum())
    df2 = pd.DataFrame({'year': x.index, col: x})
    return df2

In [42]:
q = bydate(df,'Prize_USD','date')

In [60]:
def create_bar_chart3(df,col1, col2, title):
    plot = Bar(df, col1, values=col2, 
               legend=False, title=title,
               color=Spectral11[2])
    return plot

In [58]:
show(create_bar_chart(q, 'date','Prize_USD','Earnings By Year'))

In [45]:
p = nlargest(df,'CountryName','Prize_USD',10)
donut = create_donut(p, 'CountryName','Prize_USD','Top 10 Countries')

NameError: name 'nlargest' is not defined

In [None]:
def create_donut(df,col1, col2, title):
    plot = Donut(df, label=col1, values=col2,
                 color=Spectral11, title=title)
    return plot

In [None]:
show(donut)

In [None]:
dfbig = df

In [None]:
mask = dfbig['teams'].isin(['Other', 'Unaffiliated'])
df2 = dfbig[~mask]

In [None]:
df2['Prize_USD']

In [None]:
p = nlargest(df2,'teams','Prize_USD',5)
donut = create_donut(p, 'teams','Prize_USD','Top 5 Teams')

In [None]:
show(donut)

In [None]:
df.describe()

In [None]:
len(df.PlayerId.unique())

In [None]:
i= (list(df.columns))
for j in i:
    print(j)

In [102]:
dffi = pd.read_json('dffi.json')
show(create_bar_chart3(dffi,'Feature','Importance','Feature Importance - Random Forests'))

In [63]:
dffi.to_json('dffi.json')

In [99]:
g = pd.read_json('roc.json')

In [100]:
show(roc_print(g))

In [75]:
g

Unnamed: 0,fprGB,fprLR,fprRF,tprGB,tprLR,tprRF
0,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000
1,0.999476,0.999476,1.000000,1.000000,1.000000,1.000000
10,0.994759,0.994759,0.994759,1.000000,1.000000,1.000000
100,0.956499,0.954927,0.952830,0.991285,0.992375,0.994553
1000,0.601677,0.604822,0.607442,0.872549,0.866013,0.875272
1001,0.601677,0.604298,0.607442,0.872549,0.866013,0.875272
1002,0.601677,0.603774,0.607442,0.872549,0.866013,0.875272
1003,0.601677,0.603774,0.607442,0.872549,0.865468,0.875272
1004,0.601677,0.603249,0.607442,0.872549,0.865468,0.875272
1005,0.601677,0.603249,0.607442,0.872549,0.865468,0.875272


In [82]:
LR = []
for i in range(len(fprLR)):
    LR.append([fprLR[i],tprLR[i]])
GB = []
for i in range(len(fprGB)):
    GB.append([fprGB[i],tprGB[i]])
RF = []
for i in range(len(fprRF)):
    RF.append([fprRF[i],tprRF[i]])

In [84]:
roc = pd.DataFrame({})
cv = ['LR','GB','RF']
vc = [LR,GB,RF]
for i in range(len(cv)):
    roc[cv[i]]=vc[i]

In [86]:
roc.to_json('roc.json')

In [87]:
roc = pd.read_json('roc.json')

In [90]:
LR = list(roc['LR'])
GB = list(roc['GB'])
RF = list(roc['RF'])
fprLR = [i[0] for i in LR]
tprLR = [i[1] for i in LR]
fprGB = [i[0] for i in GB]
tprGB = [i[1] for i in GB]
fprRF = [i[0] for i in RF]
tprRF = [i[1] for i in RF]

In [95]:
fprLR = [i[0] for i in LR]
tprLR = [i[1] for i in LR]
fprGB = [i[0] for i in GB]
tprGB = [i[1] for i in GB]
fprRF = [i[0] for i in RF]
tprRF = [i[1] for i in RF]

In [96]:
tprLR

[1.0,
 1.0,
 1.0,
 0.9923747277,
 0.8660130719,
 0.8660130719,
 0.8660130719,
 0.8654684096,
 0.8654684096,
 0.8654684096,
 0.8654684096,
 0.8654684096,
 0.8654684096,
 0.8654684096,
 0.9918300654000001,
 0.8654684096,
 0.8649237473,
 0.8649237473,
 0.8649237473,
 0.8649237473,
 0.8649237473,
 0.8649237473,
 0.8649237473,
 0.8649237473,
 0.8649237473,
 0.9918300654000001,
 0.8649237473,
 0.8649237473,
 0.8649237473,
 0.8649237473,
 0.8649237473,
 0.8649237473,
 0.8649237473,
 0.8649237473,
 0.8649237473,
 0.8649237473,
 0.9918300654000001,
 0.8649237473,
 0.8649237473,
 0.8649237473,
 0.8616557734,
 0.8616557734,
 0.8616557734,
 0.8611111111,
 0.8605664488,
 0.8605664488,
 0.8600217865,
 0.9918300654000001,
 0.8600217865,
 0.8600217865,
 0.8600217865,
 0.8600217865,
 0.8600217865,
 0.8594771242,
 0.8594771242,
 0.8594771242,
 0.8594771242,
 0.8594771242,
 0.9918300654000001,
 0.8589324619,
 0.8589324619,
 0.8589324619,
 0.8589324619,
 0.8583877996,
 0.8578431373000001,
 0.8578431373000

In [101]:
dffi

Unnamed: 0,Feature,Importance
0,1-currency_KRW,0.159153
1,2-currency_EUR,0.13079
2,3-AverageWinsToDate,0.108823
3,4-PrizeToDate,0.100037
4,5-PriorWins,0.098628
5,6-SecondWins,0.060684
6,7-currency_USD,0.059146
7,8-Age_at_win,0.051229
