In [1]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.svm import SVC, LinearSVC
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import scale
from sklearn.preprocessing import StandardScaler

In [2]:
lh_data = pd.read_csv("lh.csv", na_values="-1")
gold_data = pd.read_csv("gold.csv", na_values="-1")
train_data = pd.read_csv("train.csv", na_values="-1")

In [3]:
data = gold_data[gold_data["times"] == 600].add_prefix("gold_").merge(
    lh_data[lh_data["times"] == 600].add_prefix("lh_"), left_on="gold_mid", right_on="lh_mid", how="inner")
data.drop(["lh_times", "gold_mid", "gold_times"], axis=1, inplace=True)
data

Unnamed: 0,gold_player_0,gold_player_1,gold_player_2,gold_player_3,gold_player_4,gold_player_5,gold_player_6,gold_player_7,gold_player_8,gold_player_9,...,lh_player_0,lh_player_1,lh_player_2,lh_player_3,lh_player_4,lh_player_5,lh_player_6,lh_player_7,lh_player_8,lh_player_9
0,3454,5206,2613,4426,5755,4072,3997,5917,1725,6384,...,4,43,3,57,41,34,35,75,2,46
1,2477,5760,3816,4353,5759,7659,5066,2748,4440,4623,...,5,63,14,28,47,49,23,3,30,39
2,3604,1948,8581,4390,2869,3096,2301,5130,2530,2491,...,34,6,69,42,23,6,12,56,13,12
3,3457,5464,4432,2961,4314,3345,4791,1906,5328,2247,...,1,49,48,3,31,28,51,6,50,8
4,3675,4103,5154,3030,2076,3920,3494,3392,4458,2220,...,30,41,45,23,4,26,9,23,37,6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
49943,3754,3556,5388,3391,5634,2853,4447,3636,8245,5598,...,28,20,37,18,52,3,43,16,50,40
49944,4895,2580,4109,5610,2732,3564,5763,5538,4005,3027,...,39,1,21,62,6,1,58,50,10,15
49945,2325,1813,2530,4807,5542,2242,2359,5997,5523,2044,...,13,6,2,37,67,7,7,61,49,2
49946,4134,3141,4086,4405,3517,2887,7450,3634,5431,5451,...,2,24,15,23,30,9,72,4,51,36


In [4]:
radiant_players_gold = ["gold_player_" + str(i) for i in range(0, 5)]
radiant_players_lh = ["lh_player_" + str(i) for i in range(0, 5)]
dire_players_gold = ["gold_player_" + str(i) for i in range(5, 10)]
dire_players_lh = ["lh_player_" + str(i) for i in range(5, 10)]

In [5]:
data.loc[:, "radiant_sum_gold"] = data[radiant_players_gold].sum(axis=1)
data.loc[:, "radiant_max_gold"] = data[radiant_players_gold].max(axis=1)
data.loc[:, "radiant_min_gold"] = data[radiant_players_gold].min(axis=1)

data.loc[:, "dire_sum_gold"] = data[dire_players_gold].sum(axis=1)
data.loc[:, "dire_max_gold"] = data[dire_players_gold].max(axis=1)
data.loc[:, "dire_min_gold"] = data[dire_players_gold].min(axis=1)

data.loc[:, "radiant_max_lh"] = data[radiant_players_lh].max(axis=1)
data.loc[:, "radiant_min_lh"] = data[radiant_players_lh].min(axis=1)
data.loc[:, "radiant_sum_lh"] = data[radiant_players_lh].sum(axis=1)

data.loc[:, "dire_max_lh"] = data[dire_players_lh].max(axis=1)
data.loc[:, "dire_min_lh"] = data[dire_players_lh].min(axis=1)
data.loc[:, "dire_sum_lh"] = data[dire_players_lh].sum(axis=1)

data.drop(radiant_players_gold + radiant_players_lh + dire_players_gold + dire_players_lh, inplace=True, axis=1)

data.set_index("lh_mid", inplace=True)
data.index.rename('mid', inplace=True)

In [6]:
xp_data = pd.read_csv("xp.csv", index_col="mid")

In [235]:
xp_data

Unnamed: 0_level_0,times,player_0,player_1,player_2,player_3,player_4,player_5,player_6,player_7,player_8,player_9
mid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
0,60,79,214,147,222,147,94,78,396,94,147
0,120,321,719,423,777,421,490,607,895,241,365
0,180,356,1333,424,1300,638,922,937,1259,242,590
0,240,544,1752,441,1782,1348,1460,1163,2037,242,658
0,300,724,2002,565,2087,1807,2102,1498,2389,276,1020
...,...,...,...,...,...,...,...,...,...,...,...
49947,360,1826,708,2649,1568,2124,2283,858,1337,2759,2360
49947,420,2068,1153,3081,1878,2700,2533,906,1618,3259,2764
49947,480,2358,1152,3539,2266,2881,3057,1029,1811,3911,3260
49947,540,2698,1168,5034,2721,3107,3300,1217,2151,4116,3901


In [7]:
data_with_xp = data.merge(xp_data[xp_data['times'] == 600], left_index=True, right_index=True)

In [8]:
data_with_xp

Unnamed: 0_level_0,radiant_sum_gold,radiant_max_gold,radiant_min_gold,dire_sum_gold,dire_max_gold,dire_min_gold,radiant_max_lh,radiant_min_lh,radiant_sum_lh,dire_max_lh,...,player_0,player_1,player_2,player_3,player_4,player_5,player_6,player_7,player_8,player_9
mid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,21454,5755,2613,22095,6384,1725,57,3,148,75,...,1696,4304,1606,4429,3821,3745,3635,4846,357,2347
1,22165,5760,2477,24536,7659,2748,63,5,157,49,...,1542,4481,1961,2437,4810,4514,3510,2248,3403,3662
2,21392,8581,1948,15548,5130,2301,69,6,174,56,...,2851,1986,6234,2667,4265,2402,1541,3212,2864,2426
3,20628,5464,2961,17617,5328,1906,49,1,132,51,...,1145,4640,4202,1769,3786,3529,2962,1400,4512,1530
4,18038,5154,2076,17484,4458,2220,45,4,143,37,...,2197,3525,4855,3239,1518,3373,1935,2563,3281,1924
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
49943,21723,5634,3391,24779,8245,2853,52,18,155,50,...,1973,2317,4578,2366,4070,1911,3200,2245,6009,3850
49944,19926,5610,2580,21897,5763,3027,62,1,129,58,...,4117,1190,3020,3774,2062,1425,4889,4347,3218,2966
49945,17017,5542,1813,18165,5997,2044,67,2,125,61,...,1999,2236,1024,4695,3544,2365,1783,3339,4448,1271
49946,19283,4405,3141,24853,7450,2887,30,2,94,72,...,2293,3086,2518,3612,2686,2573,4360,2395,5008,2929


In [9]:
data_with_xp.loc[:, "radiant_sum_xp"] = data_with_xp[['player_' + str(i) for i in range(0, 5)]].sum(axis=1)
data_with_xp.loc[:, "radiant_max_xp"] = data_with_xp[['player_' + str(i) for i in range(0, 5)]].max(axis=1)
data_with_xp.loc[:, "radiant_min_xp"] = data_with_xp[['player_' + str(i) for i in range(0, 5)]].min(axis=1)

data_with_xp.loc[:, "dire_sum_xp"] = data_with_xp[['player_' + str(i) for i in range(5, 10)]].sum(axis=1)
data_with_xp.loc[:, "dire_max_xp"] = data_with_xp[['player_' + str(i) for i in range(5, 10)]].max(axis=1)
data_with_xp.loc[:, "dire_min_xp"] = data_with_xp[['player_' + str(i) for i in range(5, 10)]].min(axis=1)

data_with_xp

Unnamed: 0_level_0,radiant_sum_gold,radiant_max_gold,radiant_min_gold,dire_sum_gold,dire_max_gold,dire_min_gold,radiant_max_lh,radiant_min_lh,radiant_sum_lh,dire_max_lh,...,player_6,player_7,player_8,player_9,radiant_sum_xp,radiant_max_xp,radiant_min_xp,dire_sum_xp,dire_max_xp,dire_min_xp
mid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,21454,5755,2613,22095,6384,1725,57,3,148,75,...,3635,4846,357,2347,15856,4429,1606,14930,4846,357
1,22165,5760,2477,24536,7659,2748,63,5,157,49,...,3510,2248,3403,3662,15231,4810,1542,17337,4514,2248
2,21392,8581,1948,15548,5130,2301,69,6,174,56,...,1541,3212,2864,2426,18003,6234,1986,12445,3212,1541
3,20628,5464,2961,17617,5328,1906,49,1,132,51,...,2962,1400,4512,1530,15542,4640,1145,13933,4512,1400
4,18038,5154,2076,17484,4458,2220,45,4,143,37,...,1935,2563,3281,1924,15334,4855,1518,13076,3373,1924
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
49943,21723,5634,3391,24779,8245,2853,52,18,155,50,...,3200,2245,6009,3850,15304,4578,1973,17215,6009,1911
49944,19926,5610,2580,21897,5763,3027,62,1,129,58,...,4889,4347,3218,2966,14163,4117,1190,16845,4889,1425
49945,17017,5542,1813,18165,5997,2044,67,2,125,61,...,1783,3339,4448,1271,13498,4695,1024,13206,4448,1271
49946,19283,4405,3141,24853,7450,2887,30,2,94,72,...,4360,2395,5008,2929,14195,3612,2293,17265,5008,2395


In [10]:
data_with_xp.drop(['player_' + str(i) for i in range(0, 10)], inplace=True, axis=1)

In [11]:
data_with_xp.drop("times", inplace=True, axis=1)

In [12]:
heros_data = pd.read_csv("heroes.csv", index_col='mid')

In [54]:
heros_data

Unnamed: 0_level_0,player_0,player_1,player_2,player_3,player_4,player_5,player_6,player_7,player_8,player_9
mid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,91,42,87,15,65,11,6,34,69,74
1,69,85,71,24,64,74,68,39,65,11
2,17,40,31,67,99,32,7,72,48,104
3,80,43,101,71,94,69,70,98,24,39
4,25,15,75,29,95,3,32,55,64,86
...,...,...,...,...,...,...,...,...,...,...
49943,3,89,75,81,29,40,94,47,56,68
49944,64,92,69,82,18,104,81,29,5,28
49945,99,109,96,15,2,69,91,53,77,87
49946,69,32,64,65,8,21,22,40,75,59


In [59]:
encoder = OneHotEncoder(sparse=False)

radiant = pd.DataFrame()

radiant = encoder.fit_transform(heros_data[["player_0"]])+ encoder.fit_transform(heros_data[["player_1"]]) + \
            encoder.fit_transform(heros_data[["player_2"]]) + encoder.fit_transform(heros_data[["player_3"]]) + \
encoder.fit_transform(heros_data[["player_4"]])

dire =  ( encoder.fit_transform(heros_data[["player_5"]]) + encoder.fit_transform(heros_data[["player_6"]]) + \
    encoder.fit_transform(heros_data[["player_7"]]) + encoder.fit_transform(heros_data[["player_8"]])  + \
    encoder.fit_transform(heros_data[["player_9"]]))

In [60]:
a = np.hstack((radiant, dire))
a

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 1., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [72]:
heros_data

Unnamed: 0_level_0,player_0,player_1,player_2,player_3,player_4,player_5,player_6,player_7,player_8,player_9
mid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,91,42,87,15,65,11,6,34,69,74
1,69,85,71,24,64,74,68,39,65,11
2,17,40,31,67,99,32,7,72,48,104
3,80,43,101,71,94,69,70,98,24,39
4,25,15,75,29,95,3,32,55,64,86
...,...,...,...,...,...,...,...,...,...,...
49943,3,89,75,81,29,40,94,47,56,68
49944,64,92,69,82,18,104,81,29,5,28
49945,99,109,96,15,2,69,91,53,77,87
49946,69,32,64,65,8,21,22,40,75,59


In [96]:
gold_wih_heroes = k.copy()

gold_info = gold_data[gold_data['times'] == 600].copy()
gold_info.set_index('mid', inplace=True)

counter = 0

for index, row in heros_data.iterrows():
    for j in range(0, 5):
        gold_wih_heroes.loc[index, row['player_' + str(j)]] *= \
        gold_info.loc[index, 'player_' + str(j)]
        
    for j in range(0, 5):
        gold_wih_heroes.loc[index, row['player_' + str(j)] + 110] *= \
        gold_info.loc[index, 'player_' + str(j)]

In [16]:
heroes_encoded = pd.DataFrame(a, columns=["radiant_hero_" + str(i) for i in range(0, radiant.shape[1])] + \
                              ["dire_hero_" + str(i) for i in range(0, dire.shape[1])])

In [17]:
heroes_encoded.sum(axis=1)

0        0.0
1        0.0
2        0.0
3        0.0
4        0.0
        ... 
49943    0.0
49944    0.0
49945    0.0
49946    0.0
49947    0.0
Length: 49948, dtype: float64

In [18]:
scaler = StandardScaler()

In [20]:
items_data = pd.read_csv("items.csv", )

In [21]:
items_data.fillna(0, inplace=True)

In [22]:
items_data

Unnamed: 0,mid,player,item_0,item_1,item_2,item_3,item_4,item_5,item_6,item_7,...,item_111,item_112,item_113,item_114,item_115,item_116,item_117,item_118,item_119,item_120
0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0,2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0,3,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0,4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
499407,49947,5,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
499408,49947,6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0
499409,49947,7,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
499410,49947,8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [23]:
items_data.set_index('mid', inplace=True)

In [24]:
items_data['team'] = items_data.apply(lambda row: 'radiant' if row['player'] < 5 else 'dire', axis = 1)

In [25]:
items_sum = items_data.groupby(['mid', 'team']).sum()

In [26]:
items_sum = items_sum.unstack(level='team')

In [27]:
items_sum.columns = ['_'.join(col) for col in items_sum.columns]

In [257]:
items_sum

Unnamed: 0_level_0,player_dire,player_radiant,item_0_dire,item_0_radiant,item_1_dire,item_1_radiant,item_2_dire,item_2_radiant,item_3_dire,item_3_radiant,...,item_116_dire,item_116_radiant,item_117_dire,item_117_radiant,item_118_dire,item_118_radiant,item_119_dire,item_119_radiant,item_120_dire,item_120_radiant
mid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,35,10,0.0,0.0,2.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,35,10,0.0,0.0,2.0,2.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,35,10,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,35,10,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,35,10,0.0,0.0,4.0,5.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
49943,35,10,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,...,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
49944,35,10,0.0,0.0,2.0,2.0,0.0,0.0,0.0,0.0,...,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
49945,35,10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
49946,35,10,0.0,0.0,2.0,2.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [28]:
items_sum.drop(["player_dire", "player_radiant"], axis=1, inplace=True)

In [29]:
events_data = pd.read_csv("events.csv")

In [30]:
events_data

Unnamed: 0,mid,event_type,from_team,time
0,0,3,radiant,1
1,1,3,radiant,222
2,2,3,dire,143
3,3,3,radiant,143
4,4,3,dire,53
...,...,...,...,...
69115,49945,3,dire,432
69116,49945,6,dire,498
69117,49946,3,dire,69
69118,49946,6,dire,439


In [31]:
events_agr = pd.DataFrame(events_data[events_data['time'] <= 600].groupby(["mid", 'event_type', 'from_team']).size(), columns=['count'])

In [32]:
events_sum = events_agr.unstack(['from_team', 'event_type'], fill_value=0)

In [33]:
events_sum.columns = events_sum.columns.droplevel(0)

In [264]:
events_sum

from_team,radiant,dire,radiant,dire,radiant,dire,dire,dire,radiant,radiant,radiant,dire,dire,radiant
event_type,3,3,6,6,5,5,0,4,0,4,1,2,1,2
mid,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2
0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
1,1,0,0,0,0,0,0,0,0,0,0,0,0,0
2,0,1,0,0,0,0,0,0,0,0,0,0,0,0
3,1,0,0,0,0,0,0,0,0,0,0,0,0,0
4,0,1,0,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
49943,1,0,0,1,0,0,0,0,0,0,0,0,0,0
49944,0,1,0,0,0,0,0,0,0,0,0,0,0,0
49945,0,1,0,1,0,0,0,0,0,0,0,0,0,0
49946,0,1,0,1,0,0,0,0,0,0,0,0,0,0


In [265]:
events_sum.columns

MultiIndex([('radiant', 3),
            (   'dire', 3),
            ('radiant', 6),
            (   'dire', 6),
            ('radiant', 5),
            (   'dire', 5),
            (   'dire', 0),
            (   'dire', 4),
            ('radiant', 0),
            ('radiant', 4),
            ('radiant', 1),
            (   'dire', 2),
            (   'dire', 1),
            ('radiant', 2)],
           names=['from_team', 'event_type'])

In [34]:
events_sum.columns = [team + '_' + str(num) for team, num in events_sum.columns]

In [35]:
events_sum = events_sum.reindex(sorted(events_sum.columns), axis=1)

In [36]:
events_sum.head(10)

Unnamed: 0_level_0,dire_0,dire_1,dire_2,dire_3,dire_4,dire_5,dire_6,radiant_0,radiant_1,radiant_2,radiant_3,radiant_4,radiant_5,radiant_6
mid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
0,0,0,0,0,0,0,0,0,0,0,1,0,0,0
1,0,0,0,0,0,0,0,0,0,0,1,0,0,0
2,0,0,0,1,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,1,0,0,0
4,0,0,0,1,0,0,0,0,0,0,0,0,0,0
6,0,0,0,0,0,0,0,0,0,0,1,0,0,1
7,0,0,0,1,0,0,0,0,0,0,0,0,0,2
8,0,0,0,1,0,0,1,0,0,0,0,0,0,0
9,0,0,0,1,0,0,0,0,0,0,0,0,0,0
10,0,0,0,0,0,0,0,0,0,0,1,0,0,0


In [112]:
gold_wih_heroes

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,212,213,214,215,216,217,218,219,220,221
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
49943,0.0,0.0,0.0,3754.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
49944,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
49945,0.0,0.0,5542.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
49946,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3517.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [37]:
data_with_xp = pd.DataFrame(scaler.fit_transform(data_with_xp), columns=data_with_xp.columns, index = data_with_xp.index)

In [38]:
data_with_xp = data_with_xp.join(items_sum)

In [39]:
data_with_xp = data_with_xp.join(events_sum, how='inner')

In [136]:
data_with_heroes_xp = data_with_xp.join(pd.DataFrame(scaler.fit_transform(gold_wih_heroes)))

In [137]:
test_data = pd.read_csv("test.csv")

In [138]:
data_for_test = test_data.merge(data_with_heroes_xp, right_index=True, left_on='mid', how='left')

In [139]:
data_for_test.drop("mid", axis=1, inplace=True)

In [140]:
data_for_test.fillna(0, inplace=True)

In [141]:
data_for_train = pd.merge(data_with_heroes_xp, train_data, left_index=True, right_on='mid', how='inner')

In [142]:
data_for_train.drop("mid", axis=1, inplace=True)

In [143]:
target = data_for_train.pop('radiant_won')

radiant_sum_gold    False
radiant_max_gold    False
radiant_min_gold    False
dire_sum_gold       False
dire_max_gold       False
                    ...  
dire_hero_106       False
dire_hero_107       False
dire_hero_108       False
dire_hero_109       False
dire_hero_110       False
Length: 496, dtype: bool

In [144]:
log_reg_pipe_multy = LogisticRegression(max_iter=100000, solver='liblinear')

log_reg_scale_params_multy_xp = {'penalty' : ['l1', 'l2'],
                              'C' : [i for i in np.logspace(-3, 3, 7)]}                        

log_reg_clf_multy_xp = GridSearchCV(log_reg_pipe_multy, log_reg_scale_params_multy_xp, scoring='roc_auc', n_jobs=-1, cv=5)

In [None]:
log_reg_clf_multy_xp.fit(data_for_train, target)

In [242]:
res = result5.copy()

In [123]:
log_reg_clf_multy_xp.cv_results_

{'mean_fit_time': array([25.38962493, 32.23082848, 28.79061742, 34.83443723, 30.48157778,
        37.25841293, 29.86320143, 43.47922277, 33.75707841, 48.41762652,
        35.85706553, 47.74775133, 32.60698586, 52.41953106, 33.50338979,
        45.81564503, 23.64722071, 28.29811511, 17.86400332, 23.45444708]),
 'std_fit_time': array([4.02545157, 2.76113401, 4.26477897, 2.36677672, 4.52410167,
        4.2002046 , 2.68572555, 4.28742739, 3.06935196, 5.16944307,
        3.90459401, 7.51757864, 4.60282316, 4.93061279, 2.7840286 ,
        2.88851752, 5.2075253 , 1.32008571, 1.8997597 , 1.33609604]),
 'mean_score_time': array([0.31014371, 0.20710325, 0.2168859 , 0.17893038, 0.1838727 ,
        0.18320541, 0.14556208, 0.18604918, 0.28976426, 0.17338314,
        0.27245231, 0.18147483, 0.29919405, 0.28730931, 0.21887183,
        0.07733088, 0.07282953, 0.06790776, 0.10280867, 0.04061737]),
 'std_score_time': array([0.1360206 , 0.09607345, 0.11275584, 0.05565336, 0.09240737,
        0.0510449 , 

0.76075083
0.76075196

0.76236785

0.76567471
0.76625249
0.76631909

0.76617235

0.76055547

0.75803188

In [287]:
svc_parms = {'C': [i for i in np.arange(0.1, 1.5, 0.1)], 
             'kernel' : ['poly', 'rbf', 'sigmoid']}
svc = SVC(max_iter=20000, verbose=True)
svc_clf = GridSearchCV(svc, svc_parms, scoring='roc_auc', n_jobs=-1, cv=5)

In [328]:
svc_clf.fit(data_for_train, target )

NameError: name 'svc_clf' is not defined

In [277]:
svc_clf.cv_results_

{'mean_fit_time': array([6.42945795, 4.58632817, 9.23084579, 5.86177521, 5.83884621,
        6.96167645, 4.74170551, 7.26357002, 8.52778306, 5.09948072]),
 'std_fit_time': array([3.02773355, 1.6332551 , 5.0157206 , 1.65927766, 1.31841451,
        2.47779436, 1.66348419, 4.11994137, 2.37824849, 1.56794555]),
 'mean_score_time': array([0.06487203, 0.05697522, 0.05492301, 0.0473917 , 0.05224781,
        0.05417233, 0.04697137, 0.04335623, 0.03720393, 0.03143806]),
 'std_score_time': array([0.01457834, 0.02289509, 0.0116445 , 0.0095709 , 0.01168122,
        0.00956299, 0.00834084, 0.01001687, 0.01754095, 0.01140868]),
 'param_C': masked_array(data=[0.5, 0.6, 0.7, 0.7999999999999999, 0.8999999999999999,
                    0.9999999999999999, 1.0999999999999999,
                    1.1999999999999997, 1.2999999999999998, 1.4],
              mask=[False, False, False, False, False, False, False, False,
                    False, False],
        fill_value='?',
             dtype=object),
 'p

In [124]:
result_raw = log_reg_clf_multy_xp.predict_proba(data_for_test)

In [279]:
mlcp_params = {'hidden_layer_sizes': [35, 32, 30, 28, 25, 23, 20, 19, 17, 15, 13, 10, 8, 5, 2],
              'activation' : ['logistic', 'tanh', 'relu']}


perceptron = MLPClassifier(activation='logistic')
mplr_clf = GridSearchCV(perceptron, mlcp_params, scoring='roc_auc', n_jobs=-1, cv=5)

In [281]:
mplr_clf.fit(data_for_train, target)

GridSearchCV(cv=5, estimator=MLPClassifier(activation='logistic'), n_jobs=-1,
             param_grid={'activation': ['logistic', 'tanh', 'relu'],
                         'hidden_layer_sizes': [35, 32, 30, 28, 25, 23, 20, 19,
                                                17, 15, 13, 10, 8, 5, 2]},
             scoring='roc_auc')

In [282]:
mplr_clf.cv_results_

{'mean_fit_time': array([101.55167823,  62.28853502, 123.3159729 , 120.08171902,
        125.52208014, 147.01923385, 162.73759995, 133.49835958,
        100.11068521,  80.37599125,  85.44808664, 103.55071487,
         89.75465469, 117.02550774,  73.01282983, 204.47854991,
        190.74120708, 198.35513811, 169.11947441, 122.35598798,
        121.48866682, 113.76298194, 114.6133604 , 109.15546761,
        106.95847898, 102.90032716, 100.43945055,  99.18267794,
         98.86119838,  61.49700961, 123.80588975, 113.2167346 ,
        116.73021574, 112.5657824 , 109.49092731, 113.26187124,
        113.64550595, 155.60574102, 130.4505559 , 133.9963273 ,
        107.67674522,  97.95571389, 141.08107257, 100.9073709 ,
         51.43307738]),
 'std_fit_time': array([74.45775472, 57.88390693, 73.61961112, 70.06065873, 74.8116455 ,
        53.05233923,  3.29482753, 52.95243088, 60.41118893, 59.33707147,
        49.34669375, 42.20494232, 49.44041106, 50.50769989, 28.52704352,
        13.42524663,

In [256]:
data_for_test

Unnamed: 0,radiant_sum_gold,radiant_max_gold,radiant_min_gold,dire_sum_gold,dire_max_gold,dire_min_gold,radiant_max_lh,radiant_min_lh,radiant_sum_lh,dire_max_lh,...,dire_hero_101,dire_hero_102,dire_hero_103,dire_hero_104,dire_hero_105,dire_hero_106,dire_hero_107,dire_hero_108,dire_hero_109,dire_hero_110
0,0.207340,-0.264248,0.813984,-0.926862,-0.342132,-1.188654,-0.213023,-1.142926,-0.101378,-0.037065,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2.205336,-0.171793,2.700292,1.004050,-0.041490,1.327962,-1.056118,1.112620,-0.101378,-0.873671,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
2,-1.151112,-1.116887,-0.746902,0.505302,0.446218,1.567012,-0.289668,0.862004,-0.170544,0.115045,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.389848,-0.553753,0.564242,0.046315,1.051320,0.083771,-0.519603,-0.391077,-0.550960,-0.797616,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,-0.103996,-0.192339,-0.207687,0.843849,-0.671407,1.390077,-0.749538,1.112620,-0.032211,0.647431,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24969,1.138281,2.133041,0.715601,1.320979,0.591290,0.733160,-0.672893,-0.391077,-0.965960,0.799541,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
24970,-1.165299,-1.626790,0.157466,0.104991,-0.825068,0.488463,-2.359083,-0.641694,-2.210958,-2.318718,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
24971,0.627184,-0.105487,1.627537,1.837871,2.441909,0.593871,0.016912,3.117551,0.694038,-0.113120,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
24972,-0.061820,-0.127900,0.093138,0.725338,0.073040,0.921388,0.783362,-1.142926,-0.205128,0.495320,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0


In [246]:
result5.to_csv("answer4.csv", index=False)

In [114]:
result_raw[:, 1:2].flatten()

array([0.75340524, 0.55867831, 0.17710645, ..., 0.34507832, 0.45254833,
       0.84239178])

In [317]:
an = result6.copy()

In [125]:
result6 = pd.DataFrame({'mid': test_data['mid'], 'radiant_win': result_raw[:, 1:2].flatten()})

In [126]:
result6.to_csv("answer40.csv", index=False)

In [320]:
(an != result6).sum()

mid                0
radiant_win    24974
dtype: int64