In [1]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.svm import SVC, LinearSVC
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import scale
from sklearn.preprocessing import StandardScaler

In [2]:
# Read the three raw tables with identical parsing options.
# na_values="-1" makes pandas parse the string "-1" as a missing value (NaN).
lh_data, gold_data, train_data = (
    pd.read_csv(csv_name, na_values="-1")
    for csv_name in ("lh.csv", "gold.csv", "train.csv")
)

In [3]:
# Keep only the rows whose `times` value is 600 (presumably seconds — TODO confirm)
# and prefix every column so gold and lh columns stay distinguishable after the merge.
gold_snapshot = gold_data.loc[gold_data["times"] == 600].add_prefix("gold_")
lh_snapshot = lh_data.loc[lh_data["times"] == 600].add_prefix("lh_")

# Inner join on the (prefixed) match id: only matches present in both snapshots survive.
data = pd.merge(
    gold_snapshot,
    lh_snapshot,
    how="inner",
    left_on="gold_mid",
    right_on="lh_mid",
)

# `lh_mid` is kept as the single match-id column; the duplicate id and the
# now-constant `times` columns are dropped.
data = data.drop(columns=["lh_times", "gold_mid", "gold_times"])
data

Unnamed: 0,gold_player_0,gold_player_1,gold_player_2,gold_player_3,gold_player_4,gold_player_5,gold_player_6,gold_player_7,gold_player_8,gold_player_9,...,lh_player_0,lh_player_1,lh_player_2,lh_player_3,lh_player_4,lh_player_5,lh_player_6,lh_player_7,lh_player_8,lh_player_9
0,3454,5206,2613,4426,5755,4072,3997,5917,1725,6384,...,4,43,3,57,41,34,35,75,2,46
1,2477,5760,3816,4353,5759,7659,5066,2748,4440,4623,...,5,63,14,28,47,49,23,3,30,39
2,3604,1948,8581,4390,2869,3096,2301,5130,2530,2491,...,34,6,69,42,23,6,12,56,13,12
3,3457,5464,4432,2961,4314,3345,4791,1906,5328,2247,...,1,49,48,3,31,28,51,6,50,8
4,3675,4103,5154,3030,2076,3920,3494,3392,4458,2220,...,30,41,45,23,4,26,9,23,37,6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
49943,3754,3556,5388,3391,5634,2853,4447,3636,8245,5598,...,28,20,37,18,52,3,43,16,50,40
49944,4895,2580,4109,5610,2732,3564,5763,5538,4005,3027,...,39,1,21,62,6,1,58,50,10,15
49945,2325,1813,2530,4807,5542,2242,2359,5997,5523,2044,...,13,6,2,37,67,7,7,61,49,2
49946,4134,3141,4086,4405,3517,2887,7450,3634,5431,5451,...,2,24,15,23,30,9,72,4,51,36


In [4]:
# Column names of the per-player features: slots 0-4 are grouped as "radiant",
# slots 5-9 as "dire".
radiant_players_gold = [f"gold_player_{slot}" for slot in range(5)]
radiant_players_lh = [f"lh_player_{slot}" for slot in range(5)]
dire_players_gold = [f"gold_player_{slot}" for slot in range(5, 10)]
dire_players_lh = [f"lh_player_{slot}" for slot in range(5, 10)]

In [5]:
# Collapse the per-player gold / lh columns into per-team sum, max and min features.
# Each entry is (team, metric, source columns, statistics); the order of entries and
# of the statistics inside them fixes the order in which the new columns are appended.
team_aggregates = (
    ("radiant", "gold", radiant_players_gold, ("sum", "max", "min")),
    ("dire", "gold", dire_players_gold, ("sum", "max", "min")),
    ("radiant", "lh", radiant_players_lh, ("max", "min", "sum")),
    ("dire", "lh", dire_players_lh, ("max", "min", "sum")),
)
for team, metric, player_cols, stat_names in team_aggregates:
    team_block = data[player_cols]
    for stat_name in stat_names:
        # Row-wise statistic across the five player columns of this team.
        data.loc[:, f"{team}_{stat_name}_{metric}"] = getattr(team_block, stat_name)(axis=1)

# The raw per-player columns are no longer needed once the aggregates exist.
per_player_cols = radiant_players_gold + radiant_players_lh + dire_players_gold + dire_players_lh

# Index the frame by match id and name that index "mid".
data = data.drop(columns=per_player_cols).set_index("lh_mid").rename_axis("mid")

In [7]:
# Load heroes.csv, using its `mid` column as the index.
# NOTE(review): the variable is spelled `heros_data`; the encoding cell below reads it under this name.
heros_data = pd.read_csv("heroes.csv", index_col='mid')

In [8]:
# One-hot encode every column of heros_data into a dense indicator matrix.
# - `.toarray()` densifies the encoder's default sparse output, so no `sparse=` /
#   `sparse_output=` keyword (whose name differs between scikit-learn releases) is needed.
# - `index=heros_data.index` carries the `mid` labels over; without it the frame gets a
#   positional RangeIndex and any index-based join pairs rows by position, not by match id.
# - String column names keep later feature matrices free of mixed int/str column labels,
#   which recent scikit-learn estimators refuse to accept.
hero_indicators = OneHotEncoder().fit_transform(heros_data).toarray()
heroes_encoded = pd.DataFrame(
    hero_indicators,
    index=heros_data.index,
    columns=[f"hero_{col}" for col in range(hero_indicators.shape[1])],
)

In [9]:
# Append the one-hot hero columns to the aggregated gold / lh features.
# DataFrame.join matches rows on index labels (left join by default), and `data` is indexed by `mid`.
# NOTE(review): the result is only correct if heroes_encoded's index labels are match ids as well — confirm.
data_with_heroes = data.join(heroes_encoded)

In [10]:
# Load xp.csv, using its `mid` column as the index.
xp_data = pd.read_csv("xp.csv", index_col="mid")

In [11]:
# Display the frame for inspection (the notebook renders a cell's last expression).
xp_data

Unnamed: 0_level_0,times,player_0,player_1,player_2,player_3,player_4,player_5,player_6,player_7,player_8,player_9
mid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
0,60,79,214,147,222,147,94,78,396,94,147
0,120,321,719,423,777,421,490,607,895,241,365
0,180,356,1333,424,1300,638,922,937,1259,242,590
0,240,544,1752,441,1782,1348,1460,1163,2037,242,658
0,300,724,2002,565,2087,1807,2102,1498,2389,276,1020
...,...,...,...,...,...,...,...,...,...,...,...
49947,360,1826,708,2649,1568,2124,2283,858,1337,2759,2360
49947,420,2068,1153,3081,1878,2700,2533,906,1618,3259,2764
49947,480,2358,1152,3539,2266,2881,3057,1029,1811,3911,3260
49947,540,2698,1168,5034,2721,3107,3300,1217,2151,4116,3901


In [12]:
# Take the xp rows whose `times` value is 600 and attach them to the feature frame.
# Both frames are indexed by `mid`, so the merge matches on the index of each side
# (default inner join: only match ids present in both are kept).
xp_at_600 = xp_data.loc[xp_data["times"] == 600]
data_with_heroes_xp = pd.merge(
    data_with_heroes,
    xp_at_600,
    left_index=True,
    right_index=True,
)

In [13]:
# Display the merged frame for inspection (the notebook renders a cell's last expression).
data_with_heroes_xp

Unnamed: 0_level_0,radiant_sum_gold,radiant_max_gold,radiant_min_gold,dire_sum_gold,dire_max_gold,dire_min_gold,radiant_max_lh,radiant_min_lh,radiant_sum_lh,dire_max_lh,...,player_0,player_1,player_2,player_3,player_4,player_5,player_6,player_7,player_8,player_9
mid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,21454,5755,2613,22095,6384,1725,57,3,148,75,...,1696,4304,1606,4429,3821,3745,3635,4846,357,2347
1,22165,5760,2477,24536,7659,2748,63,5,157,49,...,1542,4481,1961,2437,4810,4514,3510,2248,3403,3662
2,21392,8581,1948,15548,5130,2301,69,6,174,56,...,2851,1986,6234,2667,4265,2402,1541,3212,2864,2426
3,20628,5464,2961,17617,5328,1906,49,1,132,51,...,1145,4640,4202,1769,3786,3529,2962,1400,4512,1530
4,18038,5154,2076,17484,4458,2220,45,4,143,37,...,2197,3525,4855,3239,1518,3373,1935,2563,3281,1924
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
49943,21723,5634,3391,24779,8245,2853,52,18,155,50,...,1973,2317,4578,2366,4070,1911,3200,2245,6009,3850
49944,19926,5610,2580,21897,5763,3027,62,1,129,58,...,4117,1190,3020,3774,2062,1425,4889,4347,3218,2966
49945,17017,5542,1813,18165,5997,2044,67,2,125,61,...,1999,2236,1024,4695,3544,2365,1783,3339,4448,1271
49946,19283,4405,3141,24853,7450,2887,30,2,94,72,...,2293,3086,2518,3612,2686,2573,4360,2395,5008,2929


In [14]:
# Collapse the per-player xp columns into per-team sum, max and min features.
# Player slots 0-4 feed the "radiant" columns, slots 5-9 the "dire" columns.
radiant_xp_cols = [f"player_{slot}" for slot in range(5)]
dire_xp_cols = [f"player_{slot}" for slot in range(5, 10)]

for team, xp_cols in (("radiant", radiant_xp_cols), ("dire", dire_xp_cols)):
    team_xp = data_with_heroes_xp[xp_cols]
    for stat_name in ("sum", "max", "min"):
        # Row-wise statistic across the five player columns of this team.
        data_with_heroes_xp.loc[:, f"{team}_{stat_name}_xp"] = getattr(team_xp, stat_name)(axis=1)

data_with_heroes_xp

Unnamed: 0_level_0,radiant_sum_gold,radiant_max_gold,radiant_min_gold,dire_sum_gold,dire_max_gold,dire_min_gold,radiant_max_lh,radiant_min_lh,radiant_sum_lh,dire_max_lh,...,player_6,player_7,player_8,player_9,radiant_sum_xp,radiant_max_xp,radiant_min_xp,dire_sum_xp,dire_max_xp,dire_min_xp
mid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,21454,5755,2613,22095,6384,1725,57,3,148,75,...,3635,4846,357,2347,15856,4429,1606,14930,4846,357
1,22165,5760,2477,24536,7659,2748,63,5,157,49,...,3510,2248,3403,3662,15231,4810,1542,17337,4514,2248
2,21392,8581,1948,15548,5130,2301,69,6,174,56,...,1541,3212,2864,2426,18003,6234,1986,12445,3212,1541
3,20628,5464,2961,17617,5328,1906,49,1,132,51,...,2962,1400,4512,1530,15542,4640,1145,13933,4512,1400
4,18038,5154,2076,17484,4458,2220,45,4,143,37,...,1935,2563,3281,1924,15334,4855,1518,13076,3373,1924
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
49943,21723,5634,3391,24779,8245,2853,52,18,155,50,...,3200,2245,6009,3850,15304,4578,1973,17215,6009,1911
49944,19926,5610,2580,21897,5763,3027,62,1,129,58,...,4889,4347,3218,2966,14163,4117,1190,16845,4889,1425
49945,17017,5542,1813,18165,5997,2044,67,2,125,61,...,1783,3339,4448,1271,13498,4695,1024,13206,4448,1271
49946,19283,4405,3141,24853,7450,2887,30,2,94,72,...,4360,2395,5008,2929,14195,3612,2293,17265,5008,2395


In [15]:
# The raw per-player xp columns are replaced by the team aggregates; remove them in place.
data_with_heroes_xp.drop(columns=[f"player_{slot}" for slot in range(10)], inplace=True)

In [16]:
# `times` is constant after filtering the xp rows to a single value, so drop it in place.
data_with_heroes_xp.drop(columns="times", inplace=True)

In [17]:
# Load test.csv; later cells read its `mid` column to look up features and to label predictions.
test_data = pd.read_csv("test.csv")

In [25]:
# Look up the engineered features for every test match: test_data's `mid` column is
# matched against the `mid` index of the feature frame. A left join keeps every test
# row; matches without features end up with NaN in the feature columns.
data_for_test = pd.merge(
    test_data,
    data_with_heroes_xp,
    how="left",
    left_on="mid",
    right_index=True,
)

In [27]:
# The match id is an identifier, not a feature; remove it in place.
data_for_test.drop(columns="mid", inplace=True)

In [28]:
# Replace every missing value in the test features with 0, in place.
# NOTE(review): the zeros are filled on the raw (unscaled) features, so after the later
# StandardScaler transform they will generally not sit at the column mean — confirm this is intended.
data_for_test.fillna(0, inplace=True)

In [18]:
# Pair the engineered features (indexed by `mid`) with the rows of train_data via its
# `mid` column. The inner join keeps only matches that appear on both sides.
data_for_train = data_with_heroes_xp.merge(
    train_data,
    how="inner",
    left_index=True,
    right_on="mid",
)

In [20]:
# The match id is an identifier, not a feature; remove it in place.
data_for_train.drop(columns="mid", inplace=True)

In [19]:
# Split off the label: pop() removes the 'radiant_won' column from data_for_train
# and returns it, leaving only feature columns in the frame.
target = data_for_train.pop('radiant_won')

In [52]:
# Learn per-column mean and standard deviation from the full feature frame
# (every match in data_with_heroes_xp, not only the training rows).
scaler = StandardScaler().fit(data_with_heroes_xp)
scaler

StandardScaler()

In [60]:
# Base estimator: liblinear is used because the grid below searches both L1 and L2 penalties.
log_reg_pipe_multy = LogisticRegression(solver="liblinear", max_iter=100000)

# Search space: both penalties crossed with nine values of C spaced logarithmically from 1e-5 to 1e3.
log_reg_scale_params_multy_xp = {
    "penalty": ["l1", "l2"],
    "C": list(np.logspace(-5, 3, num=9)),
}

# 5-fold cross-validated grid search scored by ROC AUC, using all available cores.
log_reg_clf_multy_xp = GridSearchCV(
    estimator=log_reg_pipe_multy,
    param_grid=log_reg_scale_params_multy_xp,
    scoring="roc_auc",
    cv=5,
    n_jobs=-1,
)

In [None]:
# Standardize the training features with the already-fitted scaler, then run the grid search.
train_features_scaled = scaler.transform(data_for_train)
log_reg_clf_multy_xp.fit(train_features_scaled, target)

In [None]:
# Show the cross-validation results recorded by the grid search for every parameter combination.
log_reg_clf_multy_xp.cv_results_

In [54]:
# Apply the same scaling to the test features and predict with the refitted best
# estimator; predict() returns class labels rather than probabilities.
test_features_scaled = scaler.transform(data_for_test)
result_raw = log_reg_clf_multy_xp.predict(test_features_scaled)

In [55]:
# Assemble the answer table: one row per test match with its predicted label, stored as integers.
submission_columns = {"mid": test_data["mid"], "radiant_win": result_raw}
result = pd.DataFrame(submission_columns, dtype=int)

In [39]:
# Load answer2.csv so the new predictions can be compared against it below.
old = pd.read_csv("answer2.csv")

In [53]:
# Keep an independent copy of the current predictions for comparison with another run.
result1 = result.copy()

In [56]:
# Count, per column, how many entries differ between `result` and the saved copy `result1`.
# NOTE(review): when the notebook is run top to bottom, `result1` is a copy of `result` taken
# in the cell just above, so every count is 0. A non-zero count can only come from re-creating
# `result` after the copy was taken, i.e. from out-of-order cell execution.
(result != result1).sum()

mid              0
radiant_win    373
dtype: int64

In [48]:
# Count, per column, how many entries differ between the new predictions and answer2.csv.
# The element-wise comparison requires both frames to carry identical row and column labels.
(result != old).sum()

mid            0
radiant_win    1
dtype: int64

In [45]:
# Display the new predictions for inspection.
result

Unnamed: 0,mid,radiant_win
0,3,1
1,7,1
2,9,0
3,10,1
4,12,0
...,...,...
24969,49936,0
24970,49942,0
24971,49943,0
24972,49944,0


In [37]:
# Display the previously saved predictions for inspection.
old

Unnamed: 0,mid,radiant_win
0,3,1
1,7,1
2,9,0
3,10,1
4,12,0
...,...,...
24969,49936,0
24970,49942,0
24971,49943,0
24972,49944,0
