In [1]:
# -*- coding: utf-8 -*-
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties

### 导入raw data

In [2]:
# Load the raw CSV exports; every file is read as UTF-8.
champs = pd.read_csv('raw_data/champs.csv', encoding='utf-8')
matches = pd.read_csv('raw_data/matches.csv', encoding='utf-8')
participants = pd.read_csv('raw_data/participants.csv', encoding='utf-8')
stats1 = pd.read_csv('raw_data/stats1.csv', encoding='utf-8')
stats2 = pd.read_csv('raw_data/stats2.csv', encoding='utf-8')
# The stats are split across two files; stack them row-wise into one frame.
# pd.concat is used instead of DataFrame.append, which was deprecated in
# pandas 1.4 and removed in pandas 2.0. Row labels are kept as-is, exactly
# as append() did by default.
stats_all = pd.concat([stats1, stats2])

  interactivity=interactivity, compiler=compiler, result=result)


### 合并raw data

In [3]:
# Build one wide table by chaining three left joins:
#   participants -> stats (on 'id')
#                -> champs (participants.championid == champs.id)
#                -> matches (participants.matchid == matches.id)
# Column names that already exist on the left keep their name; the clashing
# right-hand column gets the table-specific suffix.
df = (
    participants
    .merge(stats_all, how='left', on=['id'], suffixes=('', '_stats'))
    .merge(champs, how='left', left_on='championid', right_on='id', suffixes=('', '_champs'))
    .merge(matches, how='left', left_on='matchid', right_on='id', suffixes=('', '_matches'))
)

### 数据预处理

In [4]:
def final_position(col):
    """Return the lane label for one participant row.

    ``col`` is anything indexable by ``'role'`` and ``'position'`` (here a
    DataFrame row). When the role is 'DUO_SUPPORT' or 'DUO_CARRY' the role
    itself is returned; for every other role the position is returned.
    """
    duo_roles = ('DUO_SUPPORT', 'DUO_CARRY')
    return col['role'] if col['role'] in duo_roles else col['position']

In [5]:
# Derive lane, team and the combined "team - lane" label for every participant.
# These are vectorised equivalents of the former row-wise ``apply`` calls
# (same values, without one Python call per row of the participant table).
# Lane: the role when it is DUO_SUPPORT / DUO_CARRY, otherwise the position.
is_duo_role = df['role'].isin(['DUO_SUPPORT', 'DUO_CARRY'])
df['adjposition'] = df['role'].where(is_duo_role, df['position'])
# Team: players 1-5 are labelled '1', everyone else '2' (kept as strings so
# they can be concatenated below).
df['team'] = np.where(df['player'] <= 5, '1', '2')
df['team_role'] = df['team'] + ' - ' + df['adjposition']

In [6]:
# Collect the ids of matches whose lane assignment is ambiguous and drop them.
# A match is dropped when one of the ten expected team/lane slots occurs a
# number of times other than one, or when any participant is still labelled 'BOT'.
expected_slots = [team + ' - ' + lane
                  for team in ('1', '2')
                  for lane in ('MID', 'TOP', 'DUO_SUPPORT', 'DUO_CARRY', 'JUNGLE')]
slot_counts = (
    df[df['team_role'].isin(expected_slots)]
    .groupby(['team_role', 'matchid'])
    .size()
)
bad_slots = slot_counts[slot_counts != 1]
remove_index = set(bad_slots.index.get_level_values('matchid'))
# remove unclassified BOT, correct ones should be DUO_SUPPORT OR DUO_CARRY
remove_index.update(df.loc[df['adjposition'] == 'BOT', 'matchid'].unique())
remove_index = list(remove_index)

print('# matches in dataset before cleaning:{}'.format(df['matchid'].nunique()))
df = df[~df['matchid'].isin(remove_index)]
print('# matches in dataset after cleaning: {}'.format(df['matchid'].nunique()))
print('row_num', len(df))

# matches in dataset before cleaning:184069
# matches in dataset after cleaning: 148638
row_num 1486362


In [7]:
# Remove missing values: if any participant row of a match has a missing
# value in any column, every row of that match is dropped.
# ``isnull().any(axis=1)`` yields exactly one boolean per row. The earlier
# ``df[df.isnull().values==True]`` handed a 2-D mask to ``df[...]``, which
# depends on how pandas resolves such a mask and can repeat a row once per
# missing cell.
df_null = df[df.isnull().any(axis=1)]
matchids = list(df_null['matchid'].drop_duplicates())
df = df[~df.matchid.isin(matchids)]

### 对局信息

In [8]:
##################################################################################
#                        df_3: match information                                 #
##################################################################################
# One row per match: one column per team/lane slot holding the champion name,
# plus the 'win' value of player 1, exposed as 'T1 win'.
slot_names = df[['matchid', 'player', 'name', 'team_role', 'win']].pivot(
    index='matchid', columns='team_role', values='name')
player1_win = df.loc[df['player'] == 1, ['matchid', 'win']]
df_3 = (
    slot_names
    .reset_index()
    .merge(player1_win, left_on='matchid', right_on='matchid', how='left')
    .rename(columns={'win': 'T1 win'})
    # Matches with an empty slot or without a player-1 row are discarded.
    .dropna()
)

### 构造英雄特征

In [9]:
def extracting(df,df_3):
    """Attach team kill totals and per-champion average stats to the lineup table.

    Parameters
    ----------
    df : DataFrame
        Participant-level rows. Columns read here: 'matchid', 'team', 'name',
        'kills', 'deaths', 'assists', 'dmgtoturrets', 'visionscore'.
        'team' must be castable to int64 (teams are matched as 1 and 2).
    df_3 : DataFrame
        One row per match with 'matchid' and one champion-name column per
        team/lane slot ('1 - MID', ..., '2 - JUNGLE').

    Returns
    -------
    DataFrame
        ``df_3`` extended with '1_total_kills' / '2_total_kills' and, for every
        slot, '<slot>_kills', '<slot>_deaths', '<slot>_assists',
        '<slot>_dmgtoturrets', '<slot>_visionscore' and '<slot>_KDA'.
        Every join is a left join on ``df_3``.
    """
    # Kills summed per (match, team).
    team_kills = (
        df[['kills']]
        .groupby([df['matchid'], df['team']])
        .sum()
        .rename(columns={'kills': 'total_kills'})
        .reset_index()
    )
    team_kills['team'] = team_kills['team'].astype('int64')

    # Per-champion averages over all rows; KDA is derived from those averages.
    stat_columns = ['kills', 'deaths', 'assists', 'dmgtoturrets', 'visionscore']
    hero_avg = df[['name'] + stat_columns].groupby('name').mean()
    hero_avg['KDA'] = (hero_avg['kills'] + hero_avg['assists']) / hero_avg['deaths']

    # Add the kill totals of team 1, then team 2.
    result = df_3
    for team_id in (1, 2):
        kills_column = '{}_total_kills'.format(team_id)
        per_team = team_kills[team_kills['team'] == team_id][['matchid', 'total_kills']]
        result = result.merge(
            per_team.rename(columns={'total_kills': kills_column}),
            on='matchid', how='left')

    # Look up the champion sitting in each slot ('name' is the index of
    # hero_avg) and prefix the joined stat columns with the slot label.
    slots = [team + ' - ' + lane
             for team in ('1', '2')
             for lane in ('MID', 'TOP', 'DUO_SUPPORT', 'DUO_CARRY', 'JUNGLE')]
    for slot in slots:
        prefixed = {stat: slot + '_' + stat for stat in hero_avg.columns}
        result = (
            result
            .merge(hero_avg, left_on=slot, right_on='name', how='left')
            .rename(columns=prefixed)
        )

    return result

In [10]:
# Build the hero feature table from the cleaned participant rows (df) and the
# per-match lineup table (df_3).
feature_hero = extracting(df,df_3)

In [11]:
# Display the frame as the cell output for a visual check.
feature_hero

Unnamed: 0,matchid,1 - DUO_CARRY,1 - DUO_SUPPORT,1 - JUNGLE,1 - MID,1 - TOP,2 - DUO_CARRY,2 - DUO_SUPPORT,2 - JUNGLE,2 - MID,...,2 - DUO_CARRY_assists,2 - DUO_CARRY_dmgtoturrets,2 - DUO_CARRY_visionscore,2 - DUO_CARRY_KDA,2 - JUNGLE_kills,2 - JUNGLE_deaths,2 - JUNGLE_assists,2 - JUNGLE_dmgtoturrets,2 - JUNGLE_visionscore,2 - JUNGLE_KDA
0,10,Draven,Nami,Warwick,Viktor,Fiora,Jinx,VelKoz,Skarner,Ahri,...,7.607522,4160.297148,9.347523,2.437049,4.790698,5.021512,9.222674,1273.091279,12.152326,2.790668
1,11,Cassiopeia,Ziggs,Evelynn,Yasuo,Twisted Fate,Caitlyn,Karma,Warwick,Syndra,...,7.443961,3807.652602,10.467949,2.403311,7.167596,6.210793,7.789013,1274.079582,11.762421,2.408164
2,12,Draven,Janna,Amumu,Lux,Vayne,Twitch,Nami,Vi,Ahri,...,7.293329,3227.032069,10.975155,2.556030,6.427994,6.104221,7.930923,1618.634518,12.077257,2.352293
3,14,Caitlyn,Zac,Wukong,Orianna,Urgot,Lucian,Karma,Warwick,Morgana,...,7.126743,3528.743616,10.461909,2.588697,7.167596,6.210793,7.789013,1274.079582,11.762421,2.408164
4,15,Miss Fortune,Thresh,Warwick,Ahri,Garen,Ziggs,Nami,Lee Sin,Yasuo,...,7.969125,11335.219468,11.354221,2.431721,6.660166,6.163571,8.526725,1321.807970,21.462089,2.463976
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
148629,187582,Ezreal,ChoGath,Lee Sin,Ekko,Malphite,Tristana,Braum,Master Yi,Lux,...,6.511253,4310.190435,9.739717,2.471919,8.919046,7.100530,5.032185,2570.170395,12.535080,1.964816
148630,187583,Tristana,Morgana,Warwick,Ziggs,Nasus,Vayne,Janna,RekSai,Veigar,...,5.956860,3201.373630,9.954429,2.130511,5.045508,5.240957,9.291132,1016.393232,10.456826,2.735500
148631,187585,Tristana,Rammus,Nidalee,Ekko,Rumble,Lucian,Braum,Shaco,Zed,...,7.126743,3528.743616,10.461909,2.588697,6.954213,5.787387,7.521781,1822.186116,13.022258,2.501300
148632,187586,Kindred,Braum,Lee Sin,Ekko,Darius,Ashe,Janna,Volibear,LeBlanc,...,8.744272,3502.367152,11.694437,2.389725,5.958966,5.600332,8.658918,1550.621679,11.789848,2.610182


### 构造伤害特征

In [12]:
def feature_dcdd(df,df_3,champs):
    """Add damage-conversion and damage-per-death features to the lineup table.

    Three per-champion averages are computed from ``df``:

    * ``mid_dc`` - mean of totdmgtochamp / goldspent over the champion's MID rows
    * ``ad_dc``  - the same ratio over the champion's DUO_CARRY rows
    * ``top_dd`` - mean of totdmgtaken / deaths over the champion's TOP rows

    Before dividing, a goldspent of 0 is replaced by the mean goldspent of
    that champion in that position, and 0 deaths are counted as 1.
    A champion with no rows in a position gets NaN for that metric.

    Parameters
    ----------
    df : DataFrame
        Participant-level rows with 'championid', 'adjposition', 'goldspent',
        'totdmgtochamp', 'totdmgtaken' and 'deaths'.
    df_3 : DataFrame
        Lineup table with the champion-name columns '1 - DUO_CARRY',
        '2 - DUO_CARRY', '1 - MID', '2 - MID', '1 - TOP' and '2 - TOP'.
    champs : DataFrame
        Champion table with 'id' and 'name'. It is only read: the metrics are
        computed on a private copy, so the caller's frame is not modified and
        any number of champions (and any row index) is accepted.

    Returns
    -------
    DataFrame
        ``df_3`` plus the columns '1_ad_dc', '2_ad_dc', '1_mid_dc', '2_mid_dc',
        '1_top_dd' and '2_top_dd', in that order (left joins on the champion
        name of the corresponding slot). Only these metric columns are joined.
    """

    def _mean_ratio(position, numerator, denominator, zero_fill):
        """Per-championid mean of numerator / denominator for one position.

        ``zero_fill`` is what a denominator of 0 is replaced with: either the
        string 'group_mean' (mean of the denominator for that champion in this
        position) or a constant.
        """
        rows = df.loc[df['adjposition'] == position,
                      ['championid', numerator, denominator]]
        denom = rows[denominator]
        if isinstance(zero_fill, str) and zero_fill == 'group_mean':
            replacement = denom.groupby(rows['championid']).transform('mean')
        else:
            replacement = zero_fill
        # Only exact zeros are replaced; missing values stay missing and are
        # skipped by the mean below.
        denom = denom.where(denom != 0, replacement)
        return (rows[numerator] / denom).groupby(rows['championid']).mean()

    # dc = damage conversion ratio, dd = damage taken per death.
    # Work on a copy so the caller's champs frame is left untouched.
    champ_stats = champs[['id', 'name']].copy()
    champ_stats['mid_dc'] = champ_stats['id'].map(
        _mean_ratio('MID', 'totdmgtochamp', 'goldspent', 'group_mean'))
    champ_stats['ad_dc'] = champ_stats['id'].map(
        _mean_ratio('DUO_CARRY', 'totdmgtochamp', 'goldspent', 'group_mean'))
    champ_stats['top_dd'] = champ_stats['id'].map(
        _mean_ratio('TOP', 'totdmgtaken', 'deaths', 1))

    # Merge into the lineup table: (slot column, metric, output column).
    # The join key is named explicitly instead of relying on whichever
    # columns the two frames happen to share.
    feature_plan = (
        ('1 - DUO_CARRY', 'ad_dc', '1_ad_dc'),
        ('2 - DUO_CARRY', 'ad_dc', '2_ad_dc'),
        ('1 - MID', 'mid_dc', '1_mid_dc'),
        ('2 - MID', 'mid_dc', '2_mid_dc'),
        ('1 - TOP', 'top_dd', '1_top_dd'),
        ('2 - TOP', 'top_dd', '2_top_dd'),
    )
    test1 = df_3
    for slot, metric, feature in feature_plan:
        lookup = champ_stats[['name', metric]].rename(
            columns={'name': slot, metric: feature})
        test1 = test1.merge(lookup, how='left', on=slot)

    return test1

In [13]:
# Extend the hero feature table with the damage features computed from the
# participant rows (df) and the champion table (champs).
feature_hero_dcdd = feature_dcdd(df,feature_hero,champs)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  del sys.path[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [14]:
# Display the frame as the cell output for a visual check.
feature_hero_dcdd

Unnamed: 0,matchid,1 - DUO_CARRY,1 - DUO_SUPPORT,1 - JUNGLE,1 - MID,1 - TOP,2 - DUO_CARRY,2 - DUO_SUPPORT,2 - JUNGLE,2 - MID,...,2 - JUNGLE_assists,2 - JUNGLE_dmgtoturrets,2 - JUNGLE_visionscore,2 - JUNGLE_KDA,1_ad_dc,2_ad_dc,1_mid_dc,2_mid_dc,1_top_dd,2_top_dd
0,10,Draven,Nami,Warwick,Viktor,Fiora,Jinx,VelKoz,Skarner,Ahri,...,9.222674,1273.091279,12.152326,2.790668,1.785079,1.762735,2.053171,2.012985,6063.924507,5711.843500
1,11,Cassiopeia,Ziggs,Evelynn,Yasuo,Twisted Fate,Caitlyn,Karma,Warwick,Syndra,...,7.789013,1274.079582,11.762421,2.408164,1.937392,1.885442,1.693690,1.933245,3323.548922,5038.985222
2,12,Draven,Janna,Amumu,Lux,Vayne,Twitch,Nami,Vi,Ahri,...,7.930923,1618.634518,12.077257,2.352293,1.785079,1.897595,2.089822,2.012985,3654.686769,3743.404870
3,14,Caitlyn,Zac,Wukong,Orianna,Urgot,Lucian,Karma,Warwick,Morgana,...,7.789013,1274.079582,11.762421,2.408164,1.885442,1.796809,1.833891,1.926942,3901.586798,6513.916470
4,15,Miss Fortune,Thresh,Warwick,Ahri,Garen,Ziggs,Nami,Lee Sin,Yasuo,...,8.526725,1321.807970,21.462089,2.463976,1.976193,2.268102,2.012985,1.693690,7487.953516,7691.114738
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
148629,187582,Ezreal,ChoGath,Lee Sin,Ekko,Malphite,Tristana,Braum,Master Yi,Lux,...,5.032185,2570.170395,12.535080,1.964816,2.008950,1.715368,1.846537,2.089822,5776.655633,4713.623416
148630,187583,Tristana,Morgana,Warwick,Ziggs,Nasus,Vayne,Janna,RekSai,Veigar,...,9.291132,1016.393232,10.456826,2.735500,1.715368,1.741536,2.287018,1.908221,7633.987306,3844.766707
148631,187585,Tristana,Rammus,Nidalee,Ekko,Rumble,Lucian,Braum,Shaco,Zed,...,7.521781,1822.186116,13.022258,2.501300,1.715368,1.796809,1.846537,1.819516,4424.126183,6070.471444
148632,187586,Kindred,Braum,Lee Sin,Ekko,Darius,Ashe,Janna,Volibear,LeBlanc,...,8.658918,1550.621679,11.789848,2.610182,1.600535,1.731015,1.846537,1.846180,6513.916470,7633.987306


### 构造对手特征

In [16]:
# Order the rows so that the two participants of a lane within a match sit
# next to each other. 'player' is part of the sort key so that the team-1 row
# (player <= 5) always precedes the team-2 row; the 'shift -1' / 'shift 1'
# lookups in get_rival below depend on that order, which previously was only
# inherited from the incoming row order.
# Assumes each lane holds exactly one row per team in every remaining match.
df_5 = df.sort_values(['matchid', 'adjposition', 'player'], ascending=[True, True, True])

# Champion name on the previous row ('shift 1') and on the next row ('shift -1').
df_5['shift 1'] = df_5['name'].shift()
df_5['shift -1'] = df_5['name'].shift(-1)

def get_rival(x):
    """Return '<name> vs <opponent>' for one participant row.

    For players 1-5 the opponent is read from the 'shift -1' field, for all
    other players from the 'shift 1' field.
    """
    opponent_field = 'shift -1' if x['player'] <= 5 else 'shift 1'
    return x['name'] + ' vs ' + x[opponent_field]

# Label every row with its lane match-up string (evaluated row by row).
df_5['match up'] = df_5.apply(get_rival, axis = 1)

In [17]:
# Win record of every match-up string within each lane.
df_rival = df_5.groupby(['adjposition', 'match up']).agg({'win': 'sum', 'match up': 'count'})
df_rival.columns = ['win matches', 'total matches']
# Win rate in percent; the dominant score is its signed distance from 50 and
# the "(ND)" variant the unsigned distance.
win_rate = df_rival['win matches'].div(df_rival['total matches']).mul(100)
df_rival['win rate'] = win_rate
df_rival['dominant score'] = win_rate - 50
df_rival['dominant score (ND)'] = df_rival['dominant score'].abs()
# Keep only match-ups that make up more than 0.01% of all counted rows.
min_matches = df_rival['total matches'].sum() * 0.0001
df_rival = df_rival[df_rival['total matches'] > min_matches].reset_index()

In [18]:
# Preview the first five match-up rows.
df_rival.head(5)

Unnamed: 0,adjposition,match up,win matches,total matches,win rate,dominant score,dominant score (ND)
0,DUO_CARRY,Ashe vs Caitlyn,1836.0,3625,50.648276,0.648276,0.648276
1,DUO_CARRY,Ashe vs Draven,491.0,1029,47.716229,-2.283771,2.283771
2,DUO_CARRY,Ashe vs Ezreal,757.0,1425,53.122807,3.122807,3.122807
3,DUO_CARRY,Ashe vs Jhin,332.0,635,52.283465,2.283465,2.283465
4,DUO_CARRY,Ashe vs Jinx,669.0,1368,48.903509,-1.096491,1.096491


### 合并对手特征到feature_hero_dcdd_rival

In [19]:
# Work on a copy and add, per lane, the "<team 1 champion> vs <team 2 champion>"
# string that is later used as the lookup key into df_rival.
feature_hero_dcdd_rival = feature_hero_dcdd.copy()
for vs_prefix, lane in (
    ('carry', 'DUO_CARRY'),
    ('support', 'DUO_SUPPORT'),
    ('jungle', 'JUNGLE'),
    ('mid', 'MID'),
    ('top', 'TOP'),
):
    feature_hero_dcdd_rival[vs_prefix + '_vs'] = (
        feature_hero_dcdd_rival['1 - ' + lane]
        + ' vs '
        + feature_hero_dcdd_rival['2 - ' + lane]
    )

In [21]:
# For each lane, look up the dominant score of the match-up string built
# above and store it as '<lane prefix>_vs_score'. Left joins: match-ups that
# are absent from df_rival end up as NaN.
for vs_prefix, lane in (
    ('carry', 'DUO_CARRY'),
    ('support', 'DUO_SUPPORT'),
    ('jungle', 'JUNGLE'),
    ('mid', 'MID'),
    ('top', 'TOP'),
):
    lane_scores = (
        df_rival.loc[df_rival['adjposition'] == lane, ['match up', 'dominant score']]
        .rename(columns={'dominant score': vs_prefix + '_vs_score'})
    )
    feature_hero_dcdd_rival = (
        feature_hero_dcdd_rival
        .merge(lane_scores, left_on=vs_prefix + '_vs', right_on='match up', how='left')
        .drop(['match up'], axis=1)
    )

feature_hero_dcdd_rival.head()

Unnamed: 0,matchid,1 - DUO_CARRY,1 - DUO_SUPPORT,1 - JUNGLE,1 - MID,1 - TOP,2 - DUO_CARRY,2 - DUO_SUPPORT,2 - JUNGLE,2 - MID,...,carry_vs,support_vs,jungle_vs,mid_vs,top_vs,carry_vs_score,support_vs_score,jungle_vs_score,mid_vs_score,top_vs_score
0,10,Draven,Nami,Warwick,Viktor,Fiora,Jinx,VelKoz,Skarner,Ahri,...,Draven vs Jinx,Nami vs VelKoz,Warwick vs Skarner,Viktor vs Ahri,Fiora vs Galio,-0.223015,-3.98773,,-5.04,1.256281
1,11,Cassiopeia,Ziggs,Evelynn,Yasuo,Twisted Fate,Caitlyn,Karma,Warwick,Syndra,...,Cassiopeia vs Caitlyn,Ziggs vs Karma,Evelynn vs Warwick,Yasuo vs Syndra,Twisted Fate vs Wukong,,,-4.255319,4.633205,
2,12,Draven,Janna,Amumu,Lux,Vayne,Twitch,Nami,Vi,Ahri,...,Draven vs Twitch,Janna vs Nami,Amumu vs Vi,Lux vs Ahri,Vayne vs Caitlyn,2.918288,2.587992,0.352941,-1.33301,
3,14,Caitlyn,Zac,Wukong,Orianna,Urgot,Lucian,Karma,Warwick,Morgana,...,Caitlyn vs Lucian,Zac vs Karma,Wukong vs Warwick,Orianna vs Morgana,Urgot vs Darius,0.246978,,,1.054852,
4,15,Miss Fortune,Thresh,Warwick,Ahri,Garen,Ziggs,Nami,Lee Sin,Yasuo,...,Miss Fortune vs Ziggs,Thresh vs Nami,Warwick vs Lee Sin,Ahri vs Yasuo,Garen vs Tryndamere,,1.661356,4.265949,1.525719,-0.588235


### 构造队友合作特征

In [22]:
# Sort by match and team/lane label, then pair every row with the champion on
# the following row. Within a team the labels sort as DUO_CARRY, DUO_SUPPORT,
# JUNGLE, MID, TOP, so a DUO_CARRY row is paired with the next DUO_SUPPORT row
# and a JUNGLE row with the next MID row.
# Assumes every slot occurs exactly once per match - TODO confirm.
df_8 = df.sort_values(by=['matchid', 'team_role'], ascending=True)
next_champion = df_8['name'].shift(-1)
df_8['shift -1'] = next_champion
df_8['match up'] = df_8['name'] + ' & ' + next_champion

In [23]:
# Keep the DUO_CARRY and JUNGLE rows (in that order), whose 'match up' holds
# the pairing with the champion on the following row.
# pd.concat is used instead of DataFrame.append, which was deprecated in
# pandas 1.4 and removed in pandas 2.0.
df_teammates = pd.concat([df_8[df_8['adjposition']=='DUO_CARRY'],
                          df_8[df_8['adjposition']=='JUNGLE']])
# Win record of every pairing within each of the two lanes.
df_teammates = df_teammates.groupby(['adjposition', 'match up']).agg({'win': 'sum', 'match up': 'count'})
df_teammates.columns = ['win matches', 'total matches']
# Win rate in percent; the dominant score is its signed distance from 50 and
# the "(ND)" variant the unsigned distance.
df_teammates['win rate'] = df_teammates['win matches'] /  df_teammates['total matches']  * 100
df_teammates['dominant score'] = df_teammates['win rate'] - 50
df_teammates['dominant score (ND)'] = abs(df_teammates['dominant score'])
# Keep only pairings that make up more than 0.01% of all counted rows.
df_teammates = df_teammates[df_teammates['total matches'] > df_teammates['total matches'].sum()*0.0001]
df_teammates = df_teammates.reset_index()

### 合并队友合作特征到feature_hero_dcdd_riva_teama

In [24]:
# Work on a copy and add the team-1 pairing strings ("<champion> & <champion>")
# that are later used as lookup keys into df_teammates.
feature_hero_dcdd_riva_teama = feature_hero_dcdd_rival.copy()
for pair_column, first_slot, second_slot in (
    ('teammates_jungle_mid', '1 - JUNGLE', '1 - MID'),
    ('teammates_carry_sup', '1 - DUO_CARRY', '1 - DUO_SUPPORT'),
):
    feature_hero_dcdd_riva_teama[pair_column] = (
        feature_hero_dcdd_riva_teama[first_slot]
        + ' & '
        + feature_hero_dcdd_riva_teama[second_slot]
    )

In [25]:
# Look up the dominant score of each team-1 pairing and store it as
# '<pair column>_score'. Left joins: pairings that are absent from
# df_teammates end up as NaN.
for pair_column, lane in (
    ('teammates_carry_sup', 'DUO_CARRY'),
    ('teammates_jungle_mid', 'JUNGLE'),
):
    pair_scores = (
        df_teammates.loc[df_teammates['adjposition'] == lane, ['match up', 'dominant score']]
        .rename(columns={'dominant score': pair_column + '_score'})
    )
    feature_hero_dcdd_riva_teama = (
        feature_hero_dcdd_riva_teama
        .merge(pair_scores, left_on=pair_column, right_on='match up', how='left')
        .drop(['match up'], axis=1)
    )

feature_hero_dcdd_riva_teama.head()

Unnamed: 0,matchid,1 - DUO_CARRY,1 - DUO_SUPPORT,1 - JUNGLE,1 - MID,1 - TOP,2 - DUO_CARRY,2 - DUO_SUPPORT,2 - JUNGLE,2 - MID,...,top_vs,carry_vs_score,support_vs_score,jungle_vs_score,mid_vs_score,top_vs_score,teammates_jungle_mid,teammates_carry_sup,teammates_carry_sup_score,teammates_jungle_mid_score
0,10,Draven,Nami,Warwick,Viktor,Fiora,Jinx,VelKoz,Skarner,Ahri,...,Fiora vs Galio,-0.223015,-3.98773,,-5.04,1.256281,Warwick & Viktor,Draven & Nami,1.720183,-3.488372
1,11,Cassiopeia,Ziggs,Evelynn,Yasuo,Twisted Fate,Caitlyn,Karma,Warwick,Syndra,...,Twisted Fate vs Wukong,,,-4.255319,4.633205,,Evelynn & Yasuo,Cassiopeia & Ziggs,,-1.592357
2,12,Draven,Janna,Amumu,Lux,Vayne,Twitch,Nami,Vi,Ahri,...,Vayne vs Caitlyn,2.918288,2.587992,0.352941,-1.33301,,Amumu & Lux,Draven & Janna,4.726688,5.032468
3,14,Caitlyn,Zac,Wukong,Orianna,Urgot,Lucian,Karma,Warwick,Morgana,...,Urgot vs Darius,0.246978,,,1.054852,,Wukong & Orianna,Caitlyn & Zac,,-2.941176
4,15,Miss Fortune,Thresh,Warwick,Ahri,Garen,Ziggs,Nami,Lee Sin,Yasuo,...,Garen vs Tryndamere,,1.661356,4.265949,1.525719,-0.588235,Warwick & Ahri,Miss Fortune & Thresh,2.794562,2.766532


In [29]:
# (rows, columns) of the assembled feature table.
feature_hero_dcdd_riva_teama.shape

(148634, 94)

In [27]:
# Boolean mask of the rows that have at least one missing value.
# ``axis`` is passed by keyword: the positional form ``any(1)`` is deprecated
# in pandas 1.x and no longer accepted by pandas 2.0.
nullrows = feature_hero_dcdd_riva_teama.isnull().any(axis=1)

In [28]:
# Show the rows selected by the missing-value mask.
feature_hero_dcdd_riva_teama[nullrows]

Unnamed: 0,matchid,1 - DUO_CARRY,1 - DUO_SUPPORT,1 - JUNGLE,1 - MID,1 - TOP,2 - DUO_CARRY,2 - DUO_SUPPORT,2 - JUNGLE,2 - MID,...,top_vs,carry_vs_score,support_vs_score,jungle_vs_score,mid_vs_score,top_vs_score,teammates_jungle_mid,teammates_carry_sup,teammates_carry_sup_score,teammates_jungle_mid_score
0,10,Draven,Nami,Warwick,Viktor,Fiora,Jinx,VelKoz,Skarner,Ahri,...,Fiora vs Galio,-0.223015,-3.987730,,-5.040000,1.256281,Warwick & Viktor,Draven & Nami,1.720183,-3.488372
1,11,Cassiopeia,Ziggs,Evelynn,Yasuo,Twisted Fate,Caitlyn,Karma,Warwick,Syndra,...,Twisted Fate vs Wukong,,,-4.255319,4.633205,,Evelynn & Yasuo,Cassiopeia & Ziggs,,-1.592357
2,12,Draven,Janna,Amumu,Lux,Vayne,Twitch,Nami,Vi,Ahri,...,Vayne vs Caitlyn,2.918288,2.587992,0.352941,-1.333010,,Amumu & Lux,Draven & Janna,4.726688,5.032468
3,14,Caitlyn,Zac,Wukong,Orianna,Urgot,Lucian,Karma,Warwick,Morgana,...,Urgot vs Darius,0.246978,,,1.054852,,Wukong & Orianna,Caitlyn & Zac,,-2.941176
4,15,Miss Fortune,Thresh,Warwick,Ahri,Garen,Ziggs,Nami,Lee Sin,Yasuo,...,Garen vs Tryndamere,,1.661356,4.265949,1.525719,-0.588235,Warwick & Ahri,Miss Fortune & Thresh,2.794562,2.766532
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
148629,187582,Ezreal,ChoGath,Lee Sin,Ekko,Malphite,Tristana,Braum,Master Yi,Lux,...,Malphite vs Riven,-0.579710,,-2.375566,-0.501672,1.538462,Lee Sin & Ekko,Ezreal & ChoGath,,-1.795143
148630,187583,Tristana,Morgana,Warwick,Ziggs,Nasus,Vayne,Janna,RekSai,Veigar,...,Nasus vs Teemo,0.934120,-5.229717,,5.263158,-4.754098,Warwick & Ziggs,Tristana & Morgana,-4.529915,5.018587
148631,187585,Tristana,Rammus,Nidalee,Ekko,Rumble,Lucian,Braum,Shaco,Zed,...,Rumble vs Gnar,1.125989,,,5.515588,,Nidalee & Ekko,Tristana & Rammus,,-3.174603
148632,187586,Kindred,Braum,Lee Sin,Ekko,Darius,Ashe,Janna,Volibear,LeBlanc,...,Darius vs Nasus,,-4.142357,-0.959488,1.242236,7.517900,Lee Sin & Ekko,Kindred & Braum,,-1.795143


### 存储特征csv

In [36]:
# Write the feature table to CSV in the working directory, without the row index.
feature_hero_dcdd_riva_teama.to_csv('feature_hero_dcdd_riva_teama.csv',index =False)

In [37]:
# Read the saved CSV back, replacing the in-memory frame with the file contents.
feature_hero_dcdd_riva_teama = pd.read_csv('feature_hero_dcdd_riva_teama.csv')

In [39]:
# Print column names, non-null counts and dtypes of the reloaded table.
# NOTE(review): the recorded output lists 95 columns including 'Unnamed: 0',
# while the frame had 94 columns before saving and the CSV is written with
# index=False - the output looks stale; re-run the save/load cells to confirm.
feature_hero_dcdd_riva_teama.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 148634 entries, 0 to 148633
Data columns (total 95 columns):
Unnamed: 0                      148634 non-null int64
matchid                         148634 non-null int64
1 - DUO_CARRY                   148634 non-null object
1 - DUO_SUPPORT                 148634 non-null object
1 - JUNGLE                      148634 non-null object
1 - MID                         148634 non-null object
1 - TOP                         148634 non-null object
2 - DUO_CARRY                   148634 non-null object
2 - DUO_SUPPORT                 148634 non-null object
2 - JUNGLE                      148634 non-null object
2 - MID                         148634 non-null object
2 - TOP                         148634 non-null object
T1 win                          148634 non-null float64
1_total_kills                   148634 non-null float64
2_total_kills                   148634 non-null float64
1 - MID_kills                   148634 non-null float64
1 - MID