In [1]:
import numpy as np
import pandas as pd
import matplotlib as mpl
from matplotlib import pyplot as plt
mpl.rcParams['axes.unicode_minus'] = False
plt.style.use('ggplot')
import seaborn as sns
import missingno as maso
%matplotlib inline

import re
from tqdm import tqdm
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import KFold 
!pip install bayesian-optimization
from bayes_opt import BayesianOptimization
from functools import partial
import lightgbm as lgb
from sklearn.cluster import KMeans
import warnings
warnings.filterwarnings("ignore")

  import pandas.util.testing as tm


Collecting bayesian-optimization
  Downloading https://files.pythonhosted.org/packages/b5/26/9842333adbb8f17bcb3d699400a8b1ccde0af0b6de8d07224e183728acdf/bayesian_optimization-1.1.0-py3-none-any.whl
Installing collected packages: bayesian-optimization
Successfully installed bayesian-optimization-1.1.0


In [0]:
# Read the raw train / test event logs from the mounted Drive folder.
train_data = pd.read_csv(
    filepath_or_buffer='/content/drive/My Drive/data/dacon게임분석대회/train.csv'
)
test_data = pd.read_csv(
    filepath_or_buffer='/content/drive/My Drive/data/dacon게임분석대회/test.csv'
)

In [0]:
# Shrink numeric columns and keep text columns as plain Python objects.
# The builtin `object` is used instead of the `np.object` alias, which NumPy
# deprecated in 1.20 and removed in 1.24.
for column_name, column_dtype in {
    "winner": np.uint8,
    "time": np.float32,
    "player": np.uint8,
    "species": object,
    "event": object,
    "event_contents": object,
}.items():
    train_data[column_name] = train_data[column_name].astype(column_dtype)

In [0]:
# Same casts as for the training frame, minus the `winner` label.
# The builtin `object` replaces the `np.object` alias, which NumPy deprecated
# in 1.20 and removed in 1.24.
for column_name, column_dtype in {
    "time": np.float32,
    "player": np.uint8,
    "species": object,
    "event": object,
    "event_contents": object,
}.items():
    test_data[column_name] = test_data[column_name].astype(column_dtype)

In [0]:
# Integer codes for `event` and `species`, numbered in order of first
# appearance in the training frame. Values unseen in training map to NaN.
event_dict = dict(zip(train_data.event.unique(), range(train_data.event.nunique())))
species_dict = dict(zip(train_data.species.unique(), range(train_data.species.nunique())))

train_data['event'] = train_data['event'].map(event_dict)
train_data['species'] = train_data['species'].map(species_dict)

test_data['event'] = test_data['event'].map(event_dict)
test_data['species'] = test_data['species'].map(species_dict)

In [0]:
def map_time(df, seconds=0, interval=0):
  """Add an integer 'stage' column that buckets every event of every game.

  Exactly one mode is used; `seconds` takes priority over `interval`:

  * seconds:  stage = time // seconds.
  * interval: with k = rows_in_game // interval, the first k + 1 rows of a
    game get stage 0 and every following run of k rows gets the next stage.
    Stages can therefore exceed `interval - 1` when the row count is not a
    multiple of `interval`. When k == 0 the first row gets stage 0 and all
    remaining rows get stage 1.

  If neither argument is truthy, 'stage' is left at 0.

  The work is done per game with vectorised group operations, so it does not
  depend on game ids being contiguous or on the row index being 0..N-1.

  Args:
    df: event log with a 'game_id' column (and 'time' for the seconds mode).
      Modified in place.
    seconds: bucket width in time units, or 0 to disable.
    interval: number of equal-sized chunks per game, or 0 to disable.

  Returns:
    The same `df` object, with the 'stage' column added.
  """
  df['stage'] = 0 # add new column of 'stage' to df
  if len(df) == 0:
    return df
  # Kept from the original implementation: report the game id of the last row.
  print(df['game_id'].iloc[-1])

  if seconds:
    df['stage'] = (df['time'] // seconds).astype('int64')
  elif interval:
    games = df['game_id']
    # 0-based position of each row inside its own game.
    position = df.groupby('game_id', sort=False).cumcount().values
    # Rows per chunk for the game each row belongs to.
    chunk = (games.map(games.value_counts()) // interval).values
    # First chunk holds k + 1 rows, hence the shift by one before dividing.
    regular = np.maximum(position - 1, 0) // np.maximum(chunk, 1)
    # Games shorter than `interval` rows: first row is stage 0, the rest stage 1.
    short_game = np.minimum(position, 1)
    df['stage'] = np.where(chunk > 0, regular, short_game).astype('int64')
  return df

In [0]:
# Split every game into stage buckets (interval mode, 7 chunks per game).
train_data = map_time(df=train_data, interval=7)
test_data = map_time(df=test_data, interval=7)

In [0]:
# Prepare the data the model is trained on
def data_preparation(df, answer=False):
    """Build one feature row per game from the raw event log.

    For every game the row holds each player's species code and, per player,
    how many times each event code occurred.

    Args:
        df: event log with 'game_id', 'player', 'species' and 'event' columns
            (plus 'winner' when `answer` is True).
        answer: when True, also collect the per-game label.

    Returns:
        (x_data, y_data): `x_data` is a DataFrame indexed by game_id with the
        columns P0_species, P0_<event>..., P1_species, P1_<event>...;
        `y_data` is a numpy array with the per-game maximum of 'winner', or an
        empty array when `answer` is False.
    """
    game_ids = df['game_id'].unique()
    events = ['0','1','2','3','4','5','6','7']
    unique_event_0, unique_event_1, delta_event = {}, {}, {}
    # Zero-filled templates so every game row has the P0_0..P0_7 / P1_0..P1_7 keys
    # even when an event code never occurs in that game.
    for event in events:
        unique_event_0['P0_' + event] = 0
        unique_event_1['P1_' + event] = 0
        #delta_event['delta_' + event] = 0
        
    # Pre-compute the per-(game, player) aggregates once, before the loop.
    species = df.groupby(['game_id', 'player']).species.unique()
    event_count = df.groupby(['game_id', 'player']).event.value_counts()
    if answer:
        winners = df.groupby(['game_id']).winner.max()
    
    x_data, y_data = [], []
    for game_id in tqdm(game_ids):
        count = 0
        # Rows = player, columns = event code, values = occurrence count.
        df_event_count = event_count[game_id].unstack(level=-1)
        # NOTE: from here on `df` is rebound to a small per-game frame; the
        # original event log is only reachable through the aggregates above.
        df = pd.DataFrame(species[game_id])
        df = pd.concat([df, df_event_count], axis=1)   
        df = df.fillna(0)
        
        # `species` holds an array of unique values per player; take the first.
        # Indexing with .loc[0] / .loc[1] assumes both players have rows in the game.
        df_P0_species = pd.DataFrame([df.loc[0]['species'][0]], columns=['P0_species'])        
        df_P1_species = pd.DataFrame([df.loc[1]['species'][0]], columns=['P1_species'])
        df = df.drop(['species'], axis=1)

        # Overlay this game's counts on the zero template; keys are built with
        # str(column), so they match the template only if the event codes print
        # as '0'..'7' — presumably integer codes; TODO confirm.
        df_P0_event = unique_event_0.copy()
        for column in df.columns:
            df_P0_event['P0_' + str(column)] = df.loc[0][column]
        df_P0_event = pd.DataFrame(pd.Series(df_P0_event)).T

        df_P1_event = unique_event_1.copy()
        for column in df.columns:
            df_P1_event['P1_' + str(column)] = df.loc[1][column]
        df_P1_event = pd.DataFrame(pd.Series(df_P1_event)).T
        """
        df_delta_event = delta_event.copy()
        for column in df.columns:
            df_delta_event['delta_' + str(column)] = df_P0_event['P0_' + str(column)][0] - df_P1_event['P1_' + str(column)][0]
        df_delta_event = pd.DataFrame(pd.Series(df_delta_event)).T
        """
        out = pd.concat([df_P0_species, df_P0_event, df_P1_species, df_P1_event], axis=1) #, df_delta_event
        # Single-row frame labelled with the game id.
        out.index = [game_id]
        out.index.name = 'game_id'
        
        x_data.append(out)
        if answer:
            y_data.append(winners[game_id])  

    # One concat at the end instead of growing a frame inside the loop.
    x_data = pd.concat(x_data)
    y_data = np.array(y_data)
    
    return x_data, y_data

In [0]:
# Per-game training features and labels.
x_train, y_train = data_preparation(df=train_data, answer=True)

In [0]:
# Per-game test features; the (empty) label array is discarded.
x_test, _ = data_preparation(df=test_data, answer=False)

In [0]:
# Per-species counters for units and buildings. get_species() hands out copies
# of these, and the *_point_split functions increment / decrement them while
# replaying a game, so the values here are the initial counts (presumably the
# starting roster: 6 workers, 1 town hall, 1 Overlord — TODO confirm).
#
# Key ORDER matters: the scoring code pairs `list(counts.values())` with
# `list(points.values())` by position, so each table must list its keys in the
# same order as the matching *_point table.
#
# NOTE(review): 'Rava' is the key produced by the 'MorphToRavager' -> 'MorphRava'
# rename in get_unit_point_split. 'InfestedTrrran' and 'Immertal' look like
# typos (the latter duplicates 'Immortal'); confirm against the real ability names.
zerg_unit = {'Rava' : 0, 'Drone' : 6, 'Overlord' : 1, 'Overseer' : 0, 'Viper' : 0, 'SwarmHost' : 0, 'Changeling' : 0, 'Zergling' : 0, 'Baneling' : 0, 'Queen' : 0, 'Hydralisk' : 0, 'Lurker' : 0, 'Roach' : 0, 'Mutalisk' : 0, 'Corruptor' : 0, 'BroodLord' : 0, 'Broodling' : 0, 'Infestor' : 0, 'InfestedTrrran' : 0, 'Ultralisk' : 0, 'NydusWorm' : 0}
zerg_building = {'Hatchery' : 1, 'Lair' : 0, 'Hive' : 0, 'Extractor' : 0, 'EvolutionChamber' : 0, 'SpawningPool' : 0, 'BanelingNest' : 0, 'RoachWarren' : 0, 'SpineCrawler' : 0, 'SporeCrawler' : 0, 'HydraliskDen' : 0, 'Spire' : 0, 'InfestationPit' : 0, 'NydusNetwork' : 0, 'GreaterSpire' : 0, 'UltraliskCavern' : 0, 'NydusCanal' : 0}
# Skill tables are placeholders: defined but empty for every species.
zerg_skill = {}

terran_unit = {'SCV' : 6, 'MULE' : 0, 'Nuke' : 0, 'Marine' : 0, 'Cyclone' : 0, 'Marauder' : 0, 'Reaper' : 0, 'Ghost' : 0, 'Hellion' : 0, 'BattleHellion' : 0, 'SiegeTank' : 0, 'Thor' : 0, 'Viking' : 0, 'Medivac' : 0, 'Banshee' : 0, 'Raven' : 0, 'AutoTurret' : 0, 'Battlecruiser' : 0, 'WidowMine' : 0, 'Liberator' : 0}
terran_building = {'CommandCenter' : 1, 'PlanetaryFortress' : 0, 'OrbitalCommand' : 0, 'SupplyDepot' : 0, 'Refinery' : 0, 'Barracks' : 0, 'BarracksTechLab' : 0, 'BarracksReactor' : 0, 'Bunker' : 0, 'EngineeringBay' : 0, 'MissileTurret' : 0, 'SensorTower' : 0, 'GhostAcademy' : 0, 'Factory' : 0,'FactoryTechLab' : 0,'FactoryReactor' : 0, 'Starport' : 0, 'StarportTechLab' : 0, 'StarportReactor' : 0,'Armory' : 0, 'FusionCore' : 0, 'TechLab' : 0, 'Reactor' : 0}
terran_skill = {}

protoss_unit = {'Probe' : 6, 'MothershipCore' : 0, 'Mothership' : 0, 'Zealot' : 0, 'Immortal' : 0, 'Stalker' : 0, 'Sentry' : 0, 'HighTemplar' : 0, 'DarkTemplar' : 0, 'Archon' : 0, 'Immertal' : 0, 'Observer' : 0, 'WarpPrism' : 0, 'Colossus' : 0, 'Disruptor' : 0, 'Phoenix' : 0, 'Carrier' : 0, 'VoidRay' : 0, 'Tempest' : 0, 'Oracle' : 0, 'Adept' : 0}
protoss_building = {'Nexus' : 1, 'Pylon' : 0, 'Assimilator' : 0, 'Gateway' : 0, 'WarpGate' : 0, 'Forge' : 0, 'CyberneticsCore' : 0, 'PhotonCannon' : 0, 'Stargate' : 0, 'TwilightCouncil' : 0, 'RoboticsFacility' : 0, 'RoboticsBay' : 0, 'DarkShrine' : 0, 'TemplarArchive' : 0, 'FleetBeacon' : 0}
protoss_skill = {}

In [0]:
# Score weight per unit / building, multiplied by the running counts when a
# stage total is computed (presumably a resource-cost proxy — TODO confirm the
# source of these numbers).
#
# Key ORDER matters: values are paired by position with the matching count
# table (zerg_unit <-> zerg_unit_point, ...), so both tables must list their
# keys in the same order.
zerg_unit_point = {'Rava' : 0, 'Drone' : 50, 'Overlord' : 100, 'Overseer' : 100, 'Viper' : 300, 'SwarmHost' : 175, 'Changeling' : 0, 'Zergling' : 50, 'Baneling' : 50, 'Queen' : 150, 'Hydralisk' : 150, 'Lurker' : 150, 'Roach' : 100, 'Mutalisk' : 200, 'Corruptor' : 250, 'BroodLord' : 300, 'Broodling' : 0, 'Infestor' : 250, 'InfestedTrrran' : 0, 'Ultralisk' : 500, 'NydusWorm' : 200}
zerg_building_point = {'Hatchery' : 300, 'Lair' : 250, 'Hive' : 350, 'Extractor' : 25, 'EvolutionChamber' : 75, 'SpawningPool' : 200, 'BanelingNest' : 150, 'RoachWarren' : 150, 'SpineCrawler' : 100, 'SporeCrawler' : 75, 'HydraliskDen' : 200, 'Spire' : 400, 'InfestationPit' : 200, 'NydusNetwork' : 350, 'GreaterSpire' : 250, 'UltraliskCavern' : 350, 'NydusCanal' : 350}
# Skill point tables are placeholders: defined but empty for every species.
zerg_skill_point = {}

terran_unit_point = {'SCV' : 50, 'MULE' : 0, 'Nuke' : 0, 'Marine' : 50, 'Cyclone' : 250, 'Marauder' : 125, 'Reaper' : 100, 'Ghost' : 275, 'Hellion' : 100, 'BattleHellion' : 0 ,'SiegeTank' : 275, 'Thor' : 500, 'Viking' : 225, 'Medivac' : 200, 'Banshee' : 250, 'Raven' : 300, 'AutoTurret' : 0, 'Battlecruiser' : 700, 'WidowMine' : 100, 'Liberator' : 0}
terran_building_point = {'CommandCenter' : 400, 'PlanetaryFortress' : 200, 'OrbitalCommand' : 150, 'SupplyDepot' : 100, 'Refinery' : 75, 'Barracks' : 150, 'BarracksTechLab' : 100, 'BarracksReactor' : 75, 'Bunker' : 100, 'EngineeringBay' : 125, 'MissileTurret' : 100, 'SensorTower' : 225, 'GhostAcademy' : 200, 'Factory' : 250, 'FactoryTechLab' : 75, 'FactoryReactor' : 100, 'Starport' : 250, 'StarportTechLab' : 75, 'StarportReactor' : 100, 'Armory' : 250, 'FusionCore' : 300, 'TechLab' : 75, 'Reactor' : 100}
terran_skill_point = {}

protoss_unit_point = {'Probe' : 50, 'MothershipCore' : 200, 'Mothership' : 800, 'Zealot' : 100, 'Immortal' : 350, 'Stalker' : 175, 'Sentry' : 150, 'HighTemplar' : 200, 'DarkTemplar' : 250, 'Archon' : 0, 'Immertal' : 0, 'Observer' : 100, 'WarpPrism' : 200, 'Colossus' : 500, 'Disruptor' : 300, 'Phoenix' : 250, 'Carrier' : 600, 'VoidRay' : 400, 'Tempest' : 425, 'Oracle' : 300, 'Adept' : 125}
protoss_building_point = {'Nexus' : 400, 'Pylon' : 100, 'Assimilator' : 75, 'Gateway' : 150, 'WarpGate' : 0, 'Forge' : 150, 'CyberneticsCore' : 150, 'PhotonCannon' : 150, 'Stargate' : 300, 'TwilightCouncil' : 250, 'RoboticsFacility' : 300, 'RoboticsBay' : 400, 'DarkShrine' : 300, 'TemplarArchive' : 350, 'FleetBeacon' : 500}
protoss_skill_point = {}

In [0]:
# Species tables of both players for one game_id
def get_species(df, game_id):
  """Return fresh count / point tables for both players of one game.

  Each player's species code is read from that player's first row in the game
  and used to pick the matching module-level tables.

  Args:
    df: event log with 'game_id', 'player' and 'species' columns.
    game_id: id of the game to look up.

  Returns:
    A list of eight independent dict copies, in this order:
    [P0_unit, P0_building, P0_unit_point, P0_building_point,
     P1_unit, P1_building, P1_unit_point, P1_building_point].

  Raises:
    IndexError: if a player has no rows in the game.
    ValueError: if a species code is not 0, 1 or 2.
  """
  # Species code -> (unit counts, building counts, unit points, building points).
  # NOTE(review): codes come from species_dict (order of first appearance in the
  # training data); this mapping assumes 0=Terran, 1=Protoss, 2=Zerg — confirm.
  tables_by_species = {
    0: (terran_unit, terran_building, terran_unit_point, terran_building_point),
    1: (protoss_unit, protoss_building, protoss_unit_point, protoss_building_point),
    2: (zerg_unit, zerg_building, zerg_unit_point, zerg_building_point),
  }

  game = df.loc[df['game_id'] == game_id]
  result = []
  for player in (0, 1):
    species = game.loc[game['player'] == player, 'species'].iloc[0]
    try:
      tables = tables_by_species[species]
    except KeyError:
      raise ValueError(
        'unknown species code {!r} for player {} in game {}'.format(species, player, game_id))
    # Copies, so callers can mutate the counters without touching the templates.
    result.extend(table.copy() for table in tables)
  return result

In [0]:
# Extract each player's ability names for one game_id
def get_event(df, game_id):
    """Return the cleaned ability name of every event-code-2 row, per player.

    The ability name is taken to be the third whitespace-separated token of
    'event_contents', with the characters ``-=.#/?:$};`` removed. Rows whose
    text has fewer than three tokens (or is missing) yield 'null'.

    Args:
        df: event log with 'game_id', 'event', 'player' and 'event_contents'.
        game_id: id of the game to extract.

    Returns:
        [names_player0, names_player1], two lists of strings in row order.
        A player without any event-code-2 row gets the one-item list ['null'].
    """
    abilities = df.loc[(df['game_id'] == game_id) & (df['event'] == 2)]

    def _ability_names(player):
        # Third token of each row, cleaned; 'null' where it does not exist.
        contents = abilities.loc[abilities['player'] == player, 'event_contents']
        if contents.empty:
            return ['null']
        # .str[2] gives NaN for short / missing rows instead of raising, so no
        # exception-based fallback is needed.
        third_token = contents.str.split().str[2].fillna('null')
        return [re.sub('[-=.#/?:$};]', '', token) for token in third_token]

    return [_ability_names(0), _ability_names(1)]

In [0]:
def get_event_time(df, game_id):
    """Return the 'stage' of every event-code-2 row of a game, per player.

    Returns:
        [stages_player0, stages_player1], two plain lists in row order.
    """
    in_game = df['game_id'] == game_id
    is_ability = df['event'] == 2
    abilities = df.loc[in_game & is_ability]
    return [
        abilities.loc[abilities['player'] == player, 'stage'].tolist()
        for player in (0, 1)
    ]

In [0]:
# Unit production score, split by stage
def get_unit_point_split(df, game_id):
  """Score each player's unit roster at the end of every stage of one game.

  The ability events of a player are replayed in order. 'Train*' / 'Morph*'
  abilities add one unit, 'Cancel*' abilities remove one. Whenever the stage
  of the next event is higher than the current one, the weighted roster
  (count * point, summed over all units) is added to the slot of the current
  stage. Stages beyond the last slot are folded into the last slot.

  Args:
    df: event log accepted by get_species / get_event / get_event_time.
    game_id: id of the game to score.

  Returns:
    (P0_time, P1_time): two lists of 7 numbers, one per stage slot.
  """
  tables = get_species(df, game_id)
  names_by_player = get_event(df, game_id)
  stages_by_player = get_event_time(df, game_id)

  # Ability names that do not follow the "<5-letter verb><Unit>" layout are
  # rewritten so that name[5:] is a key of the unit table, or so that they no
  # longer look like a Train/Morph ability at all.
  # NOTE(review): 'CalldwonMULE' looks like a typo of 'CalldownMULE', and
  # get_building_point_split also maps 'BuildHellion' -> 'TrainHellion', which
  # is missing here — confirm against the real ability names.
  renames = {
    'BuildSiegeTank': 'TrainSiegeTank',
    'BuildWidowMine': 'TrainWidowMine',
    'MorphToOverseer': 'MorphOverseer',
    'CalldwonMULE': 'TrainMULE',
    'MorphToRavager': 'MorphRava',
    'MorphToTransportOverlord': 'ToTransportOverlord',
    'MorphToGreaterSpire': 'BuildGreaterSpire',
    'MorphToBroodLord': 'MorphBroodLord',
    'MorphToLurker': 'MorphLurker',
    'TrainInterceptor': 'Interceptor',
    'BuildAutoTurret': 'TrainAutoTurret',
    'BuildThor': 'TrainThor',
    'BuildBattleHellion': 'TrainBattleHellion',
    'BuildPointDefenseDrone': 'PointDefenseDrone',
  }

  def _stage_totals(names, stages, counts, points):
    # Replay one player's abilities and return the per-stage roster score.
    # Two sentinels so stages[i + 1] exists for the last event and is never
    # "lower" than a real stage slot.
    stages = list(stages) + [7, 7]
    totals = [0, 0, 0, 0, 0, 0, 0]
    # Counts and points are paired by position, not by key.
    point_values = list(points.values())

    for i, raw_name in enumerate(names):
      name = re.sub('[-=.#/?:$};]', '', raw_name)
      name = renames.get(name, name)
      is_cancel = 'Cancel' in name

      # Parenthesised on purpose: without the parentheses `and` binds tighter
      # than `or`, so the Cancel check did not apply to 'Train' names.
      if ('Train' in name or 'Morph' in name) and not is_cancel:
        unit = name[5:]
        # Unknown names are ignored, like in the cancel branch below,
        # instead of raising KeyError and aborting the whole game.
        if unit in counts:
          counts[unit] += 1
      elif is_cancel and 'Build' not in name:
        unit = name[6:]
        if unit in counts:
          counts[unit] -= 1

      # Last event of a stage: snapshot the weighted roster.
      if stages[i] < stages[i + 1]:
        snapshot = sum(count * point for count, point in zip(counts.values(), point_values))
        try:
          totals[stages[i]] += snapshot
        except IndexError:
          # Stage index past the last slot: fold into the final slot.
          totals[-1] += snapshot
    return totals

  P0_time = _stage_totals(names_by_player[0], stages_by_player[0], tables[0], tables[2])
  P1_time = _stage_totals(names_by_player[1], stages_by_player[1], tables[4], tables[6])
  return P0_time, P1_time

In [0]:
def get_building_point_split(df, game_id):
  """Score each player's buildings at the end of every stage of one game.

  The ability events of a player are replayed in order. 'Build*' abilities
  (that are neither a cancel nor a halt) add one building, 'Cancel*' abilities
  remove one. Whenever the stage of the next event is higher than the current
  one, the weighted inventory (count * point, summed over all buildings) is
  added to the slot of the current stage. Stages beyond the last slot are
  folded into the last slot.

  Args:
    df: event log accepted by get_species / get_event / get_event_time.
    game_id: id of the game to score.

  Returns:
    (P0_time, P1_time): two lists of 7 numbers, one per stage slot.
  """
  tables = get_species(df, game_id)
  names_by_player = get_event(df, game_id)
  stages_by_player = get_event_time(df, game_id)

  # 'Build*' abilities that actually produce a unit (or something that is not
  # in the building tables) are rewritten so they no longer count as a build.
  renames = {
    'BuildSiegeTank': 'TrainSiegeTank',
    'BuildWidowMine': 'TrainWidowMine',
    'BuildHellion': 'TrainHellion',
    'BuildCreepTumor': 'CreepTumor',
    'BuildOracleStasisTrap': 'OracleStasisTrap',
    'BuildAutoTurret': 'TrainAutoTurret',
    'BuildThor': 'TrainThor',
    'BuildBattleHellion': 'TrainBattleHellion',
    'BuildPointDefenseDrone': 'PointDefenseDrone',
  }

  def _stage_totals(names, stages, counts, points):
    # Replay one player's abilities and return the per-stage building score.
    # Two sentinels so stages[i + 1] exists for the last event and is never
    # "lower" than a real stage slot.
    stages = list(stages) + [7, 7]
    totals = [0, 0, 0, 0, 0, 0, 0]
    # Counts and points are paired by position, not by key.
    point_values = list(points.values())

    for i, raw_name in enumerate(names):
      name = re.sub('[-=.#/?:$};]', '', raw_name)
      name = renames.get(name, name)
      is_build = 'Build' in name
      is_cancel = 'Cancel' in name

      if is_build and not is_cancel and 'Halt' not in name:
        building = name[5:]
        # Unknown names are ignored, like in the cancel branch below,
        # instead of raising KeyError and aborting the whole game.
        if building in counts:
          counts[building] += 1
      elif is_cancel and not is_build:
        building = name[6:]
        if building in counts:
          counts[building] -= 1

      # Last event of a stage: snapshot the weighted inventory.
      if stages[i] < stages[i + 1]:
        snapshot = sum(count * point for count, point in zip(counts.values(), point_values))
        try:
          totals[stages[i]] += snapshot
        except IndexError:
          # Stage index past the last slot: fold into the final slot.
          totals[-1] += snapshot
    return totals

  P0_time = _stage_totals(names_by_player[0], stages_by_player[0], tables[1], tables[3])
  P1_time = _stage_totals(names_by_player[1], stages_by_player[1], tables[5], tables[7])
  return P0_time, P1_time

In [0]:
# Compute the unit / building stage scores for every game
def get_total_point_split(df):
    """Collect the per-stage unit and building scores of every game in `df`.

    Games are processed in order of first appearance of their game_id, using
    the ids that are actually present (they need not start at 0).

    Processing is best-effort: if a game raises, the error is reported and the
    scores gathered so far are returned, so the result may cover only a
    leading subset of the games.

    Args:
        df: event log accepted by get_unit_point_split / get_building_point_split.

    Returns:
        [P0_unit, P0_building, P1_unit, P1_building], four lists holding one
        7-slot score list per processed game.
    """
    P0_total_unit_point = []
    P0_total_building_point = []
    P1_total_unit_point = []
    P1_total_building_point = []

    for game_id in df['game_id'].unique():
        try:
            unit_point = get_unit_point_split(df, game_id)
            building_point = get_building_point_split(df, game_id)
        except Exception as error:
            # Report instead of swallowing silently; stop here so the rows
            # already collected stay aligned with the leading games.
            print('get_total_point_split: stopped at game_id {}: {!r}'.format(game_id, error))
            break

        P0_total_unit_point.append(unit_point[0])
        P1_total_unit_point.append(unit_point[1])
        P0_total_building_point.append(building_point[0])
        P1_total_building_point.append(building_point[1])

    return [P0_total_unit_point, P0_total_building_point, P1_total_unit_point, P1_total_building_point]

In [0]:
# Stage scores for every training game.
total = get_total_point_split(df=train_data)

In [0]:
# Unpack the four score lists (order fixed by get_total_point_split).
# The stray second '=' on the building line was a syntax error.
P0_unit_split = total[0]
P0_building_split = total[1]
P1_unit_split = total[2]
P1_building_split = total[3]

In [0]:
# One row per game, one integer column per stage slot (t1..t7).
P0_unit_df = pd.DataFrame(
    data=P0_unit_split,
    columns=['P0_unit_t1', 'P0_unit_t2', 'P0_unit_t3', 'P0_unit_t4', 'P0_unit_t5', 'P0_unit_t6', 'P0_unit_t7'],
    dtype=int,
)
P0_building_df = pd.DataFrame(
    data=P0_building_split,
    columns=['P0_building_t1', 'P0_building_t2', 'P0_building_t3', 'P0_building_t4', 'P0_building_t5', 'P0_building_t6', 'P0_building_t7'],
    dtype=int,
)
P1_unit_df = pd.DataFrame(
    data=P1_unit_split,
    columns=['P1_unit_t1', 'P1_unit_t2', 'P1_unit_t3', 'P1_unit_t4', 'P1_unit_t5', 'P1_unit_t6', 'P1_unit_t7'],
    dtype=int,
)
P1_building_df = pd.DataFrame(
    data=P1_building_split,
    columns=['P1_building_t1', 'P1_building_t2', 'P1_building_t3', 'P1_building_t4', 'P1_building_t5', 'P1_building_t6', 'P1_building_t7'],
    dtype=int,
)

In [0]:
# Append the stage-score columns to the training features (inner join on index).
x_train = (
    x_train
    .merge(P0_unit_df, left_index=True, right_index=True)
    .merge(P0_building_df, left_index=True, right_index=True)
    .merge(P1_unit_df, left_index=True, right_index=True)
    .merge(P1_building_df, left_index=True, right_index=True)
)

In [0]:
# Stage scores for every test game.
total_t = get_total_point_split(df=test_data)

In [0]:
# Unpack the four score lists (order fixed by get_total_point_split).
# The stray second '=' on the building line was a syntax error.
P0_unit_split_t = total_t[0]
P0_building_split_t = total_t[1]
P1_unit_split_t = total_t[2]
P1_building_split_t = total_t[3]

In [0]:
# One row per game, one integer column per stage slot (t1..t7).
P0_unit_df_t = pd.DataFrame(
    data=P0_unit_split_t,
    columns=['P0_unit_t1', 'P0_unit_t2', 'P0_unit_t3', 'P0_unit_t4', 'P0_unit_t5', 'P0_unit_t6', 'P0_unit_t7'],
    dtype=int,
)
P0_building_df_t = pd.DataFrame(
    data=P0_building_split_t,
    columns=['P0_building_t1', 'P0_building_t2', 'P0_building_t3', 'P0_building_t4', 'P0_building_t5', 'P0_building_t6', 'P0_building_t7'],
    dtype=int,
)
P1_unit_df_t = pd.DataFrame(
    data=P1_unit_split_t,
    columns=['P1_unit_t1', 'P1_unit_t2', 'P1_unit_t3', 'P1_unit_t4', 'P1_unit_t5', 'P1_unit_t6', 'P1_unit_t7'],
    dtype=int,
)
P1_building_df_t = pd.DataFrame(
    data=P1_building_split_t,
    columns=['P1_building_t1', 'P1_building_t2', 'P1_building_t3', 'P1_building_t4', 'P1_building_t5', 'P1_building_t6', 'P1_building_t7'],
    dtype=int,
)

In [0]:
# Append the stage-score columns to the test features.
# The score frames carry a positional 0..n-1 index, while x_test is indexed by
# game_id. Joining on the raw indexes only matches when the game ids happen to
# be exactly 0..n-1, so each frame is first relabelled with x_test's ids.
# Assumes row i of a score frame belongs to the i-th game of x_test — TODO confirm.
for split_frame in (P0_unit_df_t, P0_building_df_t, P1_unit_df_t, P1_building_df_t):
    aligned = split_frame.copy()
    aligned.index = x_test.index[:len(aligned)]
    x_test = pd.merge(x_test, aligned, left_index=True, right_index=True)

In [0]:
def get_species_winrate(df, game_id):
  """Return the prior win rates (in percent) for the species match-up of a game.

  Each player's species code is read from that player's first row in the game.
  Mirror match-ups get 50 / 50; the other pairs use fixed constants.

  Args:
    df: event log with 'game_id', 'player' and 'species' columns.
    game_id: id of the game to look up.

  Returns:
    [P0_winrate, P1_winrate].

  Raises:
    IndexError: if a player has no rows in the game.
    ValueError: if the pair of species codes is not a known match-up.
  """
  # (P0 species, P1 species) -> (P0 win rate, P1 win rate).
  # NOTE(review): the source of these percentages is not shown in this notebook.
  matchup_rates = {
    (0, 1): (49.88, 50.12),
    (1, 0): (50.12, 49.88),
    (0, 2): (49.35, 50.65),
    (2, 0): (50.65, 49.35),
    (1, 2): (48.79, 51.21),
    (2, 1): (51.21, 48.79),
  }

  game = df.loc[df['game_id'] == game_id]
  P0_species = game.loc[game['player'] == 0, 'species'].iloc[0]
  P1_species = game.loc[game['player'] == 1, 'species'].iloc[0]

  if P0_species == P1_species:
    return [50, 50]
  try:
    P0_winrate, P1_winrate = matchup_rates[(P0_species, P1_species)]
  except KeyError:
    raise ValueError(
      'unknown species match-up {!r} vs {!r} in game {}'.format(P0_species, P1_species, game_id))
  return [P0_winrate, P1_winrate]

In [0]:
def get_total_species_winrate(df):
  """Collect the species match-up win rates of every game in `df`.

  Games are processed in order of first appearance of their game_id, using the
  ids that are actually present (they need not start at 0).

  Args:
    df: event log accepted by get_species_winrate.

  Returns:
    (P0_total_winrate, P1_total_winrate): two lists with one value per game.
  """
  P0_total_winrate = []
  P1_total_winrate = []

  for game_id in df['game_id'].unique():
    P0_winrate, P1_winrate = get_species_winrate(df, game_id)
    P0_total_winrate.append(P0_winrate)
    P1_total_winrate.append(P1_winrate)

  return P0_total_winrate, P1_total_winrate

In [0]:
# Match-up win rates for every training game.
winrate = get_total_species_winrate(df=train_data)

In [0]:
# Split the (player 0, player 1) pair returned above.
P0_species_winrate, P1_species_winrate = winrate[0], winrate[1]

In [0]:
# Wrap each list in a single-column float frame (one row per game).
P0_species_winrate = pd.DataFrame(
    data=P0_species_winrate,
    columns=['P0_species_winrate'],
    dtype=float,
)
P1_species_winrate = pd.DataFrame(
    data=P1_species_winrate,
    columns=['P1_species_winrate'],
    dtype=float,
)

In [0]:
# Append the win-rate columns to the training features (inner join on index).
x_train = (
    x_train
    .merge(P0_species_winrate, left_index=True, right_index=True)
    .merge(P1_species_winrate, left_index=True, right_index=True)
)

In [0]:
# Match-up win rates for every test game.
winrate_t = get_total_species_winrate(df=test_data)

In [0]:
# Split the (player 0, player 1) pair returned above.
P0_species_winrate_t, P1_species_winrate_t = winrate_t[0], winrate_t[1]

In [0]:
# Wrap each list in a single-column float frame (one row per game).
P0_species_winrate_t = pd.DataFrame(
    data=P0_species_winrate_t,
    columns=['P0_species_winrate'],
    dtype=float,
)
P1_species_winrate_t = pd.DataFrame(
    data=P1_species_winrate_t,
    columns=['P1_species_winrate'],
    dtype=float,
)

In [0]:
# Append the win-rate columns to the TEST features.
# This cell previously merged the test win rates into x_train, which left
# x_test without these columns and added suffixed duplicates to x_train.
# The win-rate frames carry a positional 0..n-1 index while x_test is indexed
# by game_id, so each frame is first relabelled with x_test's ids.
# Assumes row i of a win-rate frame belongs to the i-th game of x_test — TODO confirm.
for winrate_frame in (P0_species_winrate_t, P1_species_winrate_t):
    aligned = winrate_frame.copy()
    aligned.index = x_test.index[:len(aligned)]
    x_test = pd.merge(x_test, aligned, left_index=True, right_index=True)

In [0]:
def map_and_starting(train, random_state=None):
  """Derive each player's starting-location cluster and a map id per game.

  Steps:
    1. Take each player's first event-code-0 row per game and parse the
       "(x, y)" coordinates out of its 'event_contents' text.
    2. Cluster all coordinates (both players pooled) into 15 KMeans clusters;
       points farther than 5 from their cluster centre get label 0 (unknown).
    3. Group clusters that frequently face each other into maps and use those
       groups to fill unknown labels from the opponent's label.
    4. Number the maps and attach the map number to every game.

  Args:
    train: event log with 'game_id', 'event', 'player' and 'event_contents'.
    random_state: seed forwarded to KMeans. The default None keeps the
      previous unseeded behaviour; pass an int for reproducible labels.

  Returns:
    DataFrame indexed by game_id with the columns 'player0_starting',
    'player1_starting' and 'map'.

  NOTE(review): clusters are fitted on whatever frame is passed in, so labels
  from two separate calls (e.g. train and test) are not guaranteed to refer to
  the same locations — confirm this is acceptable for the model.
  """
  df_train = pd.DataFrame(train.game_id.unique(), columns=['game_id'])
  
  df_train_p0 = train[(train.event==0)&(train.player==0)]
  df_train_p0 = df_train_p0[df_train_p0.shift(1).game_id!=df_train_p0.game_id] # use shift to find the first row of each game
  # Select by name rather than by position: the position of 'event_contents'
  # depends on which columns the frame happens to have.
  df_train_p0 = df_train_p0.loc[:, ['game_id', 'event_contents']].rename({'event_contents':'player0_starting'}, axis = 1)
  df_train_p0.index = df_train_p0['game_id']
  df_train_p0 = df_train_p0.drop(['game_id'], axis=1)
  df_train = pd.merge(df_train, df_train_p0, on='game_id', how='left')
  del df_train_p0

  df_train_p1 = train[(train.event==0)&(train.player==1)]
  df_train_p1 = df_train_p1[df_train_p1.shift(1).game_id!=df_train_p1.game_id]
  df_train_p1 = df_train_p1.loc[:, ['game_id', 'event_contents']].rename({'event_contents':'player1_starting'}, axis = 1)
  df_train_p1.index = df_train_p1['game_id']
  df_train_p1 = df_train_p1.drop(['game_id'], axis=1)
  df_train = pd.merge(df_train, df_train_p1, on='game_id', how='left')
  del df_train_p1

  # Keep the text between the first '(' and the following ')', then split "x, y".
  df_train['player0_starting'] = df_train.player0_starting.str.split('(').str[1]
  df_train['player0_starting'] = df_train.player0_starting.str.split(')').str[0]
  split_xy = df_train.player0_starting.str.split(',')
  df_train['player0_x'] = split_xy.str[0].astype('float')
  df_train['player0_y'] = split_xy.str[1].astype('float')
  del split_xy

  df_train['player1_starting'] = df_train.player1_starting.str.split('(').str[1]
  df_train['player1_starting'] = df_train.player1_starting.str.split(')').str[0]
  split_xy = df_train.player1_starting.str.split(',')
  df_train['player1_x'] = split_xy.str[0].astype('float')
  df_train['player1_y'] = split_xy.str[1].astype('float')
  del split_xy
  
  df_train = df_train.set_index('game_id')
  location_p0 = df_train.loc[:, ['player0_x', 'player0_y']]
  location_p0 = location_p0.rename({'player0_x':'location_x', 'player0_y':'location_y'}, axis=1)

  location_p1 = df_train.loc[:, ['player1_x', 'player1_y']]
  location_p1 = location_p1.rename({'player1_x':'location_x', 'player1_y':'location_y'}, axis=1)
  # Shift player 1's labels past player 0's so both can share one frame.
  location_p1.index += location_p0.index[-1]+1

  location_p1.index = location_p1.index.map(int)

  location = pd.concat([location_p0, location_p1])
  # Games without a parsed location are placed at (0, 0).
  location = location.fillna(0)

  del location_p0, location_p1

  kmeans_clst = KMeans(n_clusters=15, random_state=random_state).fit(location)
  # Cluster labels are shifted to 1..15 so that 0 can mean "unknown".
  location['starting'] = kmeans_clst.labels_+1

  for cluster in range(15):
    point = location[location.starting==cluster+1]
    loc = point.loc[:,['location_x', 'location_y']]
    del point
    loc['center_x'] = kmeans_clst.cluster_centers_[cluster][0]
    loc['center_y'] = kmeans_clst.cluster_centers_[cluster][1]
    distance = np.sqrt(np.square(loc.location_x - loc.center_x) + np.square(loc.location_y - loc.center_y))
    location.loc[loc.index, 'distance'] = distance
    del loc
  
  # Points too far from their centre are treated as unknown starting spots.
  idx = location[location.distance>5].index
  location.loc[idx, 'starting'] = 0
  del idx
  location.index = location.index.map(int)

  df_train['player0_starting'] = location.loc[df_train.index, 'starting']
  # Undo the label shift so player 1's rows line up with the game ids again.
  location.index -= (df_train.index[-1]+1)
  df_train['player1_starting'] = location.loc[df_train.index, 'starting']
  del location

  # Drop the columns that are no longer needed
  df_train = df_train.drop(['player0_x', 'player0_y', 'player1_x', 'player1_y'], axis = 1)
  df_train = df_train.fillna(0)

  # For every starting spot, record the most common opposing spot and, when it
  # occurs at least 100 times, the second most common one (999 = no third spot).
  map_list = []
  for point in range(1,16):
    couple = df_train[df_train.player0_starting == point].player1_starting.value_counts()
    if couple[couple.index[1]]<100:
        map_list.append([point, couple.index[0], 999])
    else:
        map_list.append([point, couple.index[0], couple.index[1]])
  # Sorting each triple and de-duplicating leaves one row per distinct map.
  map_list = np.sort(map_list, axis = 1)
  map_list = np.unique(map_list, axis = 0)

  # Fill unknown (0) starting spots from the opponent's spot on the same map.
  for m in map_list:
    idx = df_train[(df_train.player0_starting == 0)&((df_train.player1_starting == m[0])|(df_train.player1_starting == m[2]))].index
    df_train.loc[idx, 'player0_starting'] = m[1]
    del idx
    idx = df_train[(df_train.player0_starting == 0)&((df_train.player1_starting == m[1])|(df_train.player1_starting == m[2]))].index
    df_train.loc[idx, 'player0_starting'] = m[0]
    del idx
    
    idx = df_train[(df_train.player1_starting == 0)&((df_train.player0_starting == m[0])|(df_train.player0_starting == m[2]))].index
    df_train.loc[idx, 'player1_starting'] = m[1]
    del idx
    idx = df_train[(df_train.player1_starting == 0)&((df_train.player0_starting == m[1])|(df_train.player0_starting == m[2]))].index
    df_train.loc[idx, 'player1_starting'] = m[0]
    del idx

  # Map number = position of the map in map_list, looked up via player 0's spot.
  for map_num, m in enumerate(map_list):
    idx = df_train[(df_train.player0_starting == m[0])|(df_train.player0_starting == m[1])|(df_train.player0_starting == m[2])].index
    df_train.loc[idx, 'map'] = map_num
  del idx, map_list

  return df_train

In [0]:
# Starting-spot and map features for the training games.
map_starting = map_and_starting(train=train_data)

In [0]:
# Both frames are indexed by game_id, so an index join lines them up.
x_train = x_train.merge(map_starting, left_index=True, right_index=True)

In [0]:
# Starting-spot and map features for the test games.
map_starting_t = map_and_starting(train=test_data)

In [0]:
# Both frames are indexed by game_id, so an index join lines them up.
x_test = x_test.merge(map_starting_t, left_index=True, right_index=True)

In [0]:
# Model and parameter definition
def lgb_cv(num_leaves, learning_rate, n_estimators, subsample, colsample_bytree, reg_alpha, reg_lambda, x_data=None, y_data=None, n_splits=5, output='score'):
    """Train a LightGBM classifier with K-fold CV and return its score or models.

    Args:
        num_leaves, n_estimators: cast to int before use (the optimiser
            proposes floats).
        learning_rate, reg_alpha, reg_lambda: forwarded unchanged.
        subsample, colsample_bytree: clipped into (0, 1]; LightGBM rejects a
            fraction of exactly 0, which a bound-probing optimiser can propose.
        x_data: feature DataFrame (rows selected with .iloc).
        y_data: label array, positionally aligned with `x_data`.
        n_splits: number of (unshuffled) folds.
        output: 'score' for the mean validation ROC AUC, 'model' for the list
            of fitted fold models.

    Returns:
        float when output == 'score', list of LGBMClassifier when 'model'.

    Raises:
        ValueError: if `output` is neither 'score' nor 'model'.
    """
    if output not in ('score', 'model'):
        # Fail before training rather than silently returning None afterwards.
        raise ValueError("output must be 'score' or 'model', got {!r}".format(output))

    # Smallest fraction passed on to LightGBM; its documented range excludes 0.
    min_fraction = 1e-3

    score = 0
    models = []
    for fit_index, valid_index in KFold(n_splits=n_splits).split(x_data):
        x_fit, y_fit = x_data.iloc[fit_index], y_data[fit_index]
        x_valid, y_valid = x_data.iloc[valid_index], y_data[valid_index]

        # NOTE(review): LightGBM documents that bagging (`subsample`) is only
        # applied when `subsample_freq` is non-zero, which is not set here —
        # confirm whether `subsample` is meant to have an effect.
        model = lgb.LGBMClassifier(
            num_leaves = int(num_leaves),
            learning_rate = learning_rate,
            n_estimators = int(n_estimators),
            subsample = np.clip(subsample, min_fraction, 1),
            colsample_bytree = np.clip(colsample_bytree, min_fraction, 1),
            reg_alpha = reg_alpha,
            reg_lambda = reg_lambda,
        )

        model.fit(x_fit, y_fit)
        models.append(model)

        # Probability of class 1, scored on the held-out fold.
        pred = model.predict_proba(x_valid)[:, 1]
        score += roc_auc_score(y_valid, pred)/n_splits

    if output == 'score':
        return score
    return models

In [0]:
# Freeze the arguments that are not hyper-parameters
func_fixed = partial(lgb_cv, x_data=x_train, y_data=y_train, n_splits=5, output='score')
# Search range (lower bound, upper bound) of each hyper-parameter
lgbBO = BayesianOptimization(
    f=func_fixed,
    pbounds={
        'num_leaves': (16, 1024),
        'learning_rate': (0.0001, 0.1),
        'n_estimators': (16, 1024),
        'subsample': (0, 1),
        'colsample_bytree': (0, 1),
        'reg_alpha': (0, 10),
        'reg_lambda': (0, 50),
    },
    random_state=4321,  # fixed seed
)
# 5 random probes first, then 30 optimisation steps
lgbBO.maximize(init_points=5, n_iter=30)

# This example tunes 7 hyper-parameters for 30 iterations.
# Try other hyper-parameters and more iterations to get a better model.
# LightGBM Classifier: https://lightgbm.readthedocs.io/en/latest/pythonapi/lightgbm.LGBMClassifier.html

In [0]:
# Take the best parameter set found and refit one model per fold with it
params = lgbBO.max['params']
models = lgb_cv(
    num_leaves=params['num_leaves'],
    learning_rate=params['learning_rate'],
    n_estimators=params['n_estimators'],
    subsample=params['subsample'],
    colsample_bytree=params['colsample_bytree'],
    reg_alpha=params['reg_alpha'],
    reg_lambda=params['reg_lambda'],
    x_data=x_train,
    y_data=y_train,
    n_splits=5,
    output='model',
)

In [0]:
# Apply the fold models to the test set and average their probabilities
preds = [model.predict_proba(x_test)[:, 1] for model in models]
pred = np.mean(preds, axis=0)
sample_submission = pd.read_csv('/content/drive/My Drive/data/dacon게임분석대회/sample_submission.csv', index_col=0)

# `submission` is the same object as `sample_submission`, not a copy.
submission = sample_submission
# Added by position onto the template's 'winner' column (presumably all zeros
# in the sample file — TODO confirm); rows must be in x_test's game order.
submission['winner'] = submission['winner'] + pred

In [0]:
# Write the submission file
# `header` takes a bool or a list of column aliases; the string 'game_id' that
# was passed before was only treated as "truthy", so say what is meant.
# The index column keeps the name it was read with.
submission.to_csv('submission6.csv', header=True)
submission.head()