In [3]:
# initial imports and settings
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import sc2reader
import re
import random
import datetime
import json
from scripts.classes import ReplayInfo
import time
import math

# sklearn
from sklearn.metrics import confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score

# import svm
from sklearn.svm import SVC
from sklearn.svm import LinearSVC

# import random forest
from sklearn.ensemble import RandomForestClassifier

# import KNN
from sklearn.neighbors import KNeighborsClassifier

# import grid_search
from sklearn.model_selection import GridSearchCV

# import warnings
import warnings

In [3]:
replays_dir = "data/events"
columns_list = []
index_list = []

# List the directory exactly once and in sorted order: the progress total and
# the loop then always describe the same set of files, and the row order of
# index_list / columns_list is reproducible (os.listdir returns entries in
# arbitrary order).
replay_files = sorted(os.listdir(replays_dir))
total_replays = len(replay_files)

start_time = time.time()
# loop through each event file in the directory
for i, filename in enumerate(replay_files):

    # print progress and eta (average seconds per file so far * files left)
    eta = round((time.time() - start_time)/(i+1)*(total_replays-i),0)
    print(
        f'\r{i+1}/{total_replays} - ETA: {datetime.timedelta(seconds=eta)}',
        end=''
    )

    file_path = os.path.join(replays_dir, filename)

    # handle both pkl and csv files; anything else is skipped
    if filename.endswith(".pkl"):
        # NOTE(review): unpickling can execute arbitrary code - only safe if
        # the files in replays_dir come from a trusted source; confirm.
        df = pd.read_pickle(file_path)
    elif filename.endswith(".csv"):
        # first CSV column holds the saved index
        df = pd.read_csv(file_path, index_col=0)
    else:
        continue

    # remove only the final extension from the filename
    # (str.split(".")[0] would cut the name at its first dot instead)
    filename_no_ext = os.path.splitext(filename)[0]
    index_list.append(filename_no_ext)

    # remember which columns this replay's event table has
    columns_list.append(df.columns.values)


36812/36812 - ETA: 0:00:00

In [4]:
# One row per replay file, holding that file's column names in alphabetical
# order; files with fewer columns than the widest one are padded with NaN.
sorted_column_rows = list(map(sorted, columns_list))
columns_df = pd.DataFrame(sorted_column_rows, index=index_list)
columns_df.head()


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,50,51,52,53,54,55,56,57,58,59
00002a9dc36528452ad1938d035d16186b31390e6faef0487a0468214d644b34,ability_id,count,ff_minerals_lost_army,ff_minerals_lost_economy,ff_minerals_lost_technology,ff_vespene_lost_army,ff_vespene_lost_economy,ff_vespene_lost_technology,food_made,food_used,...,vespene_used_active_forces,vespene_used_current,vespene_used_current_army,vespene_used_current_economy,vespene_used_current_technology,vespene_used_in_progress,vespene_used_in_progress_army,vespene_used_in_progress_economy,vespene_used_in_progress_technology,workers_active_count
0000bb7d32c8ba566cbeb35224d0bac61d95c57cb128cb12b5747945d17f16f6,ability_id,count,ff_minerals_lost_army,ff_minerals_lost_economy,ff_minerals_lost_technology,ff_vespene_lost_army,ff_vespene_lost_economy,ff_vespene_lost_technology,food_made,food_used,...,vespene_used_active_forces,vespene_used_current,vespene_used_current_army,vespene_used_current_economy,vespene_used_current_technology,vespene_used_in_progress,vespene_used_in_progress_army,vespene_used_in_progress_economy,vespene_used_in_progress_technology,workers_active_count
0003e67ac992cd561bc1e12d1f26db10b4d39eeb9a6e9a7d425ba680f994535c,ability_id,count,ff_minerals_lost_army,ff_minerals_lost_economy,ff_minerals_lost_technology,ff_vespene_lost_army,ff_vespene_lost_economy,ff_vespene_lost_technology,food_made,food_used,...,vespene_used_active_forces,vespene_used_current,vespene_used_current_army,vespene_used_current_economy,vespene_used_current_technology,vespene_used_in_progress,vespene_used_in_progress_army,vespene_used_in_progress_economy,vespene_used_in_progress_technology,workers_active_count
0005c8546e8d1c0e8a045cf5082da428ff66163ae9ce4895ef8df08691da138c,ability_id,count,ff_minerals_lost_army,ff_minerals_lost_economy,ff_minerals_lost_technology,ff_vespene_lost_army,ff_vespene_lost_economy,ff_vespene_lost_technology,food_made,food_used,...,vespene_used_active_forces,vespene_used_current,vespene_used_current_army,vespene_used_current_economy,vespene_used_current_technology,vespene_used_in_progress,vespene_used_in_progress_army,vespene_used_in_progress_economy,vespene_used_in_progress_technology,workers_active_count
000a36f155c9991b01d6ee10d4fe669db8a09f391b403d17595c70281dcff230,ability_id,count,ff_minerals_lost_army,ff_minerals_lost_economy,ff_minerals_lost_technology,ff_vespene_lost_army,ff_vespene_lost_economy,ff_vespene_lost_technology,food_made,food_used,...,vespene_used_active_forces,vespene_used_current,vespene_used_current_army,vespene_used_current_economy,vespene_used_current_technology,vespene_used_in_progress,vespene_used_in_progress_army,vespene_used_in_progress_economy,vespene_used_in_progress_technology,workers_active_count


In [5]:
# replays whose sorted column list has at least one NaN cell
rows_with_missing = columns_df.isnull().any(axis=1)
columns_df[rows_with_missing]


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,50,51,52,53,54,55,56,57,58,59
00963846e01f659436d5ffdd92482740d9258b88c546a2ed84e85699dd7c5553,count,ff_minerals_lost_army,ff_minerals_lost_economy,ff_minerals_lost_technology,ff_vespene_lost_army,ff_vespene_lost_economy,ff_vespene_lost_technology,food_made,food_used,frame,...,vespene_used_current_army,vespene_used_current_economy,vespene_used_current_technology,vespene_used_in_progress,vespene_used_in_progress_army,vespene_used_in_progress_economy,vespene_used_in_progress_technology,workers_active_count,,
00d98eeeb97d26e05e1fb403bbe6007a017791d5f56167915811e4ad86209601,count,ff_minerals_lost_army,ff_minerals_lost_economy,ff_minerals_lost_technology,ff_vespene_lost_army,ff_vespene_lost_economy,ff_vespene_lost_technology,food_made,food_used,frame,...,vespene_used_current_army,vespene_used_current_economy,vespene_used_current_technology,vespene_used_in_progress,vespene_used_in_progress_army,vespene_used_in_progress_economy,vespene_used_in_progress_technology,workers_active_count,,
014c724a43f733c4fcfc2fcf3b130c1bc7bcc857ecbf80b232c71dcef624ac6d,count,ff_minerals_lost_army,ff_minerals_lost_economy,ff_minerals_lost_technology,ff_vespene_lost_army,ff_vespene_lost_economy,ff_vespene_lost_technology,food_made,food_used,frame,...,vespene_used_current_army,vespene_used_current_economy,vespene_used_current_technology,vespene_used_in_progress,vespene_used_in_progress_army,vespene_used_in_progress_economy,vespene_used_in_progress_technology,workers_active_count,,
019ca1cc8f0dbecb198105696c1a0a8c338e95d023410acdf7ecfa6437e65e50,ability_id,count,ff_minerals_lost_army,ff_minerals_lost_economy,ff_minerals_lost_technology,ff_vespene_lost_army,ff_vespene_lost_economy,ff_vespene_lost_technology,food_made,food_used,...,vespene_used_current,vespene_used_current_army,vespene_used_current_economy,vespene_used_current_technology,vespene_used_in_progress,vespene_used_in_progress_army,vespene_used_in_progress_economy,vespene_used_in_progress_technology,workers_active_count,
02ddc3ab01147d478733579be09edae2dcceb5458906422da0e2b8cf907ca3b4,count,ff_minerals_lost_army,ff_minerals_lost_economy,ff_minerals_lost_technology,ff_vespene_lost_army,ff_vespene_lost_economy,ff_vespene_lost_technology,food_made,food_used,frame,...,vespene_used_current_army,vespene_used_current_economy,vespene_used_current_technology,vespene_used_in_progress,vespene_used_in_progress_army,vespene_used_in_progress_economy,vespene_used_in_progress_technology,workers_active_count,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
fdfb3eb49acae60a8f5b4ed648797de94788ad60566bd6f14f07a93b47ec57bb,ability_id,count,ff_minerals_lost_army,ff_minerals_lost_economy,ff_minerals_lost_technology,ff_vespene_lost_army,ff_vespene_lost_economy,ff_vespene_lost_technology,food_made,food_used,...,vespene_used_current,vespene_used_current_army,vespene_used_current_economy,vespene_used_current_technology,vespene_used_in_progress,vespene_used_in_progress_army,vespene_used_in_progress_economy,vespene_used_in_progress_technology,workers_active_count,
fe47e889289211a435b8e893945fdf86df57d5b97fa729f2f97a296a29c6dae5,ability_id,count,ff_minerals_lost_army,ff_minerals_lost_economy,ff_minerals_lost_technology,ff_vespene_lost_army,ff_vespene_lost_economy,ff_vespene_lost_technology,food_made,food_used,...,vespene_used_current,vespene_used_current_army,vespene_used_current_economy,vespene_used_current_technology,vespene_used_in_progress,vespene_used_in_progress_army,vespene_used_in_progress_economy,vespene_used_in_progress_technology,workers_active_count,
fe8691dd94c3621b179a504232a5ea7698d522f55f18d2a8637313005291d2ee,count,ff_minerals_lost_army,ff_minerals_lost_economy,ff_minerals_lost_technology,ff_vespene_lost_army,ff_vespene_lost_economy,ff_vespene_lost_technology,food_made,food_used,frame,...,vespene_used_current_army,vespene_used_current_economy,vespene_used_current_technology,vespene_used_in_progress,vespene_used_in_progress_army,vespene_used_in_progress_economy,vespene_used_in_progress_technology,workers_active_count,,
ff769ceb5cbb5b90f564310ca91e957baa4ca3730c157f0b70346666a68a02a8,ability_id,count,ff_minerals_lost_army,ff_minerals_lost_economy,ff_minerals_lost_technology,ff_vespene_lost_army,ff_vespene_lost_economy,ff_vespene_lost_technology,food_made,food_used,...,vespene_used_current,vespene_used_current_army,vespene_used_current_economy,vespene_used_current_technology,vespene_used_in_progress,vespene_used_in_progress_army,vespene_used_in_progress_economy,vespene_used_in_progress_technology,workers_active_count,


In [475]:

# load a single replay's event table to prototype the cleaning steps on
sample_replay_path = 'data/events/0a0bda8930b08bd1411093afe298df14ac5f00a37177cde56452260808d1a13a.pkl'
df = pd.read_pickle(sample_replay_path)

# one indicator column per distinct value of the 'name' column
pd.get_dummies(df['name'])

Unnamed: 0,AddToControlGroupEvent,BasicCommandEvent,CameraEvent,ChatEvent,GetControlGroupEvent,PlayerLeaveEvent,PlayerSetupEvent,PlayerStatsEvent,ProgressEvent,SelectionEvent,...,UnitBornEvent,UnitDiedEvent,UnitDoneEvent,UnitInitEvent,UnitPositionsEvent,UnitTypeChangeEvent,UpdateTargetPointCommandEvent,UpdateTargetUnitCommandEvent,UpgradeCompleteEvent,UserOptionsEvent
0,0,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12132,0,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
12133,0,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
12134,0,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
12136,0,0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0


In [476]:
# Index the events by game frame. Guarded so the cell can be re-run: once
# 'frame' has become the index it is no longer a column, and an unconditional
# set_index('frame') would raise KeyError.
if df.index.name != 'frame':
    df = df.set_index('frame')
df.info()

# every frame number from 0 up to and including the last frame with an event
frame_range = range(int(df.index.max()) + 1)

<class 'pandas.core.frame.DataFrame'>
Int64Index: 8263 entries, 0 to 15180
Data columns (total 59 columns):
 #   Column                                Non-Null Count  Dtype  
---  ------                                --------------  -----  
 0   name                                  8263 non-null   object 
 1   pid                                   7332 non-null   float64
 2   second                                8263 non-null   int64  
 3   count                                 18 non-null     float64
 4   upgrade_type_name                     18 non-null     object 
 5   unit_type_name                        545 non-null    object 
 6   ff_minerals_lost_army                 193 non-null    float64
 7   ff_minerals_lost_economy              193 non-null    float64
 8   ff_minerals_lost_technology           193 non-null    float64
 9   ff_vespene_lost_army                  193 non-null    float64
 10  ff_vespene_lost_economy               193 non-null    float64
 11  ff_vespene_lost_

In [477]:
# (rows, columns) of the frame-indexed event table, before any rows are filtered
df.shape

(8263, 59)

In [478]:
# event types considered irrelevant; kept for reference only - the filter
# below does not read this list
unnecessary_events = [
    'AddToControlGroupEvent', 'CameraEvent', 'ChatEvent',
    'GetControlGroupEvent', 'PlayerSetupEvent', 'PlayerLeaveEvent',
    'ProgressEvent', 'SelectionEvent', 'SetControlGroupEvent',
    'UnitPositionsEvent', 'UnitTypeChangeEvent',
    'UpdateTargetPointCommandEvent', 'UpdateTargetUnitCommandEvent',
    'UserOptionsEvent',
]

# whitelist of event types to keep
necessary_events = [
    'TargetPointCommandEvent', 'UnitBornEvent', 'PlayerStatsEvent',
    'UnitDiedEvent', 'UnitInitEvent', 'UnitDoneEvent',
    'TargetUnitCommandEvent', 'BasicCommandEvent', 'UpgradeCompleteEvent',
]

# keep only the rows whose 'name' is in necessary_events; an event type
# missing from that whitelist is dropped whether or not it is listed above
is_necessary_event = df['name'].isin(necessary_events)
df = df[is_necessary_event]
df.shape

(1642, 59)

In [479]:
# summary statistics of the numeric columns, one row per column
df.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
pid,765.0,1.139869,0.347078,1.0,1.0,1.0,1.0,2.0
second,1642.0,491.861145,304.010907,0.0,240.0,537.0,745.75,946.0
count,18.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0
ff_minerals_lost_army,193.0,14.507772,35.309512,0.0,0.0,0.0,0.0,100.0
ff_minerals_lost_economy,193.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ff_minerals_lost_technology,193.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ff_vespene_lost_army,193.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ff_vespene_lost_economy,193.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ff_vespene_lost_technology,193.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
food_made,193.0,79.642487,44.376956,15.0,39.0,78.0,126.0,165.0


In [480]:
# columns removed from the event table
unnecessary_columns = [
    'count', 'second', 'killer_pid',
    'ff_minerals_lost_army', 'ff_minerals_lost_economy', 'ff_minerals_lost_technology',
    'ff_vespene_lost_army', 'ff_vespene_lost_economy', 'ff_vespene_lost_technology',
]
# rebind df to the reduced frame rather than mutating it in place
df = df.drop(columns=unnecessary_columns)


    

In [481]:
# distinct upgrade names that contain "spray" (case-insensitive)
unnecessary_upgrades = {
    upgrade
    for upgrade in df['upgrade_type_name']
    if 'spray' in str(upgrade).lower()
}

# remove every row carrying one of those upgrades in a single pass;
# rows with a missing upgrade name are kept
df = df[~df['upgrade_type_name'].isin(unnecessary_upgrades)]

df.shape

(1632, 50)

In [482]:
# one-hot encode the event type and the upgrade name
# NOTE(review): the resulting indicator columns depend on which values occur
# in this one replay, so other replays may yield a different schema - confirm
# this is handled when replays are combined.
categorical_event_columns = ['name', 'upgrade_type_name']
df = pd.get_dummies(df, columns=categorical_event_columns)


In [483]:
# inspect the schema after one-hot encoding 'name' and 'upgrade_type_name'
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1632 entries, 0 to 15150
Data columns (total 63 columns):
 #   Column                                        Non-Null Count  Dtype  
---  ------                                        --------------  -----  
 0   pid                                           755 non-null    float64
 1   unit_type_name                                519 non-null    object 
 2   food_made                                     193 non-null    float64
 3   food_used                                     193 non-null    float64
 4   minerals_collection_rate                      193 non-null    float64
 5   minerals_current                              193 non-null    float64
 6   minerals_killed                               193 non-null    float64
 7   minerals_killed_army                          193 non-null    float64
 8   minerals_killed_economy                       193 non-null    float64
 9   minerals_killed_technology                    193 non-null    

In [484]:
# columns dropped from the per-player tables
per_player_drop = ['pid', 'unit_type_name']

# events attributed to each player, as independent copies
player1_df = df[df['pid'] == 1].drop(columns=per_player_drop).copy()
player2_df = df[df['pid'] == 2].drop(columns=per_player_drop).copy()

# df keeps only the events that belong to neither player 1 nor player 2
# (rows with a missing pid stay)
df = df[~df['pid'].isin([1, 2])]

print(player1_df.shape, player2_df.shape, df.shape)

(654, 61) (101, 61) (877, 63)


In [485]:
# discard every column that holds nothing but missing values
df = df.dropna(axis='columns', how='all')
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 877 entries, 0 to 15150
Data columns (total 16 columns):
 #   Column                                        Non-Null Count  Dtype 
---  ------                                        --------------  ----- 
 0   unit_type_name                                519 non-null    object
 1   name_BasicCommandEvent                        877 non-null    uint8 
 2   name_PlayerStatsEvent                         877 non-null    uint8 
 3   name_TargetPointCommandEvent                  877 non-null    uint8 
 4   name_TargetUnitCommandEvent                   877 non-null    uint8 
 5   name_UnitBornEvent                            877 non-null    uint8 
 6   name_UnitDiedEvent                            877 non-null    uint8 
 7   name_UnitDoneEvent                            877 non-null    uint8 
 8   name_UnitInitEvent                            877 non-null    uint8 
 9   name_UpgradeCompleteEvent                     877 non-null    uint8 
 10  

In [486]:
# every column whose dtype is not object
numeric_columns = [column for column, dtype in df.dtypes.items() if dtype != 'object']

# of those, the columns whose values add up to exactly 0
zero_columns = list(filter(lambda column: df[column].sum() == 0, numeric_columns))

df = df.drop(columns=zero_columns)

In [487]:

# Convert every value in 'unit_type_name' to a string; missing values become
# the literal text 'nan', which the unit filter in the following cell checks for.
# assign() builds a new frame instead of writing a column into df, which at
# this point is the result of boolean filtering - an in-place column write on
# such a frame triggers pandas' SettingWithCopyWarning.
df = df.assign(unit_type_name=df['unit_type_name'].astype(str))
df['unit_type_name'].unique()

array(['MineralField750', 'SpacePlatformGeyser', 'MineralField',
       'CollapsibleRockTowerRampLeft', 'LabMineralField750',
       'LabMineralField', 'VespeneGeyser', 'UnbuildableRocksDestructible',
       'DestructibleRockEx16x6', 'DestructibleRockEx1DiagonalHugeBLUR',
       'BeaconArmy', 'BeaconDefend', 'BeaconAttack', 'BeaconHarass',
       'BeaconIdle', 'BeaconAuto', 'BeaconDetect', 'BeaconScout',
       'BeaconClaim', 'BeaconExpand', 'BeaconRally', 'BeaconCustom1',
       'BeaconCustom2', 'BeaconCustom3', 'BeaconCustom4', 'Nexus',
       'Probe', 'Pylon', 'nan', 'Gateway', 'Assimilator',
       'CyberneticsCore', 'Sentry', 'TwilightCouncil', 'Stalker',
       'DarkShrine', 'RoboticsFacility', 'ShieldBattery', 'Oracle',
       'Phoenix', 'DarkTemplar', 'Observer', 'ForceField', 'WarpPrism',
       'Adept', 'Immortal', 'Forge', 'Archon', 'AdeptPhaseShift',
       'RoboticsBay', 'Zealot', 'PhotonCannon', 'Disruptor',
       'DisruptorPhased', 'TemplarArchive', 'HighTemplar'], dtyp

In [488]:


unique_units = df['unit_type_name'].unique()

# Units to discard: any name containing "beacon" (case-insensitive) and the
# 'nan' placeholder that astype(str) produced for missing values.
# The placeholder is matched exactly: a substring test for 'nan' would also
# throw away any real unit whose name merely contains those three letters.
unnecessary_units = [
    unit for unit in unique_units
    if 'beacon' in unit.lower() or unit.lower() == 'nan'
]

# drop all rows in df where 'unit_type_name' is in unnecessary_units
df = df[~df['unit_type_name'].isin(unnecessary_units)]

# dummify unit_type_name
df = pd.get_dummies(df, columns=['unit_type_name'])

df.info()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 489 entries, 0 to 14990
Data columns (total 44 columns):
 #   Column                                              Non-Null Count  Dtype
---  ------                                              --------------  -----
 0   name_UnitBornEvent                                  489 non-null    uint8
 1   name_UnitDiedEvent                                  489 non-null    uint8
 2   name_UnitDoneEvent                                  489 non-null    uint8
 3   name_UnitInitEvent                                  489 non-null    uint8
 4   unit_type_name_Adept                                489 non-null    uint8
 5   unit_type_name_AdeptPhaseShift                      489 non-null    uint8
 6   unit_type_name_Archon                               489 non-null    uint8
 7   unit_type_name_Assimilator                          489 non-null    uint8
 8   unit_type_name_CollapsibleRockTowerRampLeft         489 non-null    uint8
 9   unit_type_name_Cybe

In [489]:
# tag each per-player table's columns with its player prefix so the two
# tables can later sit side by side without name clashes
for prefix, player_frame in (('player_1_', player1_df), ('player_2_', player2_df)):
    player_frame.columns = [prefix + column for column in player_frame.columns]


In [490]:

print(player1_df.shape, player2_df.shape, df.shape)

# drop duplicate rows
# NOTE(review): drop_duplicates compares column values only and ignores the
# frame index, so identical events at different frames collapse into their
# first occurrence (player 1 goes from 654 to 153 rows) - confirm intended.
player1_df = player1_df.drop_duplicates()
player2_df = player2_df.drop_duplicates()

# fill each missing value with the last value seen above it in the same column.
# DataFrame.ffill() replaces fillna(method='ffill'), which is deprecated in
# recent pandas releases; the result is the same.
player1_df = player1_df.ffill()
player2_df = player2_df.ffill()

print(player1_df.shape, player2_df.shape, df.shape)

# drop duplicate rows again (rows can become identical once the gaps are filled)
player1_df = player1_df.drop_duplicates()
player2_df = player2_df.drop_duplicates()

print(player1_df.shape, player2_df.shape, df.shape)

(654, 61) (101, 61) (489, 44)
(153, 61) (101, 61) (489, 44)
(153, 61) (101, 61) (489, 44)


In [491]:
# empty, column-less frame with one row per frame number in frame_range
frame_index = pd.Index(frame_range)
new_df = pd.DataFrame(index=frame_index)
new_df

0
1
2
3
4
...
15176
15177
15178
15179
15180


In [492]:
# left-join the player 1, player 2 and remaining event tables onto the
# frame index, in that order
# NOTE(review): if any of the joined tables has several rows for the same
# frame, join emits one output row per match, so new_df can end up with
# repeated frame labels - verify whether that is expected.
new_df = (
    new_df
    .join(player1_df)
    .join(player2_df)
    .join(df)
)

In [493]:
# Rebuild frame 0 as a single row that holds, for every column, the first
# value that column takes anywhere in the table.

# drop all rows with index = 0
new_df.drop(index=0, inplace=True)

# insert single row with index = 0 with all 0 values
# (label 0 no longer exists, so this appends the row at the END of the frame)
new_df.loc[0] = 0

# overwrite each cell of row 0 with that column's first non-NaN value;
# because row 0 currently sits last, any earlier-positioned row with a value
# wins, and the 0 placeholder survives only in columns that are otherwise NaN
# NOTE(review): this applies to the one-hot event columns too, so frame 0
# receives a copy of the first later event's indicators - confirm intended.
for col in new_df.columns:
    first_non_nan = new_df[col].first_valid_index()
    new_df.loc[0, col] = new_df.loc[first_non_nan, col]


# sort new_df by index (moves the rebuilt row 0 back to the top)
new_df.sort_index(inplace=True)
new_df

Unnamed: 0,player_1_food_made,player_1_food_used,player_1_minerals_collection_rate,player_1_minerals_current,player_1_minerals_killed,player_1_minerals_killed_army,player_1_minerals_killed_economy,player_1_minerals_killed_technology,player_1_minerals_lost,player_1_minerals_lost_army,...,unit_type_name_Sentry,unit_type_name_ShieldBattery,unit_type_name_SpacePlatformGeyser,unit_type_name_Stalker,unit_type_name_TemplarArchive,unit_type_name_TwilightCouncil,unit_type_name_UnbuildableRocksDestructible,unit_type_name_VespeneGeyser,unit_type_name_WarpPrism,unit_type_name_Zealot
0,15.0,12.0,0.0,50.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,15.0,12.0,0.0,50.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15176,,,,,,,,,,,...,,,,,,,,,,
15177,,,,,,,,,,,...,,,,,,,,,,
15178,,,,,,,,,,,...,,,,,,,,,,
15179,,,,,,,,,,,...,,,,,,,,,,


In [494]:
# names of every column that came from player1_df or player2_df
player_columns_mask = player1_df.columns.tolist() + player2_df.columns.tolist()

# Carry each player's last known values forward over the frames in between.
# DataFrame.ffill() replaces fillna(method='ffill'), which is deprecated in
# recent pandas releases; it already returns a new object, so the extra
# .copy() was redundant.
new_df[player_columns_mask] = new_df[player_columns_mask].ffill()

new_df

Unnamed: 0,player_1_food_made,player_1_food_used,player_1_minerals_collection_rate,player_1_minerals_current,player_1_minerals_killed,player_1_minerals_killed_army,player_1_minerals_killed_economy,player_1_minerals_killed_technology,player_1_minerals_lost,player_1_minerals_lost_army,...,unit_type_name_Sentry,unit_type_name_ShieldBattery,unit_type_name_SpacePlatformGeyser,unit_type_name_Stalker,unit_type_name_TemplarArchive,unit_type_name_TwilightCouncil,unit_type_name_UnbuildableRocksDestructible,unit_type_name_VespeneGeyser,unit_type_name_WarpPrism,unit_type_name_Zealot
0,15.0,12.0,0.0,50.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,15.0,12.0,0.0,50.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,
2,15.0,12.0,0.0,50.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,
3,15.0,12.0,0.0,50.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,
4,15.0,12.0,0.0,50.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15176,103.0,51.0,531.0,360.0,3800.0,2400.0,900.0,500.0,7975.0,5600.0,...,,,,,,,,,,
15177,103.0,51.0,531.0,360.0,3800.0,2400.0,900.0,500.0,7975.0,5600.0,...,,,,,,,,,,
15178,103.0,51.0,531.0,360.0,3800.0,2400.0,900.0,500.0,7975.0,5600.0,...,,,,,,,,,,
15179,103.0,51.0,531.0,360.0,3800.0,2400.0,900.0,500.0,7975.0,5600.0,...,,,,,,,,,,


In [495]:
# every cell that is still missing becomes 0
# (the player columns were forward-filled by the preceding cell)
new_df = new_df.fillna(0)
new_df

Unnamed: 0,player_1_food_made,player_1_food_used,player_1_minerals_collection_rate,player_1_minerals_current,player_1_minerals_killed,player_1_minerals_killed_army,player_1_minerals_killed_economy,player_1_minerals_killed_technology,player_1_minerals_lost,player_1_minerals_lost_army,...,unit_type_name_Sentry,unit_type_name_ShieldBattery,unit_type_name_SpacePlatformGeyser,unit_type_name_Stalker,unit_type_name_TemplarArchive,unit_type_name_TwilightCouncil,unit_type_name_UnbuildableRocksDestructible,unit_type_name_VespeneGeyser,unit_type_name_WarpPrism,unit_type_name_Zealot
0,15.0,12.0,0.0,50.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,15.0,12.0,0.0,50.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,15.0,12.0,0.0,50.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,15.0,12.0,0.0,50.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,15.0,12.0,0.0,50.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15176,103.0,51.0,531.0,360.0,3800.0,2400.0,900.0,500.0,7975.0,5600.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
15177,103.0,51.0,531.0,360.0,3800.0,2400.0,900.0,500.0,7975.0,5600.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
15178,103.0,51.0,531.0,360.0,3800.0,2400.0,900.0,500.0,7975.0,5600.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
15179,103.0,51.0,531.0,360.0,3800.0,2400.0,900.0,500.0,7975.0,5600.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [496]:
# columns of new_df that did not come from either player table, in their
# original order
player_column_set = set(player_columns_mask)
other_columns = [column for column in new_df.columns if column not in player_column_set]
new_df[other_columns]

Unnamed: 0,name_UnitBornEvent,name_UnitDiedEvent,name_UnitDoneEvent,name_UnitInitEvent,unit_type_name_Adept,unit_type_name_AdeptPhaseShift,unit_type_name_Archon,unit_type_name_Assimilator,unit_type_name_CollapsibleRockTowerRampLeft,unit_type_name_CyberneticsCore,...,unit_type_name_Sentry,unit_type_name_ShieldBattery,unit_type_name_SpacePlatformGeyser,unit_type_name_Stalker,unit_type_name_TemplarArchive,unit_type_name_TwilightCouncil,unit_type_name_UnbuildableRocksDestructible,unit_type_name_VespeneGeyser,unit_type_name_WarpPrism,unit_type_name_Zealot
0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15176,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
15177,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
15178,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
15179,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
