In [11]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
import joblib  # Import joblib for model saving

In [12]:
# Load the training dataset
train_data = pd.read_csv('train_data.csv', delimiter=';')

In [13]:
# Drop unnecessary columns
train_data = train_data.drop(['PlayerURL', 'PlayerName'], axis=1)
train_data.head()

Unnamed: 0,PlayerID,Race,Move 1,Move 2,Move 3,Move 4,Move 5,Move 6,Move 7,Move 8,...,Move 2554,Move 2555,Move 2556,Move 2557,Move 2558,Move 2559,Move 2560,Move 2561,Move 2562,Move 2563
0,1021189,Terran,s,hotkey11,hotkey21,hotkey31,hotkey41,hotkey51,hotkey61,s,...,,,,,,,,,,
1,1021189,Terran,s,s,hotkey11,hotkey21,hotkey31,hotkey41,hotkey51,hotkey61,...,,,,,,,,,,
2,1021189,Terran,s,hotkey11,hotkey21,hotkey31,hotkey41,hotkey51,hotkey61,hotkey71,...,,,,,,,,,,
3,1021189,Terran,s,hotkey11,hotkey21,hotkey31,hotkey41,hotkey51,hotkey61,t5,...,,,,,,,,,,
4,1021189,Terran,s,hotkey11,hotkey21,hotkey31,hotkey41,hotkey51,hotkey71,hotkey61,...,,,,,,,,,,


In [14]:
# Map race to numeric values
race_mapping = {'Protoss': 0, 'Zerg': 1, 'Terran': 2}
train_data['Race'] = train_data['Race'].map(race_mapping)

# Map actions to numeric values
action_mapping = {'s': 0, 'Base': 1, 'SingleMineral': 2}
for i in range(10):
    for j in range(3):
        action_mapping[f'hotkey{i}{j}'] = 3 + i * 3 + j

# Convert action sequences to numerical values
#if tXX it converts it to 100 otherwise -1
for i in range(1, 2564):
    train_data[f'Move {i}'] = train_data[f'Move {i}'].map(lambda x: 100 if pd.notna(x) and isinstance(x, str) and x.startswith('t') else action_mapping.get(x, -1))

train_data.head()

Unnamed: 0,PlayerID,Race,Move 1,Move 2,Move 3,Move 4,Move 5,Move 6,Move 7,Move 8,...,Move 2554,Move 2555,Move 2556,Move 2557,Move 2558,Move 2559,Move 2560,Move 2561,Move 2562,Move 2563
0,1021189,2,0,7,10,13,16,19,22,0,...,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
1,1021189,2,0,0,7,10,13,16,19,22,...,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
2,1021189,2,0,7,10,13,16,19,22,25,...,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
3,1021189,2,0,7,10,13,16,19,22,100,...,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
4,1021189,2,0,7,10,13,16,19,25,22,...,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1


In [15]:
# Create an empty list to store counts for each row
row_action_counts = []

# Iterate through each row of the dataframe
for _, row in train_data.iterrows():
    # Initialize a counter for each time window
    action_count_before_time = 0

    # Initialize a dictionary to store counts for each time window
    counts_before_100 = {}

    count_100 = 1

    # Iterate through each 'Move_XX' column for the current row
    for col in train_data.columns[3:]: 
        # Check if the value is different from -1
        if row[col] != -1:
            action_count_before_time += 1

            # Check if the value is 100
            if row[col] == 100:
                timestamp = count_100 * 5 
                counts_before_100[f't{timestamp}'] = action_count_before_time
                action_count_before_time = -1 
                count_100 += 1

    #If no action is found, set count to 0
    if not action_count_before_time:
        action_count_before_time = 0

    # Append the counts for the current row to the list
    row_action_counts.append(counts_before_100)

In [16]:
# Create a csv from the results
result_df = pd.DataFrame(row_action_counts)

# Add the 'PlayerID' and 'Race' column
result_df.insert(0, 'PlayerID', train_data['PlayerID'])
result_df.insert(1, 'Race', train_data['Race'])

# Save the DataFrame to a CSV file
result_df.to_csv('move_count.csv', index=False)

In [17]:
td = pd.read_csv('move_count.csv', delimiter=';')

In [18]:
td.head()

Unnamed: 0,"PlayerID,Race,t5,t10,t15,t20,t25,t30,t35,t40,t45,t50,t55,t60,t65,t70,t75,t80,t85,t90,t95,t100,t105,t110,t115,t120,t125,t130,t135,t140,t145,t150,t155,t160,t165,t170,t175,t180,t185,t190,t195,t200,t205,t210,t215,t220,t225,t230,t235,t240,t245,t250,t255,t260,t265,t270,t275,t280,t285,t290,t295,t300,t305,t310,t315,t320,t325,t330,t335,t340,t345,t350,t355,t360,t365,t370,t375,t380,t385,t390,t395,t400,t405,t410,t415,t420,t425,t430,t435,t440,t445,t450,t455,t460,t465,t470,t475,t480,t485,t490,t495,t500,t505,t510,t515,t520,t525,t530,t535,t540,t545,t550,t555,t560,t565,t570,t575,t580,t585,t590,t595,t600,t605,t610,t615,t620,t625,t630,t635,t640,t645,t650,t655,t660,t665,t670,t675,t680,t685,t690,t695,t700,t705,t710,t715,t720,t725,t730,t735,t740,t745,t750,t755,t760,t765,t770,t775,t780,t785,t790,t795,t800,t805,t810,t815,t820,t825,t830,t835,t840,t845,t850,t855,t860,t865,t870,t875,t880,t885,t890,t895,t900,t905,t910,t915,t920,t925,t930,t935,t940,t945,t950,t955,t960,t965,t970,t975,t980,t985,t990,t995,t1000,t1005,t1010,t1015,t1020,t1025,t1030,t1035,t1040,t1045,t1050,t1055,t1060,t1065,t1070,t1075,t1080,t1085,t1090,t1095,t1100,t1105,t1110,t1115,t1120,t1125,t1130,t1135,t1140,t1145,t1150,t1155,t1160,t1165,t1170,t1175,t1180,t1185,t1190,t1195,t1200,t1205,t1210,t1215,t1220,t1225,t1230,t1235,t1240,t1245,t1250,t1255,t1260,t1265,t1270,t1275,t1280,t1285,t1290,t1295,t1300,t1305,t1310,t1315,t1320,t1325,t1330,t1335,t1340,t1345,t1350,t1355,t1360,t1365,t1370,t1375,t1380,t1385,t1390,t1395,t1400,t1405,t1410,t1415,t1420,t1425,t1430,t1435,t1440,t1445,t1450,t1455,t1460,t1465,t1470,t1475,t1480,t1485,t1490,t1495,t1500,t1505,t1510,t1515,t1520,t1525,t1530,t1535,t1540,t1545,t1550,t1555,t1560,t1565,t1570,t1575,t1580,t1585,t1590,t1595,t1600,t1605,t1610,t1615,t1620,t1625,t1630,t1635,t1640,t1645,t1650,t1655,t1660,t1665,t1670,t1675,t1680,t1685,t1690,t1695,t1700,t1705,t1710,t1715,t1720,t1725,t1730,t1735,t1740,t1745,t1750,t1755,t1760,t1765,t1770,t1775,t1780,t1785,t1790,t1795,t1800,t1805,t1810,t1815,t1820,t1825,t1830,t1835,t1840,t1845,t1850,t1855,t1860,t1865,t1870,t1875,t1880,t1885,t1890,t1895,t1900,t1905,t1910,t1915,t1920,t1925,t1930,t1935,t1940,t1945,t1950,t1955,t1960,t1965,t1970,t1975,t1980,t1985,t1990,t1995,t2000,t2005,t2010,t2015,t2020,t2025,t2030,t2035,t2040,t2045,t2050,t2055,t2060,t2065,t2070,t2075,t2080,t2085,t2090,t2095,t2100,t2105,t2110,t2115,t2120,t2125,t2130,t2135,t2140,t2145,t2150,t2155,t2160,t2165,t2170"
0,"1021189,2,11.0,8.0,7.0,12.0,7.0,1.0,7.0,7.0,8...."
1,"1021189,2,8.0,9.0,12.0,1.0,11.0,9.0,21.0,12.0,..."
2,"1021189,2,8.0,16.0,5.0,7.0,9.0,12.0,11.0,10.0,..."
3,"1021189,2,7.0,15.0,20.0,10.0,15.0,9.0,7.0,10.0..."
4,"1021189,2,9.0,12.0,8.0,6.0,15.0,15.0,13.0,6.0,..."
