In [2]:
import pandas as pd
import numpy as np
import os

import joblib
from sklearn.preprocessing import StandardScaler

In [2]:
def concatenateData():
    """Merge all character data in a single DataFrame & filter required cols"""
    data = pd.DataFrame()
    dataDirectory = 'training_data_p2/'

    # Defining expected dtypes for each column
    dtype_dict = {
        "frame": int,
        "p1Id": int,
        "p1Health": int,
        "p1PosX": int,
        "p1PosY": int,
        "p1Jump": int,
        "p1Crouch": int,
        "p1InMove": int,
        "p1MoveId": int,
        "p1Up": int,
        "p1Down": int,
        "p1Left": int,
        "p1Right": int,
        "p1Select": int,
        "p1Start": int,
        "p1Y": int,
        "p1B": int,
        "p1X": int,
        "p1A": int,
        "p1L": int,
        "p1R": int,
        "p2Id": int,
        "p2Health": int,
        "p2PosX": int,
        "p2PosY": int,
        "p2Jump": int,
        "p2Crouch": int,
        "p2InMove": int,
        "p2MoveId": int,
        "p2Up": int,
        "p2Down": int,
        "p2Left": int,
        "p2Right": int,
        "p2Select": int,
        "p2Start": int,
        "p2Y": int,
        "p2B": int,
        "p2X": int,
        "p2A": int,
        "p2L": int,
        "p2R": int,
        "timer": int,
        "roundStarted": int,
        "roundOver": int,
        "fightResult": str
    }

    column_names = [
        "frame", "p1Id", "p1Health", "p1PosX", "p1PosY", "p1Jump", "p1Crouch", "p1InMove", "p1MoveId",
        "p1Up", "p1Down", "p1Left", "p1Right", "p1Select", "p1Start", "p1Y", "p1B", "p1X", "p1A", "p1L", "p1R",
        "p2Id", "p2Health", "p2PosX", "p2PosY", "p2Jump", "p2Crouch", "p2InMove", "p2MoveId",
        "p2Up", "p2Down", "p2Left", "p2Right", "p2Select", "p2Start", "p2Y", "p2B", "p2X", "p2A", "p2L", "p2R",
        "timer", "roundStarted", "roundOver", "fightResult"
    ]

    for filename in os.listdir(dataDirectory):
        filepath = os.path.join(dataDirectory, filename)
        df = pd.read_csv(filepath, low_memory=False, dtype=dtype_dict, skiprows=1, names=column_names)
        data = pd.concat([data, df], ignore_index=True)

    data = data[data['roundStarted'] != False]

    # dropping cuz not needed
    data = data.drop(['frame', 'roundStarted', 'fightResult', 'roundOver'], axis=1)

    data['xDist'] = data['p1PosX'] - data['p2PosX']
    data['yDist'] = data['p1PosY'] - data['p2PosY']
    data = data.drop(columns=['p1PosX', 'p1PosY', 'p2PosX', 'p2PosY'], axis=1)
    # print(data.dtypes)
    # print(data.sample(5))
    return data

def normaliseFeatures(data):
    """Normalise yDist, xDist, health & timer values"""
    # using StandardScalar here because it has advantages > standard max val normalisation
    scaler = StandardScaler()
    featuresToNormalise = ['p1Health', 'p2Health', 'timer', 'xDist', 'yDist']
    data[featuresToNormalise] = scaler.fit_transform(data[featuresToNormalise])

    # saving the scaler for later use
    joblib.dump(scaler, 'scaler.joblib')

    return data


In [3]:
column_names = [
        "frame", "p1Id", "p1Health", "p1PosX", "p1PosY", "p1Jump", "p1Crouch", "p1InMove", "p1MoveId",
        "p1Up", "p1Down", "p1Left", "p1Right", "p1Select", "p1Start", "p1Y", "p1B", "p1X", "p1A", "p1L", "p1R",
        "p2Id", "p2Health", "p2PosX", "p2PosY", "p2Jump", "p2Crouch", "p2InMove", "p2MoveId",
        "p2Up", "p2Down", "p2Left", "p2Right", "p2Select", "p2Start", "p2Y", "p2B", "p2X", "p2A", "p2L", "p2R",
        "timer", "roundStarted", "roundOver", "fightResult"
    ]
dtype_dict = {
        "frame": int,
        "p1Id": int,
        "p1Health": int,
        "p1PosX": int,
        "p1PosY": int,
        "p1Jump": int,
        "p1Crouch": int,
        "p1InMove": int,
        "p1MoveId": int,
        "p1Up": int,
        "p1Down": int,
        "p1Left": int,
        "p1Right": int,
        "p1Select": int,
        "p1Start": int,
        "p1Y": int,
        "p1B": int,
        "p1X": int,
        "p1A": int,
        "p1L": int,
        "p1R": int,
        "p2Id": int,
        "p2Health": int,
        "p2PosX": int,
        "p2PosY": int,
        "p2Jump": int,
        "p2Crouch": int,
        "p2InMove": int,
        "p2MoveId": int,
        "p2Up": int,
        "p2Down": int,
        "p2Left": int,
        "p2Right": int,
        "p2Select": int,
        "p2Start": int,
        "p2Y": int,
        "p2B": int,
        "p2X": int,
        "p2A": int,
        "p2L": int,
        "p2R": int,
        "timer": int,
        "roundStarted": int,
        "roundOver": int,
        "fightResult": str
    }

df = pd.read_csv('training_data_p2/Balrog', low_memory=False, dtype=dtype_dict, skiprows=1, names=column_names)
df.to_excel('output.xlsx', index=False)

In [None]:
data = concatenateData()
data = normaliseFeatures(data)
data.to_excel('output.xlsx', index=False)

In [5]:
import matplotlib.pyplot as plt

# Load and preprocess the data
data = concatenateData()
data = normaliseFeatures(data)

# Filter movement and attack keys for p1 and p2
movement_attack_keys = ['p1Up', 'p1Down', 'p1Left', 'p1Right', 'p1Y', 'p1B', 'p1X', 'p1A', 
                        'p2Up', 'p2Down', 'p2Left', 'p2Right', 'p2Y', 'p2B', 'p2X', 'p2A']

filtered_data = data[movement_attack_keys]
filtered_data = filtered_data[(filtered_data == 1).any(axis=1)]
print(data[movement_attack_keys].head())
# Plot the data
p1_keys = ['p1Up', 'p1Down', 'p1Left', 'p1Right', 'p1Y', 'p1B', 'p1X', 'p1A', 'p1PosX', 'p1PosY']
p2_keys = ['p2Up', 'p2Down', 'p2Left', 'p2Right', 'p2Y', 'p2B', 'p2X', 'p2A', 'p2PosX', 'p2PosY']

p1_counts = filtered_data[p1_keys].sum()
p2_counts = filtered_data[p2_keys].sum()
print("P1 counts:")
print(p1_counts)

print("P2 counts:")
print(p2_counts)
plt.figure(figsize=(10, 6))
plt.bar(p1_keys, p1_counts, label='Player 1', alpha=0.7)
plt.bar(p2_keys, p2_counts, label='Player 2', alpha=0.7)
plt.xlabel('Keys')
plt.ylabel('Count')
plt.title('Player 1 vs Player 2 Movement and Attack Keys (Values = 1)')
plt.legend()
plt.show()

     p1Up  p1Down  p1Left  p1Right  p1Y  p1B  p1X  p1A  p2Up  p2Down  p2Left  \
0       0       0       0        0    0    0    0    0     0       0       0   
1       0       0       0        0    0    0    0    0     0       0       0   
2       0       0       0        0    0    0    0    0     0       0       0   
272     0       0       1        0    0    0    0    0     0       0       0   
273     0       0       1        0    0    0    0    0     0       0       0   

     p2Right  p2Y  p2B  p2X  p2A  
0          0    0    0    0    0  
1          0    0    0    0    0  
2          0    0    0    0    0  
272        0    0    0    0    0  
273        0    0    0    0    0  


KeyError: "['p1PosX', 'p1PosY'] not in index"