In [13]:
import numpy as np
import pandas as pd
import itertools
import random
from tqdm import tqdm
from ast import literal_eval
import time

from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.inspection import DecisionBoundaryDisplay
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader

import matplotlib.pyplot as plt

In [14]:
CLASS_1 = 0
CLASS_2 = 1

In [15]:
config_dict = {
    0: "fair",
    1: "cheat-1_no-cooldown",
    2: "cheat-2_no-dash",
    3: "cheat-3_inf-slowmo",
    4: "cheat-4_huge-damage",
    5: "cheat-5_frozen-ninja",
}

data_name_1 = config_dict[CLASS_1]
data_name_2 = config_dict[CLASS_2]

In [16]:
def preprocess_gt_csv(data_name):
    filename = f"train_data/gt/{data_name}.csv"

    columns_to_eval = [
        "global_mouse_position",
        "button_cooldown_times",
        "logical_frames_since_last_button_press",
        "global_position",
        "velocity",
        "fruits_cut_this_frame_list",
        "fruits_hit_ninjas_ass_this_frame_list",
        "fruits_on_screen_this_frame_list",
        "fruits_spawned_this_frame_list",
        "stupid_fucking_fruits_that_died_this_frame_list",
    ]

    df = pd.read_csv(filename, converters={column: literal_eval for column in columns_to_eval})

    df["fruits_spawned_num"] = df["fruits_spawned_this_frame_list"].str.len()
    df["fruits_on_screen_num"] = df["fruits_on_screen_this_frame_list"].str.len()
    df["fruits_hit_num"] = df["fruits_hit_ninjas_ass_this_frame_list"].str.len()
    df["fruits_missed_num"] = df["stupid_fucking_fruits_that_died_this_frame_list"].str.len()

    projectiles = ["Apple", "Banana", "Grape", "Pineapple", "Watermelon", "Coconut", "Cherry"]

    df["fruit_name_spawned"] = df["fruits_spawned_this_frame_list"].apply(lambda x: projectiles.index(x[0]["name"]) if len(x) != 0 else -1)

    for butt in ["b1", "b2", "b3", "b4"]:
        df[butt] = df["button_cooldown_times"].apply(lambda j: j[butt])

    general_needed = [
        "cheat_flag",
        "frame_count",
        "session_id",
        "score",
        "combo",
        "is_combo_going",
        "time_left_seconds",
        "can_dash",
    ]
    buttons_needed = [
        "b1", "b2", "b3", "b4",
    ]

    df = df[general_needed + buttons_needed]
    df[general_needed] = df[general_needed].astype(int)

    if data_name == "cheat-4_huge-damage":  # its too fucking huge
        df["score"] *= 0.0001

    return df

In [17]:
def preprocess_synthetic_csv(data_name):
    filename = f"train_data/synthetic/{data_name}_50-games.csv"
    df = pd.read_csv(filename).drop(["Unnamed: 0"], axis=1)
    return df

In [18]:
fair_synthetic_data = preprocess_synthetic_csv(data_name_1)
fair_gt_data = preprocess_gt_csv(data_name_1)

fair_synthetic_data.head()

Unnamed: 0,cheat_flag,frame_count,session_id,score,combo,is_combo_going,time_left_seconds,can_dash,b1,b2,b3,b4
0,0,1,8455078507,0,0,0,43.868229,1,2.999291,0.083167,2.992171,0.126076
1,0,2,8455078507,0,0,0,43.851845,1,2.999809,0.097893,2.993055,0.121729
2,0,3,8455078507,0,0,0,43.835456,1,2.999999,0.113775,2.993886,0.117455
3,0,4,8455078507,0,0,0,43.819062,1,2.999863,0.1308,2.994664,0.113254
4,0,5,8455078507,0,0,0,43.802663,1,2.999399,0.148953,2.99539,0.109127


In [19]:
cheat_synthetic_data = preprocess_synthetic_csv(data_name_2)
cheat_gt_data = preprocess_gt_csv(data_name_2)

cheat_synthetic_data.head()

Unnamed: 0,cheat_flag,frame_count,session_id,score,combo,is_combo_going,time_left_seconds,can_dash,b1,b2,b3,b4
0,1,1,3066053919,0,0,0,41.244696,1,0,0,0,0
1,1,2,3066053919,0,0,0,41.226906,1,0,0,0,0
2,1,3,3066053919,0,0,0,41.209092,1,0,0,0,0
3,1,4,3066053919,0,0,0,41.191254,1,0,0,0,0
4,1,5,3066053919,0,0,0,41.173391,1,0,0,0,0


In [20]:
X_synthetic = pd.concat([df.copy().drop(["cheat_flag", "session_id"], axis=1) for df in [fair_synthetic_data, cheat_synthetic_data]])
y_synthetic = pd.concat([df["cheat_flag"] for df in [fair_synthetic_data, cheat_synthetic_data]])

X_synthetic.shape, y_synthetic.shape

((491248, 10), (491248,))

In [21]:
X_gt = pd.concat([df.copy().drop(["cheat_flag", "session_id"], axis=1) for df in [fair_gt_data, cheat_gt_data]])
y_gt = pd.concat([df["cheat_flag"] for df in [fair_gt_data, cheat_gt_data]])

X_gt.shape, y_gt.shape

((62104, 10), (62104,))

In [22]:
X_train, X_test, y_train, y_test = train_test_split(
    X_synthetic, y_synthetic, test_size=0.4, random_state=42, shuffle=True
)
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((294748, 10), (196500, 10), (294748,), (196500,))

In [23]:
names = [
    "Nearest Neighbors",
    "Decision Tree",
    "Random Forest",
    "Neural Net",
    "Naive Bayes",
    "QDA",
]

classifiers = [
    KNeighborsClassifier(),
    DecisionTreeClassifier( random_state=42),
    RandomForestClassifier(random_state=42),
    MLPClassifier(max_iter=1000, random_state=42),
    GaussianNB(),
    QuadraticDiscriminantAnalysis(),
]

In [12]:
for name, clf in zip(names, classifiers):
    print(f"{name} model:")

    start = time.time()
    clf.fit(X_train, y_train)
    end = time.time()

    train_score = clf.score(X_train, y_train)
    val_score = clf.score(X_test, y_test)
    gt_score = clf.score(X_gt, y_gt)

    print(f"Train time: {(end - start):.1f} sec,  Train score: {train_score:.3f},  Val score: {val_score:.3f},  Gt score: [ {gt_score:.3f} ]")
    print("-" * 150)

Nearest Neighbors model:
Train time: 0.2 sec,  Train score: 0.999,  Val score: 0.998,  Gt score: [ 0.545 ]
------------------------------------------------------------------------------------------------------------------------------------------------------
Decision Tree model:
Train time: 0.2 sec,  Train score: 1.000,  Val score: 1.000,  Gt score: [ 0.991 ]
------------------------------------------------------------------------------------------------------------------------------------------------------
Random Forest model:
Train time: 7.0 sec,  Train score: 1.000,  Val score: 1.000,  Gt score: [ 0.997 ]
------------------------------------------------------------------------------------------------------------------------------------------------------
Neural Net model:
Train time: 17.6 sec,  Train score: 1.000,  Val score: 1.000,  Gt score: [ 0.661 ]
-------------------------------------------------------------------------------------------------------------------------------------

  X2 = np.dot(Xm, R * (S ** (-0.5)))
  X2 = np.dot(Xm, R * (S ** (-0.5)))
  u = np.asarray([np.sum(np.log(s)) for s in self.scalings_])
  X2 = np.dot(Xm, R * (S ** (-0.5)))
  X2 = np.dot(Xm, R * (S ** (-0.5)))
  u = np.asarray([np.sum(np.log(s)) for s in self.scalings_])
  X2 = np.dot(Xm, R * (S ** (-0.5)))
  X2 = np.dot(Xm, R * (S ** (-0.5)))
  u = np.asarray([np.sum(np.log(s)) for s in self.scalings_])


In [None]:
# class LSTMModel(nn.Module):
#     def __init__(self, input_size, hidden_size, output_size, num_layers):
#         super(LSTMModel, self).__init__()
#         self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
#         self.fc = nn.Linear(hidden_size, output_size)
#
#     def forward(self, x):
#         out, _ = self.lstm(x)
#         out = self.fc(out[:, -1, :])  # Take the output from the last time step
#         return out

In [24]:
from torch.autograd import Variable
device = torch.device("cpu")

class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes, num_layers):
        super(LSTMModel, self).__init__()

        self.num_classes = num_classes
        self.num_layers = num_layers
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.dropout = nn.Dropout(p=0.2)

        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            dropout = 0.25
        )

        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        h_0 = Variable(torch.zeros(
            self.num_layers, x.size(0), self.hidden_size).to(device))

        c_0 = Variable(torch.zeros(
            self.num_layers, x.size(0), self.hidden_size).to(device))

        # Propagate input through LSTM
        ula, (h_out, _) = self.lstm(x, (h_0, c_0))

        h_out = h_out.view(-1, self.hidden_size)

        out = self.fc(h_out)
        out = self.dropout(out)

        return out

In [25]:
input_size = 10
hidden_size = 50
output_size = 2
num_layers = 1

learning_rate = 0.001
num_epochs = 5
batch_size = 60

In [26]:
train_subset = TensorDataset(
    torch.FloatTensor(X_synthetic.to_numpy()).unsqueeze(1),
    torch.LongTensor(y_synthetic.to_numpy()),
)
train_loader = DataLoader(train_subset, batch_size=batch_size, shuffle=True)


val_subset = TensorDataset(
    torch.FloatTensor(X_gt[: len(X_gt) // 4].to_numpy()).unsqueeze(1),
    torch.LongTensor(y_gt[: len(y_gt) // 4].to_numpy()),
)
val_loader = DataLoader(val_subset, batch_size=batch_size, shuffle=True)


gt_dataset = TensorDataset(
    torch.FloatTensor(X_gt[len(X_gt) // 4 :].to_numpy()).unsqueeze(1),
    torch.LongTensor(y_gt[len(y_gt) // 4 :].to_numpy()),
)
gt_loader = DataLoader(gt_dataset, batch_size=batch_size, shuffle=True)

In [27]:
model = LSTMModel(input_size, hidden_size, output_size, num_layers)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-5)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode="min", patience=0, threshold=1, factor=0.5, threshold_mode="abs", min_lr=0.0005, verbose=True)




In [28]:
for epoch in range(num_epochs):
    train_loss = 0
    train_corrects = 0

    model.train()
    for inputs, labels in train_loader:
        optimizer.zero_grad()

        outputs = model(inputs)

        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        _, preds = torch.max(outputs, 1)
        train_corrects += torch.sum(preds == labels).item()

    train_loss /= len(train_loader)
    train_acc = train_corrects / len(train_subset) * 100
    print(f"\n[ Train    {epoch + 1}/{num_epochs} ] loss: {train_loss:.4f}, accuracy: [ {train_acc:<7.4f}% ]")


    val_loss = 0
    val_corrects = 0

    model.eval()
    with torch.no_grad():
        for inputs, labels in val_loader:
            outputs = model(inputs)

            loss = criterion(outputs, labels)

            val_loss += loss.item()
            _, preds = torch.max(outputs, 1)
            val_corrects += torch.sum(preds == labels).item()

    val_loss /= len(val_loader)
    val_acc = val_corrects / len(val_subset) * 100
    print(f"[ Validation {epoch + 1}/{num_epochs} ] loss: {val_loss:.4f}, accuracy: [ {val_acc:<7.4f}% ]")


    gt_corrects = 0

    model.eval()
    with torch.no_grad():
        for inputs, labels in gt_loader:
            outputs = model(inputs)

            _, preds = torch.max(outputs, 1)
            gt_corrects += torch.sum(preds == labels).item()

    gt_acc = gt_corrects / len(gt_dataset) * 100
    print(f"[ Gt         {epoch + 1}/{num_epochs} ], accuracy: [ {gt_acc:<7.4f}% ]")

    scheduler.step(val_loss)


[ Train    1/5 ] loss: 0.1587, accuracy: [ 94.6003% ]
[ Validation 1/5 ] loss: 0.0937, accuracy: [ 98.5830% ]
[ Gt         1/5 ], accuracy: [ 81.3238% ]



KeyboardInterrupt



In [131]:
# torch.save(model.state_dict(), f"weights/lstm_{CLASS_1}-{CLASS_2}.pth")

(tensor(0.1833, dtype=torch.float64), 0.0)