In [20]:
import numpy as np
import pandas as pd
import itertools
import random
from tqdm import tqdm

from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.inspection import DecisionBoundaryDisplay
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

import matplotlib.pyplot as plt

In [23]:
fair_data = pd.read_csv("train_data/synthetic/fair_50-games.csv").drop(["Unnamed: 0"], axis=1)
fair_data.head()

Unnamed: 0,cheat_flag,frame_count,session_id,score,combo,is_combo_going,time_left_seconds,can_dash,b1,b2,b3,b4
0,0,1,8455078507,0,0,0,43.868229,1,2.999291,0.083167,2.992171,0.126076
1,0,2,8455078507,0,0,0,43.851845,1,2.999809,0.097893,2.993055,0.121729
2,0,3,8455078507,0,0,0,43.835456,1,2.999999,0.113775,2.993886,0.117455
3,0,4,8455078507,0,0,0,43.819062,1,2.999863,0.1308,2.994664,0.113254
4,0,5,8455078507,0,0,0,43.802663,1,2.999399,0.148953,2.99539,0.109127


In [24]:
cheat_data = pd.read_csv("train_data/synthetic/cheat-1_no-cooldown_50-games.csv").drop(["Unnamed: 0"], axis=1)
cheat_data.head()

Unnamed: 0,cheat_flag,frame_count,session_id,score,combo,is_combo_going,time_left_seconds,can_dash,b1,b2,b3,b4
0,1,1,3066053919,0,0,0,41.244696,1,0,0,0,0
1,1,2,3066053919,0,0,0,41.226906,1,0,0,0,0
2,1,3,3066053919,0,0,0,41.209092,1,0,0,0,0
3,1,4,3066053919,0,0,0,41.191254,1,0,0,0,0
4,1,5,3066053919,0,0,0,41.173391,1,0,0,0,0


In [25]:
X = pd.concat([df.copy().drop(["cheat_flag", "session_id"], axis=1) for df in [fair_data, cheat_data]])
X.shape

(491248, 10)

In [26]:
y = pd.concat([df["cheat_flag"] for df in [fair_data, cheat_data]])
y.shape

(491248,)

In [35]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.4, random_state=42
)
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((294748, 10), (196500, 10), (294748,), (196500,))

In [28]:
names = [
    "Nearest Neighbors",
    # "Linear SVM",
    # "RBF SVM",
    # "Gaussian Process",
    "Decision Tree",
    "Random Forest",
    # "Neural Net",
    "AdaBoost",
    "Naive Bayes",
    "QDA",
]

classifiers = [
    KNeighborsClassifier(3),
    # SVC(kernel="linear", C=0.025, random_state=42),
    # SVC(gamma=2, C=1, random_state=42),
    # GaussianProcessClassifier(1.0 * RBF(1.0), random_state=42),
    DecisionTreeClassifier(max_depth=5, random_state=42),
    RandomForestClassifier(
        max_depth=5, n_estimators=10, max_features=1, random_state=42
    ),
    # MLPClassifier(alpha=1, max_iter=1000, random_state=42),
    AdaBoostClassifier(random_state=42),
    GaussianNB(),
    QuadraticDiscriminantAnalysis(),
]

In [29]:
# iterate over classifiers
for name, clf in zip(names, classifiers):
    clf.fit(X_train, y_train)
    score = clf.score(X_test, y_test)
    print(f"{name} score: {score}")


Nearest Neighbors score: 0.999119592875318
Decision Tree score: 1.0
Random Forest score: 1.0
AdaBoost score: 1.0
Naive Bayes score: 1.0
QDA score: 0.38525190839694656


  X2 = np.dot(Xm, R * (S ** (-0.5)))
  X2 = np.dot(Xm, R * (S ** (-0.5)))
  u = np.asarray([np.sum(np.log(s)) for s in self.scalings_])


In [36]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader

# Assuming X_train is a numpy array of shape (X, 10)
# and y_train is a numpy array of shape (X,)
X_train = torch.FloatTensor(X_train.to_numpy())
y_train = torch.LongTensor(y_train.to_numpy())

# Hyperparameters
input_size = 10
hidden_size = 50
output_size = 2
num_layers = 1
batch_size = 64
learning_rate = 0.001
num_epochs = 5

# Define the LSTM model
class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers):
        super(LSTMModel, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        out, _ = self.lstm(x)
        out = self.fc(out[:, -1, :])  # Take the output from the last time step
        return out

# Instantiate the model, loss function, and optimizer
model = LSTMModel(input_size, hidden_size, output_size, num_layers)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Prepare DataLoader
dataset = TensorDataset(X_train.unsqueeze(1), y_train)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

# Training loop
for epoch in range(num_epochs):
    for inputs, labels in dataloader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item()}')

# Now you can use the trained model for prediction or further evaluation


Epoch [1/5], Loss: 0.035560306161642075
Epoch [2/5], Loss: 0.04866434261202812
Epoch [3/5], Loss: 0.013931037858128548
Epoch [4/5], Loss: 0.0046037910506129265
Epoch [5/5], Loss: 0.0002790148719213903


In [39]:
model.eval()
running_corrects = 0

with torch.no_grad():

    for inputs, labels in dataloader:
        outputs = model(inputs)
        _, preds = torch.max(outputs, 1)

        running_corrects += torch.sum(preds == labels).item()

running_corrects / len(dataset)

1.0