# 1. Train Custom Model

## 1-1. Read in Collected Data and Process

In [15]:
import os
import pandas as pd
from sklearn.model_selection import train_test_split

data_dir = "data/csv/"
data_path = os.path.join(data_dir, 'coords.csv')

model_dir = "trained_model/ml/"
model_path = os.path.join(model_dir, 'face_estimator.pkl')

In [2]:
Raw_data = pd.read_csv(data_path)

In [3]:
Raw_data.head()

Unnamed: 0,class,x1,y1,z1,v1,x2,y2,z2,v2,x3,...,z499,v499,x500,y500,z500,v500,x501,y501,z501,v501
0,Cheat,0.573095,0.573762,-1.126435,0.999983,0.602292,0.484688,-1.072092,0.999966,0.62661,...,-0.02621,0.0,0.648445,0.481979,-0.00453,0.0,0.654291,0.473195,-0.004386,0.0
1,Cheat,0.568429,0.574003,-1.222315,0.99998,0.601604,0.490162,-1.161438,0.999959,0.626068,...,-0.027023,0.0,0.649495,0.482035,-0.005017,0.0,0.655139,0.473306,-0.00486,0.0
2,Cheat,0.568669,0.57557,-1.254395,0.999974,0.60268,0.492227,-1.192295,0.999945,0.626835,...,-0.025951,0.0,0.64932,0.480081,-0.003594,0.0,0.654881,0.471034,-0.003284,0.0
3,Cheat,0.569152,0.575719,-1.267661,0.999972,0.603756,0.492222,-1.205576,0.999939,0.627551,...,-0.026457,0.0,0.64977,0.476933,-0.003568,0.0,0.65515,0.469058,-0.003366,0.0
4,Cheat,0.571123,0.576281,-1.26097,0.999968,0.604641,0.492222,-1.198091,0.999931,0.628456,...,-0.02737,0.0,0.649668,0.479976,-0.006268,0.0,0.6551,0.471294,-0.006187,0.0


In [4]:
X = Raw_data.drop(['class'], axis=1) # parse Features
y = Raw_data['class'] # parse target value

In [6]:
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

In [7]:
y_train.unique()

array(['noCheat', 'Cheat'], dtype=object)

## 1-2. Train Models

In [8]:
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression, RidgeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier

from tqdm import tqdm

In [11]:
pipelines = {
    'lr' : make_pipeline(StandardScaler(), LogisticRegression(max_iter=500)),
    'rc' : make_pipeline(StandardScaler(), RidgeClassifier()),
    'rf' : make_pipeline(StandardScaler(), RandomForestClassifier()),
    'gb' : make_pipeline(StandardScaler(), GradientBoostingClassifier())
}

In [12]:
fit_models = {}

for algo, pipeline in tqdm(pipelines.items()):
    model = pipeline.fit(X_train, y_train)
    fit_models[algo] = model

100%|██████████| 4/4 [00:14<00:00,  3.71s/it]


## 1-3. Evaluate and Serialize Model

In [13]:
from sklearn.metrics import accuracy_score
import pickle

In [14]:
for algo, model in tqdm(fit_models.items()):
    predict = model.predict(X_val)
    print(algo, accuracy_score(y_val, predict))

100%|██████████| 4/4 [00:00<00:00, 38.78it/s]lr 1.0
rc 1.0
rf 1.0
gb 0.9857142857142858



In [16]:
with open(model_path, 'wb') as f:
    pickle.dump(fit_models['rf'], f)