# **CrashGame prediction using Scikit-Learn**

### Purpose
This notebook uses machine learning (scikit-learn) to predict the next result of the crash game from historical data.

## 1. Load Data

In [42]:
import pandas as pd
# Raw feature table as read from disk (trimmed into `main_df` in a later cell).
main_df1 = pd.read_csv('aviator_dataset_clean.csv')

# Raw multiplier series as read from disk.
multipliers_df1 = pd.read_csv('multipliers.csv')

**Summary of df**

In [45]:
# Row counts of the two dataframes.
# NOTE(review): `main_df` and `multipliers_df` are first assigned in the next cell, so a
# top-to-bottom run on a fresh kernel raises NameError here — this cell probably belongs
# after the trim cell.
len(main_df), len(multipliers_df)

(10000, 10000)

In [44]:
# Trim both dataframes to their first 10000 rows.
# Both trimmed frames are derived from the raw frames (`main_df1`, `multipliers_df1`),
# so the cell works on a fresh kernel and gives the same result when re-run.
main_df = main_df1.head(10000)
multipliers_df = multipliers_df1.head(10000)

In [46]:
# Column names of the trimmed feature table.
main_df.columns

Index(['color', 'mean', 'var', 'next_approximate', 'target'], dtype='object')

In [47]:
# Column names of the trimmed multipliers table.
multipliers_df.columns

Index(['Multiplier'], dtype='object')

In [48]:
# Preview the first rows of the feature table.
main_df.head()

Unnamed: 0,color,mean,var,next_approximate,target
0,1,1.41,1.41,2.82,2.8
1,1,4.67,2.7,7.38,6.55
2,0,3.83,1.71,5.54,1.1
3,0,1.08,1.79,2.87,1.06
4,0,1.47,1.47,2.94,1.88


In [49]:
# Dtypes, non-null counts and memory usage of the feature table.
main_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 5 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   color             10000 non-null  int64  
 1   mean              10000 non-null  float64
 2   var               10000 non-null  float64
 3   next_approximate  10000 non-null  float64
 4   target            10000 non-null  float64
dtypes: float64(4), int64(1)
memory usage: 390.8 KB


In [50]:
# Summary statistics, transposed so that each column of `main_df` becomes one row.
main_df.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
color,10000.0,0.4283,0.494857,0.0,0.0,0.0,1.0,1.0
mean,10000.0,2.538178,1.332315,1.0,1.5,2.13,3.22,9.63
var,10000.0,1.715134,1.103064,1.13,1.2,1.41,1.69,18.01
next_approximate,10000.0,4.253191,2.253639,2.82,2.96,3.3,4.53,27.64
target,10000.0,2.538185,1.894989,1.0,1.24,1.77,3.09,9.98


In [51]:
# Preview the first rows of the multipliers table.
multipliers_df.head()

Unnamed: 0,Multiplier
0,2.8
1,6.55
2,1.1
3,1.06
4,1.88


## 2. Feature Engineering

- x variable - features
- y variable - target

In [52]:
# Target: the `target` column on its own.
y = main_df['target']
# Features: every remaining column of the table.
x = main_df.drop(columns='target')

- multipliers could be included as an additional feature (the cell below is currently commented out, so they are not used)

In [53]:
# Disabled: adding the multiplier series as an extra feature.
# NOTE(review): the first rows of `multipliers_df['Multiplier']` displayed earlier are
# identical to the first rows of `target`, so enabling this looks like it would leak the
# label into the features — confirm before turning it on.
# x['multipliers'] = multipliers_df['Multiplier']

## 3. Train-test split

In [54]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

## 4. Train Model

In [55]:
# Model class and the regression metrics used by the evaluation cells below.
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Baseline random forest: 100 trees, fixed seed for reproducible results.
# The name `clf` is kept because later cells reference it, although this is a regressor.
clf = RandomForestRegressor(n_estimators = 100, random_state = 42)

In [56]:
# Fit the baseline random forest on the training split.
clf.fit(x_train, y_train)

In [57]:
# Predict the target for every row of the held-out test split.
y_pred = clf.predict(x_test)

In [58]:
# Display the predicted values (NumPy shows a truncated view of the array).
y_pred

array([4.91875495, 3.22566653, 3.78523746, ..., 2.85526752, 1.15193662,
       1.09040606])

In [59]:
# Regression error metrics on the test split (these are error / fit measures, not accuracy).

mse = mean_squared_error(y_test, y_pred) # mean squared error
rsme = mse ** 0.5 # root mean squared error (RMSE); the `rsme` spelling is kept because the next cell reads it
r2 = r2_score(y_test, y_pred) # coefficient of determination (R squared)

In [60]:
# Display (MSE, RMSE, R squared) for the baseline model on the test split.
mse, rsme, r2

(1.110644242052153, 1.0538710746823603, 0.699262374750389)

## 5. Evaluate the model

In [61]:
# Score of the baseline model on the training split (compare with the test score below).
clf.score(x_train, y_train)

0.8678814189462214

In [62]:
# Score of the baseline model on the held-out test split; equals the `r2` value computed above.
clf.score(x_test, y_test)

0.699262374750389

## 6. Fine tune the model

In [63]:
from sklearn.model_selection import GridSearchCV

# Hyper-parameter candidates: 3 forest sizes x 3 depth limits (None = unlimited depth).
param_grid = dict(
    n_estimators=[50, 100, 200],
    max_depth=[None, 10, 20],
)

In [64]:
# Exhaustive search over `param_grid` with 5-fold cross-validation on the training split.
# Scoring is negated MSE because scikit-learn maximises the score.
grid_search = GridSearchCV(
    estimator=RandomForestRegressor(random_state=42),
    param_grid=param_grid,
    cv=5,
    scoring='neg_mean_squared_error',
)
grid_search.fit(x_train, y_train)

In [65]:
# Parameter combination with the best cross-validated score in the grid search.
best_params = grid_search.best_params_
print(f'Best parameters: {best_params}')

Best parameters: {'max_depth': 10, 'n_estimators': 200}


In [81]:
# Take the estimator selected by the grid search and evaluate it on the held-out test split.
best_model = grid_search.best_estimator_
y_pred_best = best_model.predict(x_test)
best_mse = mean_squared_error(y_test, y_pred_best)  # mean squared error of the tuned model
best_rmse = best_mse ** 0.5  # root mean squared error of the tuned model

In [82]:
# Report the tuned model's test-set errors (compare with the baseline `mse` / `rsme` above).
print(f'Best Model MSE: {best_mse}')
print(f'Best Model RMSE: {best_rmse}')

Best Model MSE: 0.9141843241344605
Best Model RMSE: 0.9561298678184154


## 7. Save and load model

In [83]:
import pickle

# Persist the fitted baseline model. The context manager closes the file handle
# (flushing the write) even if pickling raises.
# NOTE(review): this saves `clf` (the untuned forest), not the tuned `best_model` —
# confirm which model is meant to be shipped.
with open('Crash-Game-Random-Forest-Model.pkl', 'wb') as model_file:
    pickle.dump(clf, model_file)

In [84]:
# Reload the saved model; `with` closes the file handle once unpickling finishes.
# Only unpickle files you created yourself — pickle.load can execute arbitrary code.
with open('Crash-Game-Random-Forest-Model.pkl', 'rb') as model_file:
    crash_model = pickle.load(model_file)

In [86]:
# Sanity check: the reloaded model should score the same on the test split as `clf` did before saving.
crash_model.score(x_test, y_test)

0.699262374750389