In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import LabelEncoder

In [None]:
# Read the dataset CSV into `df`; the path is relative to the current working directory.
df = pd.read_csv(filepath_or_buffer="online_gaming_insights.csv")

In [3]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns
df.describe()

Unnamed: 0,PlayerID,Age,PlayTimeHours,InGamePurchases,SessionsPerWeek,AvgSessionDurationMinutes,PlayerLevel,AchievementsUnlocked
count,40034.0,40034.0,40034.0,40034.0,40034.0,40034.0,40034.0,40034.0
mean,29016.5,31.992531,12.024365,0.200854,9.471774,94.792252,49.655568,24.526477
std,11556.964675,10.043227,6.914638,0.400644,5.763667,49.011375,28.588379,14.430726
min,9000.0,15.0,0.000115,0.0,0.0,10.0,1.0,0.0
25%,19008.25,23.0,6.067501,0.0,4.0,52.0,25.0,12.0
50%,29016.5,32.0,12.008002,0.0,9.0,95.0,49.0,25.0
75%,39024.75,41.0,17.963831,0.0,14.0,137.0,74.0,37.0
max,49033.0,49.0,23.999592,1.0,19.0,179.0,99.0,49.0


In [4]:
# Column names, non-null counts and dtypes for every column
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 40034 entries, 0 to 40033
Data columns (total 13 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   PlayerID                   40034 non-null  int64  
 1   Age                        40034 non-null  int64  
 2   Gender                     40034 non-null  object 
 3   Location                   40034 non-null  object 
 4   GameGenre                  40034 non-null  object 
 5   PlayTimeHours              40034 non-null  float64
 6   InGamePurchases            40034 non-null  int64  
 7   GameDifficulty             40034 non-null  object 
 8   SessionsPerWeek            40034 non-null  int64  
 9   AvgSessionDurationMinutes  40034 non-null  int64  
 10  PlayerLevel                40034 non-null  int64  
 11  AchievementsUnlocked       40034 non-null  int64  
 12  EngagementLevel            40034 non-null  object 
dtypes: float64(1), int64(7), object(5)
memory usag

In [None]:
# Set a random seed for reproducibility
# This seeds NumPy's global RNG only; the train/test split and the random forest
# further down are given their own explicit random_state values.
np.random.seed(42)

In [6]:
# Preview the first rows (up to 20) of the loaded frame
df.head(20)

Unnamed: 0,PlayerID,Age,Gender,Location,GameGenre,PlayTimeHours,InGamePurchases,GameDifficulty,SessionsPerWeek,AvgSessionDurationMinutes,PlayerLevel,AchievementsUnlocked,EngagementLevel
0,9000,43,Male,Other,Strategy,16.271119,0,Medium,6,108,79,25,Medium
1,9001,29,Female,USA,Strategy,5.525961,0,Medium,5,144,11,10,Medium
2,9002,22,Female,USA,Sports,8.223755,0,Easy,16,142,35,41,High
3,9003,35,Male,USA,Action,5.265351,1,Easy,9,85,57,47,Medium
4,9004,33,Male,Europe,Action,15.531945,0,Medium,2,131,95,37,Medium
5,9005,37,Male,Europe,RPG,20.561855,0,Easy,2,81,74,22,Low
6,9006,25,Male,USA,Action,9.752716,0,Hard,1,50,13,2,Low
7,9007,25,Female,Asia,RPG,4.401729,0,Medium,10,48,27,23,Medium
8,9008,38,Female,Europe,Simulation,18.152733,0,Easy,5,101,23,41,Medium
9,9009,38,Female,Other,Sports,23.942772,0,Easy,13,95,99,36,High


In [None]:
# Turn the EngagementLevel strings into integer class codes.
# The fitted encoder `le` is kept so codes can be mapped back to names via le.classes_
# (in the sample printed below: High -> 0, Low -> 1, Medium -> 2).
le = LabelEncoder()
encoded_EL_data = le.fit_transform(df.loc[:, "EngagementLevel"])

In [None]:
# Print the first 10 encoded labels as a quick sanity check of the encoding
print(encoded_EL_data[:10])

[2 2 0 2 2 1 1 2 2 0]


In [None]:
# Feature/target selection and stratified hold-out split
# Features: four numeric activity columns; target: the integer-encoded EngagementLevel.
X = df.loc[:, ["PlayTimeHours", "SessionsPerWeek", "AvgSessionDurationMinutes", "PlayerLevel"]]
y = encoded_EL_data
# 80/20 split; stratify=y keeps the class proportions the same in both parts.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=4,
    stratify=y,
)

In [10]:
# Feature matrix shape, then target vector shape (one per line)
print(X.shape, y.shape, sep="\n")


(40034, 4)
(40034,)


In [11]:
# Row/column counts of the two partitions produced by the split
print(
    f"Training data shape: {X_train.shape}",
    f"Testing data shape: {X_test.shape}",
    sep="\n",
)

Training data shape: (32027, 4)
Testing data shape: (8007, 4)


In [None]:
# Build (but do not yet fit) a 100-tree random forest with a fixed seed
model = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
)

In [None]:
# Train the forest on the training partition only
model.fit(X=X_train, y=y_train)

In [None]:
# Predict encoded engagement classes for the held-out test rows
y_pred = model.predict(X=X_test)

In [None]:
# Fraction of test rows whose predicted class equals the true class
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
accuracy

0.8927188709878856

In [None]:
# Comprehensive Evaluation & Interpretation
# Per-class precision / recall / F1 on the held-out test set.
# Rows are labelled with the original EngagementLevel names rather than the integer
# codes: le.classes_[i] is the name that was encoded as i, so passing the codes as
# `labels` and the names as `target_names` keeps each row aligned with its class.
class_codes = np.arange(len(le.classes_))
report = classification_report(
    y_test,
    y_pred,
    labels=class_codes,
    target_names=list(le.classes_),
)
print(report)

              precision    recall  f1-score   support

           0       0.91      0.88      0.90      2067
           1       0.87      0.86      0.86      2065
           2       0.90      0.92      0.91      3875

    accuracy                           0.89      8007
   macro avg       0.89      0.89      0.89      8007
weighted avg       0.89      0.89      0.89      8007

