<a href="https://colab.research.google.com/github/hn-iiitd/ML_Project/blob/harsh_hingorani/ML_proj.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, MinMaxScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import cross_val_score

dataset = pd.read_csv("/content/online_gaming_behavior_data.csv")

y = dataset["InGamePurchases"].values
X = dataset.drop(columns='InGamePurchases')

# Applying one-hot encoding to categorical features
categorical_columns = ['Gender', 'Location', 'GameGenre', 'GameDifficulty', 'EngagementLevel']
X_encoded = pd.get_dummies(X, columns=categorical_columns)

# Applying Min-Max scaling to numeric features
numeric_columns = ['Age', 'PlayTimeHours', 'SessionsPerWeek', 'AvgSessionDurationMinutes', 'PlayerLevel', 'AchievementsUnlocked']
scaler = MinMaxScaler()
X_encoded[numeric_columns] = scaler.fit_transform(X_encoded[numeric_columns])

# Making the feature vector
X = X_encoded.values

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Logistic Regression Model
log_reg = LogisticRegression(C=0.001, solver='lbfgs', max_iter=10000)
log_reg.fit(X_train, y_train)

# Decision Tree Model
decision_tree = DecisionTreeClassifier(criterion='entropy', max_depth=10, min_samples_split=10, min_samples_leaf=2, random_state=42)
decision_tree.fit(X_train, y_train)

# Performance of Logistic Regression
y_pred_log_reg = log_reg.predict(X_test)
log_reg_accuracy = accuracy_score(y_test, y_pred_log_reg)

# Performance of Decision Tree
y_pred_dec_tree = decision_tree.predict(X_test)
dec_tree_accuracy = accuracy_score(y_test, y_pred_dec_tree)

# Results for Logistic Regression $ Decision Tree
print("Logistic Regression Accuracy:", log_reg_accuracy)
print("\nDecision Tree Accuracy:", dec_tree_accuracy)

# Results for Cross-validation accuracy
scores = cross_val_score(log_reg, X_train, y_train, cv=10)
print(f"Cross-validated accuracy: {scores.mean():.2f}")



Logistic Regression Accuracy: 0.7964281253902835

Decision Tree Accuracy: 0.7923067316098414
Cross-validated accuracy: 0.80


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, MinMaxScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import cross_val_score
from imblearn.over_sampling import SMOTE

# Load dataset
dataset = pd.read_csv("/content/online_gaming_behavior_data.csv")

# Define target variable and features
y = dataset["InGamePurchases"].values
X = dataset.drop(columns='InGamePurchases')

# One-hot encoding of categorical features
categorical_columns = ['Gender', 'Location', 'GameGenre', 'GameDifficulty', 'EngagementLevel']
X_encoded = pd.get_dummies(X, columns=categorical_columns)

# Min-Max scaling of numeric features
numeric_columns = ['Age', 'PlayTimeHours', 'SessionsPerWeek', 'AvgSessionDurationMinutes', 'PlayerLevel', 'AchievementsUnlocked']
scaler = MinMaxScaler()
X_encoded[numeric_columns] = scaler.fit_transform(X_encoded[numeric_columns])

# Convert to numpy array
X = X_encoded.values

# Apply SMOTE to handle class imbalance
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X, y)

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)

# Logistic Regression with class weighting
log_reg = LogisticRegression(C=0.001, solver='lbfgs', max_iter=10000)
log_reg.fit(X_train, y_train)

# Decision Tree with class weighting
decision_tree = DecisionTreeClassifier(criterion='entropy', max_depth=10, min_samples_split=10, min_samples_leaf=2, random_state=42)
decision_tree.fit(X_train, y_train)

# Performance of Logistic Regression
y_pred_log_reg = log_reg.predict(X_test)
log_reg_accuracy = accuracy_score(y_test, y_pred_log_reg)

# Performance of Decision Tree
y_pred_dec_tree = decision_tree.predict(X_test)
dec_tree_accuracy = accuracy_score(y_test, y_pred_dec_tree)

# Results
print("Logistic Regression Accuracy:", log_reg_accuracy)
print("\nDecision Tree Accuracy:", dec_tree_accuracy)

# Cross-validation accuracy
scores = cross_val_score(log_reg, X_train, y_train, cv=10)
print(f"Cross-validated accuracy (Logistic Regression): {scores.mean():.2f}")


Logistic Regression Accuracy: 0.5085169557743398

Decision Tree Accuracy: 0.8598999843725582
Cross-validated accuracy (Logistic Regression): 0.51
