# MCLabs Churn Analyzer - Model Creation

This Jupyter Notebook will create a ML model, train it on our training data, then offer a simple test analysis using test data.

In [14]:
'''
MODULE/PACKAGE IMPORTS
'''

# System
import os
import re
from glob import glob
from dotenv import load_dotenv
from datetime import datetime, timedelta

# Data
import numpy as np
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Pipelining
import joblib

# Output/Display
from tqdm import tqdm

In [None]:
'''
PIPELINE CREATION

This section will create a pipeline for loading the data, splitting the data, scaling the data, and training the model.
'''

# Load the data
modelInputDataframe = pd.read_csv("../data/targetted/public/1756184400/targetted.csv")

# Separate features from target
MCA_Features = modelInputDataframe.drop(columns=["churn"])
MCA_Target = modelInputDataframe["churn"]

# Split the data
MCA_Features_Train, MCA_Features_Test, MCA_Target_Train, MCA_Target_Test = train_test_split(MCA_Features, MCA_Target, test_size=0.2)

# Identify which features are categorical
categoricalFeatures = [" plan_player_favorite_server"]

# Identify which features are numerical (note we do not include the last seen time here)
numericalFeatures = [" mcmmo_power_level", " mcmmo_skill_ACROBATICS", " mcmmo_skill_ALCHEMY", " mcmmo_skill_ARCHERY", " mcmmo_skill_AXES", " mcmmo_skill_CROSSBOWS", " mcmmo_skill_EXCAVATION", " mcmmo_skill_FISHING", " mcmmo_skill_HERBALISM", " mcmmo_skill_MACES", " mcmmo_skill_MINING", " mcmmo_skill_REPAIR", " mcmmo_skill_SALVAGE", " mcmmo_skill_SMELTING", " mcmmo_skill_SWORDS", " mcmmo_skill_TAMING", " mcmmo_skill_TRIDENTS", " mcmmo_skill_UNARMED", " mcmmo_skill_WOODCUTTING", " lw_rev_total", " lw_rev_phase", " chemrank", " policerank", " donorrank", " goldrank", " current_month_votes", " plan_player_time_total_raw", " plan_player_time_month_raw", " plan_player_time_week_raw", " plan_player_time_day_raw", " plan_player_time_afk_raw", " plan_player_latest_session_length_raw", " plan_player_sessions_count", " leaderboard_position_chems_all", " leaderboard_position_chems_week", " leaderboard_position_police_all", " leaderboard_position_police_week", " balance", " plan_player_relativePlaytime_totalmonth", " plan_player_relativePlaytime_weekmonth", " plan_player_relativePlaytime_dayweek"]

# Create preprocessing transformers for encoding and scaling features
preprocessor = ColumnTransformer(
    transformers=[
        ("cat", OneHotEncoder(handle_unknown="ignore"), categoricalFeatures),
        ("num", StandardScaler(), numericalFeatures)
    ]
)

# Define pipeline
MCA_Pipeline_LogReg = Pipeline([
    ("preprocessor", preprocessor),
    ("model", LogisticRegression(max_iter=1000))
])

# Fit pipeline
MCA_Pipeline_LogReg.fit(MCA_Features_Train, MCA_Target_Train)

# Predict
MCA_Target_Pred = MCA_Pipeline_LogReg.predict(MCA_Features_Test)

# Accuracy
print("Accuracy:", accuracy_score(MCA_Target_Pred, MCA_Target_Test))

# Confusion Matrix
print("Confusion Matrix:\n", confusion_matrix(MCA_Target_Test, MCA_Target_Pred))

# Precision, Recall, F1
print("Classification Report:\n", classification_report(MCA_Target_Test, MCA_Target_Pred))



Accuracy: 0.8802698145025295
Confusion Matrix:
 [[ 40  69]
 [  2 482]]
Classification Report:
               precision    recall  f1-score   support

           0       0.95      0.37      0.53       109
           1       0.87      1.00      0.93       484

    accuracy                           0.88       593
   macro avg       0.91      0.68      0.73       593
weighted avg       0.89      0.88      0.86       593



In [None]:
'''
PIPELINE SAVING

This section saves the entire machine learning pipeline to a file for future use.
'''

# Save the entire pipeline
joblib.dump(MCA_Pipeline_LogReg, "../model-internals/MCA_Pipeline_LogReg.pkl")