<h4><b>Importing necessary libraries</b></h4>

In [11]:
from sklearn.linear_model import LogisticRegression
import pandas as pd
import numpy as np

<h4><b>Importing behavioural data for trial users</b></h4>

In [12]:
behavioral_features = pd.read_csv("../data/processed/trial_behavioral_features.csv")
behavioral_features.head()

Unnamed: 0,account_id,signup_date,usage_events_30d,active_days_30d,features_used_30d,total_errors_30d,total_usage_time_30d,churn_date,early_churn,activation_level,high_usage,errors_per_active_day
0,A-00bed1,2023-11-14,12.0,1,1,0.0,444.0,2024-01-03,True,low,False,0.0
1,A-00cac8,2023-09-15,25.0,2,2,1.0,6365.0,,False,medium,True,0.5
2,A-016043,2024-07-31,10.0,1,1,0.0,3070.0,2024-08-11,True,low,False,0.0
3,A-0f6450,2024-12-27,120.0,6,11,6.0,41729.0,2024-12-29,True,high,True,1.0
4,A-10b8f2,2023-01-11,8.0,1,1,1.0,3088.0,2024-06-04,False,low,False,1.0


<h4><b>Performing Logistic Regression</b></h4>

In [13]:
X = behavioral_features[
    ["active_days_30d","errors_per_active_day","total_usage_time_30d"]
]

y = behavioral_features["early_churn"]

model = LogisticRegression(solver="liblinear")
model.fit(X, y)

In [14]:
coef_df = pd.DataFrame({
    "feature": X.columns,
    "coefficient": model.coef_[0],
    "odds_ratio": np.exp(model.coef_[0])
}).sort_values("odds_ratio")
coef_df

Unnamed: 0,feature,coefficient,odds_ratio
1,errors_per_active_day,-0.404513,0.667302
0,active_days_30d,-0.398689,0.6712
2,total_usage_time_30d,0.000184,1.000184


In [15]:
behavioral_features["churn_risk"] = model.predict_proba(X)[:, 1]
behavioral_features.sort_values(by="churn_risk", ascending=False)

Unnamed: 0,account_id,signup_date,usage_events_30d,active_days_30d,features_used_30d,total_errors_30d,total_usage_time_30d,churn_date,early_churn,activation_level,high_usage,errors_per_active_day,churn_risk
6,A-18793f,2024-12-18,171.0,7,11,4.0,54617.0,2024-12-30,True,high,True,0.571429,0.997931
3,A-0f6450,2024-12-27,120.0,6,11,6.0,41729.0,2024-12-29,True,high,True,1.000000,0.982509
48,A-c16cf7,2024-10-16,155.0,8,12,2.0,37862.0,2024-11-08,True,high,True,0.250000,0.943837
22,A-5790f4,2024-12-22,122.0,7,9,8.0,36704.0,2024-12-25,True,high,True,1.142857,0.933751
17,A-4bfa33,2024-12-07,129.0,8,12,10.0,35402.0,2024-12-08,True,high,True,1.250000,0.876943
...,...,...,...,...,...,...,...,...,...,...,...,...,...
64,A-fa2041,2024-05-29,16.0,2,2,1.0,1288.0,2024-07-13,True,medium,False,0.500000,0.164094
52,A-cb6cc6,2023-04-03,6.0,1,1,1.0,114.0,,False,low,False,1.000000,0.161377
16,A-462d45,2024-01-15,30.0,3,3,2.0,3692.0,2024-05-17,False,medium,False,0.666667,0.160958
61,A-f17767,2023-01-30,8.0,1,1,2.0,56.0,2023-05-03,False,low,False,2.000000,0.112723
