In [37]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [38]:
# Load the subscription-renewal dataset into a DataFrame. The file name ends in
# ".xls", but it is parsed here as delimited text by read_csv.
df = pd.read_csv(filepath_or_buffer="/content/subscription_renewal_enhanced.csv.xls")

In [39]:
# Dimensions of the loaded frame as (rows, columns).
df.shape

(1000, 12)

In [40]:
# Per-column non-null counts and dtypes, plus overall memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 12 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   user_id             1000 non-null   int64  
 1   usage_days          1000 non-null   int64  
 2   last_login          1000 non-null   int64  
 3   monthly_fee         1000 non-null   float64
 4   renewed             1000 non-null   int64  
 5   tenure_months       1000 non-null   int64  
 6   contract_type       1000 non-null   object 
 7   payment_method      1000 non-null   object 
 8   support_tickets     1000 non-null   int64  
 9   plan_type           1000 non-null   object 
 10  discount_applied    1000 non-null   int64  
 11  satisfaction_score  1000 non-null   float64
dtypes: float64(2), int64(7), object(3)
memory usage: 93.9+ KB


In [41]:
# Preview the first rows to sanity-check the parsed values.
df.head()

Unnamed: 0,user_id,usage_days,last_login,monthly_fee,renewed,tenure_months,contract_type,payment_method,support_tickets,plan_type,discount_applied,satisfaction_score
0,1,6,0,9.99,0,29,Monthly,UPI,0,Basic,1,3.3
1,2,19,4,19.99,1,15,Quarterly,UPI,4,Premium,0,3.3
2,3,28,13,9.99,1,8,Quarterly,Net Banking,3,Basic,0,5.0
3,4,14,8,14.99,1,21,Monthly,Credit Card,1,Basic,0,3.2
4,5,10,13,14.99,0,19,Monthly,Debit Card,1,Basic,1,1.7


In [42]:
# Number of missing values in each column (isna is the same check as isnull).
df.isna().sum()

Unnamed: 0,0
user_id,0
usage_days,0
last_login,0
monthly_fee,0
renewed,0
tenure_months,0
contract_type,0
payment_method,0
support_tickets,0
plan_type,0


In [43]:
# Class balance of the `renewed` column: number of rows per distinct value.
df['renewed'].value_counts()

Unnamed: 0_level_0,count
renewed,Unnamed: 1_level_1
1,598
0,402


In [44]:
# Same class balance, expressed as a percentage of all rows.
df['renewed'].value_counts(normalize=True).mul(100)

Unnamed: 0_level_0,proportion
renewed,Unnamed: 1_level_1
1,59.8
0,40.2


In [45]:
# Target vector: the `renewed` column.
y = df['renewed']

# Feature frame: everything except `user_id` and the target itself.
X = df.drop(columns=['user_id', 'renewed'])

In [46]:
# One-hot encode the non-numeric columns. drop_first=True omits the first level
# of each encoded column, so k levels become k-1 indicator columns.
X = X.pipe(pd.get_dummies, drop_first=True)
X.head()

Unnamed: 0,usage_days,last_login,monthly_fee,tenure_months,support_tickets,discount_applied,satisfaction_score,contract_type_Monthly,contract_type_Quarterly,payment_method_Debit Card,payment_method_Net Banking,payment_method_UPI,plan_type_Premium,plan_type_Standard
0,6,0,9.99,29,0,1,3.3,True,False,False,False,True,False,False
1,19,4,19.99,15,4,0,3.3,False,True,False,False,True,True,False
2,28,13,9.99,8,3,0,5.0,False,True,False,True,False,False,False
3,14,8,14.99,21,1,0,3.2,True,False,False,False,False,False,False
4,10,13,14.99,19,1,1,1.7,True,False,True,False,False,False,False


In [47]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [48]:
# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.2)

In [49]:
# Learn the scaling statistics from the training rows only, then apply that
# same fitted transform to both splits so no test-set information leaks into
# the preprocessing.
scaler = StandardScaler().fit(X_train)

X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [50]:
from sklearn.metrics import accuracy_score, roc_auc_score

In [51]:
from sklearn.linear_model import LogisticRegression

# Fit a default-configured logistic regression on the scaled training split
# (fit returns the estimator itself, so the call can be chained).
lr = LogisticRegression().fit(X_train_scaled, y_train)

# Hard class predictions feed the accuracy; ROC-AUC needs a continuous score,
# so column 1 of predict_proba is used for it.
y_pred_lr = lr.predict(X_test_scaled)
y_score_lr = lr.predict_proba(X_test_scaled)[:, 1]

print("Logistic Regression ")
print("Accuracy:", accuracy_score(y_test, y_pred_lr))
print("ROC-AUC:", roc_auc_score(y_test, y_score_lr))

Logistic Regression 
Accuracy: 0.745
ROC-AUC: 0.7906133333333334


In [52]:
from sklearn.metrics import classification_report, confusion_matrix

# Break the test-set performance down per class: raw confusion counts first,
# then precision / recall / F1 for each label.
lr_confusion = confusion_matrix(y_test, y_pred_lr)
lr_report = classification_report(y_test, y_pred_lr)

print(lr_confusion)
print(lr_report)

[[ 47  28]
 [ 23 102]]
              precision    recall  f1-score   support

           0       0.67      0.63      0.65        75
           1       0.78      0.82      0.80       125

    accuracy                           0.74       200
   macro avg       0.73      0.72      0.72       200
weighted avg       0.74      0.74      0.74       200



In [53]:
import pickle

In [54]:
# Persist the fitted classifier. The `with` block guarantees the file handle is
# flushed and closed even if pickling raises; a bare open() passed straight to
# pickle.dump leaves the handle open until garbage collection.
with open("model.pkl", "wb") as model_file:
    pickle.dump(lr, model_file)

# `lr` was trained on StandardScaler-transformed features, so anything that
# loads model.pkl must apply the same fitted scaler before calling predict.
# Save it next to the model so the preprocessing can be reproduced.
with open("scaler.pkl", "wb") as scaler_file:
    pickle.dump(scaler, scaler_file)

In [55]:
# Record the exact post-encoding column names and their order, i.e. the feature
# layout the model was fitted on.
# NOTE(review): presumably a consumer uses this to align new inputs with the
# one-hot columns; confirm against the loading code.
feature_columns = X.columns

# Use a context manager so the file is flushed and closed deterministically
# rather than whenever the anonymous handle is garbage-collected.
with open("columns.pkl", "wb") as columns_file:
    pickle.dump(feature_columns, columns_file)