In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, MinMaxScaler,LabelEncoder
from sklearn.preprocessing import LabelEncoder
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score,precision_score,classification_report

# Load the dataset
df=pd.read_csv('WA_Fn-UseC_-Telco-Customer-Churn.csv',index_col=False)
df=df.drop(columns='customerID',axis=1)

In [2]:
label_encoder=LabelEncoder()

In [3]:
df['TotalCharges']=pd.to_numeric(df['TotalCharges'],errors='coerce')
df['MonthlyCharges']=pd.to_numeric(df['MonthlyCharges'],errors='coerce')
df['tenure']=pd.to_numeric(df['tenure'],errors='coerce')

In [4]:
labels=['gender','Partner','Dependents','PhoneService', 'MultipleLines', 'InternetService',
       'OnlineSecurity', 'OnlineBackup', 'DeviceProtection', 'TechSupport',
       'StreamingTV', 'StreamingMovies', 'PaperlessBilling',
       'PaymentMethod', 'MonthlyCharges','Churn','Contract']

for column in labels:
    df[column] = label_encoder.fit_transform(df[column])

In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7043 entries, 0 to 7042
Data columns (total 20 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   gender            7043 non-null   int64  
 1   SeniorCitizen     7043 non-null   int64  
 2   Partner           7043 non-null   int64  
 3   Dependents        7043 non-null   int64  
 4   tenure            7043 non-null   int64  
 5   PhoneService      7043 non-null   int64  
 6   MultipleLines     7043 non-null   int64  
 7   InternetService   7043 non-null   int64  
 8   OnlineSecurity    7043 non-null   int64  
 9   OnlineBackup      7043 non-null   int64  
 10  DeviceProtection  7043 non-null   int64  
 11  TechSupport       7043 non-null   int64  
 12  StreamingTV       7043 non-null   int64  
 13  StreamingMovies   7043 non-null   int64  
 14  Contract          7043 non-null   int64  
 15  PaperlessBilling  7043 non-null   int64  
 16  PaymentMethod     7043 non-null   int64  


In [6]:
# Feature Scaling

scaler = StandardScaler()
numerical_cols = ['tenure', 'MonthlyCharges', 'TotalCharges']
df[numerical_cols] = scaler.fit_transform(df[numerical_cols])

In [7]:
df

Unnamed: 0,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,0,0,1,0,-1.277445,0,1,0,0,2,0,0,0,0,0,1,2,-1.131766,-0.994194,0
1,1,0,0,0,0.066327,1,0,0,2,0,2,0,0,0,1,0,3,-0.387740,-0.173740,0
2,1,0,0,0,-1.236724,1,0,0,2,2,0,0,0,0,0,1,3,-0.517317,-0.959649,1
3,1,0,0,0,0.514251,0,1,0,2,0,2,2,0,0,1,0,0,-0.872611,-0.195248,0
4,0,0,0,0,-1.236724,1,0,1,0,0,0,0,0,0,0,1,2,0.095041,-0.940457,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7038,1,0,1,1,-0.340876,1,2,0,2,0,2,2,2,2,1,1,3,0.642612,-0.129180,0
7039,0,0,1,1,1.613701,1,2,1,0,2,2,0,2,2,1,1,1,1.372008,2.241056,0
7040,0,0,1,1,-0.870241,0,1,0,2,0,0,0,0,0,0,1,2,-1.142216,-0.854514,0
7041,1,1,1,0,-1.155283,1,2,1,0,0,0,0,0,0,0,1,3,0.232979,-0.872095,1


In [8]:
# Feature Interaction
df['Tenure_MonthlyCharges'] = df['tenure'] * df['MonthlyCharges']

In [9]:
# Convert 'TotalCharges' column to numeric
df['TotalCharges'] = pd.to_numeric(df['TotalCharges'], errors='coerce')

# Feature Transformation (Example: Logarithmic transformation)
df['TotalCharges_Log'] = np.log1p(df['TotalCharges'])  # Apply log transformation using numpy's log1p function


In [10]:
# Binning (Example: Binning tenure into different groups)
bins = [0, 12, 24, 36, 48, 60, 72, float('inf')]
labels = ['0-12', '13-24', '25-36', '37-48', '49-60', '61-72', '72+']
df['tenure_bins'] = pd.cut(df['tenure'], bins=bins, labels=labels, right=False)

In [11]:
# Domain-specific Feature Engineering (Example: Average usage of services)
service_columns = ['OnlineSecurity', 'OnlineBackup', 'DeviceProtection', 'TechSupport', 'StreamingTV', 'StreamingMovies']
df['Average_Usage'] = df[service_columns].mean(axis=1)


In [12]:
df

Unnamed: 0,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,...,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn,Tenure_MonthlyCharges,TotalCharges_Log,tenure_bins,Average_Usage
0,0,0,1,0,-1.277445,0,1,0,0,2,...,0,1,2,-1.131766,-0.994194,0,1.445769,-5.148879,,0.333333
1,1,0,0,0,0.066327,1,0,0,2,0,...,1,0,3,-0.387740,-0.173740,0,-0.025718,-0.190846,0-12,0.666667
2,1,0,0,0,-1.236724,1,0,0,2,2,...,0,1,3,-0.517317,-0.959649,1,0.639779,-3.210142,,0.666667
3,1,0,0,0,0.514251,0,1,0,2,0,...,1,0,0,-0.872611,-0.195248,0,-0.448741,-0.217221,0-12,1.000000
4,0,0,0,0,-1.236724,1,0,1,0,0,...,0,1,2,0.095041,-0.940457,1,-0.117540,-2.821064,,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7038,1,0,1,1,-0.340876,1,2,0,2,0,...,1,1,3,0.642612,-0.129180,0,-0.219051,-0.138320,,1.666667
7039,0,0,1,1,1.613701,1,2,1,0,2,...,1,1,1,1.372008,2.241056,0,2.214012,1.175899,0-12,1.333333
7040,0,0,1,1,-0.870241,0,1,0,2,0,...,0,1,2,-1.142216,-0.854514,0,0.994003,-1.927676,,0.333333
7041,1,1,1,0,-1.155283,1,2,1,0,0,...,0,1,3,0.232979,-0.872095,1,-0.269157,-2.056471,,0.000000


In [13]:
df.columns

Index(['gender', 'SeniorCitizen', 'Partner', 'Dependents', 'tenure',
       'PhoneService', 'MultipleLines', 'InternetService', 'OnlineSecurity',
       'OnlineBackup', 'DeviceProtection', 'TechSupport', 'StreamingTV',
       'StreamingMovies', 'Contract', 'PaperlessBilling', 'PaymentMethod',
       'MonthlyCharges', 'TotalCharges', 'Churn', 'Tenure_MonthlyCharges',
       'TotalCharges_Log', 'tenure_bins', 'Average_Usage'],
      dtype='object')

In [14]:
df

Unnamed: 0,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,...,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn,Tenure_MonthlyCharges,TotalCharges_Log,tenure_bins,Average_Usage
0,0,0,1,0,-1.277445,0,1,0,0,2,...,0,1,2,-1.131766,-0.994194,0,1.445769,-5.148879,,0.333333
1,1,0,0,0,0.066327,1,0,0,2,0,...,1,0,3,-0.387740,-0.173740,0,-0.025718,-0.190846,0-12,0.666667
2,1,0,0,0,-1.236724,1,0,0,2,2,...,0,1,3,-0.517317,-0.959649,1,0.639779,-3.210142,,0.666667
3,1,0,0,0,0.514251,0,1,0,2,0,...,1,0,0,-0.872611,-0.195248,0,-0.448741,-0.217221,0-12,1.000000
4,0,0,0,0,-1.236724,1,0,1,0,0,...,0,1,2,0.095041,-0.940457,1,-0.117540,-2.821064,,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7038,1,0,1,1,-0.340876,1,2,0,2,0,...,1,1,3,0.642612,-0.129180,0,-0.219051,-0.138320,,1.666667
7039,0,0,1,1,1.613701,1,2,1,0,2,...,1,1,1,1.372008,2.241056,0,2.214012,1.175899,0-12,1.333333
7040,0,0,1,1,-0.870241,0,1,0,2,0,...,0,1,2,-1.142216,-0.854514,0,0.994003,-1.927676,,0.333333
7041,1,1,1,0,-1.155283,1,2,1,0,0,...,0,1,3,0.232979,-0.872095,1,-0.269157,-2.056471,,0.000000


In [15]:
labels=['gender','Partner','Dependents','PhoneService', 'MultipleLines', 'InternetService',
       'OnlineSecurity', 'OnlineBackup', 'DeviceProtection', 'TechSupport',
       'StreamingTV', 'StreamingMovies', 'PaperlessBilling',
       'PaymentMethod', 'MonthlyCharges','Churn','Contract','Average_Usage']

for column in labels:
    df[column] = label_encoder.fit_transform(df[column])

In [16]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7043 entries, 0 to 7042
Data columns (total 24 columns):
 #   Column                 Non-Null Count  Dtype   
---  ------                 --------------  -----   
 0   gender                 7043 non-null   int64   
 1   SeniorCitizen          7043 non-null   int64   
 2   Partner                7043 non-null   int64   
 3   Dependents             7043 non-null   int64   
 4   tenure                 7043 non-null   float64 
 5   PhoneService           7043 non-null   int64   
 6   MultipleLines          7043 non-null   int64   
 7   InternetService        7043 non-null   int64   
 8   OnlineSecurity         7043 non-null   int64   
 9   OnlineBackup           7043 non-null   int64   
 10  DeviceProtection       7043 non-null   int64   
 11  TechSupport            7043 non-null   int64   
 12  StreamingTV            7043 non-null   int64   
 13  StreamingMovies        7043 non-null   int64   
 14  Contract               7043 non-null   i

In [17]:
print(f'check isf there is any  missing values in df\n{df.isna().sum()}')

check isf there is any  missing values in df
gender                      0
SeniorCitizen               0
Partner                     0
Dependents                  0
tenure                      0
PhoneService                0
MultipleLines               0
InternetService             0
OnlineSecurity              0
OnlineBackup                0
DeviceProtection            0
TechSupport                 0
StreamingTV                 0
StreamingMovies             0
Contract                    0
PaperlessBilling            0
PaymentMethod               0
MonthlyCharges              0
TotalCharges               11
Churn                       0
Tenure_MonthlyCharges       0
TotalCharges_Log           11
tenure_bins              3775
Average_Usage               0
dtype: int64


In [18]:
df['tenure_bins']

0        NaN
1       0-12
2        NaN
3       0-12
4        NaN
        ... 
7038     NaN
7039    0-12
7040     NaN
7041     NaN
7042    0-12
Name: tenure_bins, Length: 7043, dtype: category
Categories (7, object): ['0-12' < '13-24' < '25-36' < '37-48' < '49-60' < '61-72' < '72+']

In [19]:
# Fill missing values in 'TotalCharges' column with the mean value
df['TotalCharges'].fillna(df['TotalCharges'].mean(), inplace=True)

# Fill missing values in 'TotalCharges_Log' column with zero
df['TotalCharges_Log'].fillna(0, inplace=True)
most_frequent_category = df['tenure_bins'].mode().values[0]  # Get the most frequent category
df['tenure_bins'].fillna(most_frequent_category, inplace=True)  # Fill missing values with the most frequent category


In [20]:
df['tenure_bins'].info()

<class 'pandas.core.series.Series'>
RangeIndex: 7043 entries, 0 to 7042
Series name: tenure_bins
Non-Null Count  Dtype   
--------------  -----   
7043 non-null   category
dtypes: category(1)
memory usage: 7.4 KB


In [21]:
class_distribution=df['Churn'].value_counts()
print(class_distribution)
imbalance_ratio = max(class_distribution) / min(class_distribution)
print("Imbalance Ratio:", imbalance_ratio)

0    5174
1    1869
Name: Churn, dtype: int64
Imbalance Ratio: 2.7683253076511503


In [22]:
label_encoder = LabelEncoder()
df['tenure_bins_encoded'] = label_encoder.fit_transform(df['tenure_bins'])
df = df.drop('tenure_bins', axis=1)

In [23]:
X=df.drop('Churn',axis=1)
y=df['Churn']

smote=SMOTE(random_state=42)

X_resamples,y_resampled=smote.fit_resample(X,y)
print("the class distribution before resample")
print(y.value_counts())
print("The class distribution after resample")
print("\nClass Distribution After Resampling:")
print(pd.Series(y_resampled).value_counts())

the class distribution before resample
0    5174
1    1869
Name: Churn, dtype: int64
The class distribution after resample

Class Distribution After Resampling:
0    5174
1    5174
Name: Churn, dtype: int64


In [24]:
X_train,X_test,y_train,y_test=train_test_split(X_resamples,y_resampled)

In [27]:
rf_classifier = RandomForestClassifier()
rf_classifier.fit(X_train, y_train)
predicted_labels = rf_classifier.predict(X_test)
accuracy = accuracy_score(y_test, predicted_labels)
print("Accuracy:", accuracy)

Accuracy: 0.8353304986470815


In [26]:
from sklearn.ensemble import RandomForestClassifier
rf_classifers=RandomForestClassifier()

In [28]:
feature_importances = rf_classifier.feature_importances_

important_features = df.columns[:-1][feature_importances > 0.01]  # Select features with importance > 0.01


In [29]:
len(df.columns)

24

In [30]:
len(important_features)

21

In [31]:
# Select the final set of features for modeling
selected_features = ['gender', 'SeniorCitizen', 'Partner', 'Dependents', 'tenure',
       'MultipleLines', 'InternetService', 'OnlineSecurity', 'OnlineBackup',
       'DeviceProtection', 'TechSupport', 'StreamingTV', 'StreamingMovies',
       'Contract', 'PaperlessBilling', 'PaymentMethod', 'MonthlyCharges',
       'TotalCharges', 'Churn', 'Tenure_MonthlyCharges', 'TotalCharges_Log']

In [32]:
df_final = df[selected_features]

In [33]:
df_final

Unnamed: 0,gender,SeniorCitizen,Partner,Dependents,tenure,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,...,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn,Tenure_MonthlyCharges,TotalCharges_Log
0,0,0,1,0,-1.277445,1,0,0,2,0,...,0,0,0,1,2,142,-0.994194,0,1.445769,-5.148879
1,1,0,0,0,0.066327,0,0,2,0,2,...,0,0,1,0,3,498,-0.173740,0,-0.025718,-0.190846
2,1,0,0,0,-1.236724,0,0,2,2,0,...,0,0,0,1,3,436,-0.959649,1,0.639779,-3.210142
3,1,0,0,0,0.514251,1,0,2,0,2,...,0,0,1,0,0,266,-0.195248,0,-0.448741,-0.217221
4,0,0,0,0,-1.236724,0,1,0,0,0,...,0,0,0,1,2,729,-0.940457,1,-0.117540,-2.821064
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7038,1,0,1,1,-0.340876,2,0,2,0,2,...,2,2,1,1,3,991,-0.129180,0,-0.219051,-0.138320
7039,0,0,1,1,1.613701,2,1,0,2,2,...,2,2,1,1,1,1340,2.241056,0,2.214012,1.175899
7040,0,0,1,1,-0.870241,1,0,2,0,0,...,0,0,0,1,2,137,-0.854514,0,0.994003,-1.927676
7041,1,1,1,0,-1.155283,2,1,0,0,0,...,0,0,0,1,3,795,-0.872095,1,-0.269157,-2.056471


In [None]:
# Day 2

In [43]:
import xgboost as xgb
from sklearn.metrics import accuracy_score

# Assuming you have your preprocessed features and labels as X_train, X_test, y_train, y_test

# Create the XGBoost Classifier instance
xgb_classifier = xgb.XGBClassifier()

# Train the model
xgb_classifier.fit(X_train, y_train)


In [44]:
# Make predictions on the test set
y_pred = xgb_classifier.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)

# Print the accuracy score
print("Accuracy:", accuracy)

Accuracy: 0.843448009277155


In [49]:
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, ExtraTreesClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression, RidgeClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import StackingClassifier
import xgboost as xgb
import lightgbm as lgb
import catboost as cb

# Create a list of models
models = [
    RandomForestClassifier(),
    xgb.XGBClassifier(),
    lgb.LGBMClassifier(),
    cb.CatBoostClassifier(),
    SVC(),
    MLPClassifier(),
    # Add other models here
    AdaBoostClassifier(),
    StackingClassifier(estimators=[('rf', RandomForestClassifier()), ('xgb', xgb.XGBClassifier())]),
    LogisticRegression(),
    KNeighborsClassifier(),
    DecisionTreeClassifier(),
    GaussianNB(),
    GaussianProcessClassifier(),
    RidgeClassifier(),
    ExtraTreesClassifier(),
    GradientBoostingClassifier(),
    xgb.XGBClassifier(),
    lgb.LGBMClassifier(),
    cb.CatBoostClassifier()
]

# Iterate over the models
for model in models:
    # Train the model
    model.fit(X_train, y_train)

    # Make predictions on the test set
    y_pred = model.predict(X_test)

    # Evaluate the model
    accuracy = accuracy_score(y_test, y_pred)

    # Print the model's name and accuracy score
    print(type(model).__name__)
    print("Accuracy:", accuracy)
    print()


RandomForestClassifier
Accuracy: 0.8384228836490143

XGBClassifier
Accuracy: 0.843448009277155

LGBMClassifier
Accuracy: 0.843448009277155

Learning rate set to 0.024713
0:	learn: 0.6787889	total: 50.5ms	remaining: 50.4s
1:	learn: 0.6664013	total: 53.4ms	remaining: 26.6s
2:	learn: 0.6539556	total: 56.2ms	remaining: 18.7s
3:	learn: 0.6425252	total: 93.9ms	remaining: 23.4s
4:	learn: 0.6303662	total: 98.9ms	remaining: 19.7s
5:	learn: 0.6198391	total: 104ms	remaining: 17.2s
6:	learn: 0.6100482	total: 109ms	remaining: 15.5s
7:	learn: 0.6011226	total: 114ms	remaining: 14.2s
8:	learn: 0.5918307	total: 120ms	remaining: 13.2s
9:	learn: 0.5826086	total: 125ms	remaining: 12.4s
10:	learn: 0.5743112	total: 130ms	remaining: 11.7s
11:	learn: 0.5666740	total: 135ms	remaining: 11.1s
12:	learn: 0.5599496	total: 141ms	remaining: 10.7s
13:	learn: 0.5532397	total: 146ms	remaining: 10.3s
14:	learn: 0.5467619	total: 152ms	remaining: 10s
15:	learn: 0.5406413	total: 155ms	remaining: 9.54s
16:	learn: 0.5347962	

215:	learn: 0.3528510	total: 486ms	remaining: 1.76s
216:	learn: 0.3526219	total: 488ms	remaining: 1.76s
217:	learn: 0.3524095	total: 490ms	remaining: 1.76s
218:	learn: 0.3521634	total: 492ms	remaining: 1.75s
219:	learn: 0.3520113	total: 493ms	remaining: 1.75s
220:	learn: 0.3517968	total: 495ms	remaining: 1.74s
221:	learn: 0.3516519	total: 497ms	remaining: 1.74s
222:	learn: 0.3513759	total: 498ms	remaining: 1.74s
223:	learn: 0.3510369	total: 500ms	remaining: 1.73s
224:	learn: 0.3507427	total: 502ms	remaining: 1.73s
225:	learn: 0.3505181	total: 504ms	remaining: 1.72s
226:	learn: 0.3499439	total: 505ms	remaining: 1.72s
227:	learn: 0.3496942	total: 507ms	remaining: 1.72s
228:	learn: 0.3495577	total: 509ms	remaining: 1.71s
229:	learn: 0.3494002	total: 511ms	remaining: 1.71s
230:	learn: 0.3492574	total: 512ms	remaining: 1.7s
231:	learn: 0.3490553	total: 514ms	remaining: 1.7s
232:	learn: 0.3489385	total: 515ms	remaining: 1.7s
233:	learn: 0.3487440	total: 517ms	remaining: 1.69s
234:	learn: 0.3

450:	learn: 0.3130698	total: 876ms	remaining: 1.07s
451:	learn: 0.3127368	total: 878ms	remaining: 1.06s
452:	learn: 0.3125858	total: 879ms	remaining: 1.06s
453:	learn: 0.3124832	total: 881ms	remaining: 1.06s
454:	learn: 0.3123732	total: 883ms	remaining: 1.06s
455:	learn: 0.3122860	total: 884ms	remaining: 1.05s
456:	learn: 0.3121048	total: 886ms	remaining: 1.05s
457:	learn: 0.3119819	total: 888ms	remaining: 1.05s
458:	learn: 0.3118366	total: 890ms	remaining: 1.05s
459:	learn: 0.3116632	total: 892ms	remaining: 1.05s
460:	learn: 0.3115245	total: 893ms	remaining: 1.04s
461:	learn: 0.3114056	total: 895ms	remaining: 1.04s
462:	learn: 0.3112771	total: 896ms	remaining: 1.04s
463:	learn: 0.3111157	total: 898ms	remaining: 1.04s
464:	learn: 0.3109835	total: 900ms	remaining: 1.03s
465:	learn: 0.3108262	total: 901ms	remaining: 1.03s
466:	learn: 0.3107108	total: 903ms	remaining: 1.03s
467:	learn: 0.3106144	total: 904ms	remaining: 1.03s
468:	learn: 0.3104950	total: 906ms	remaining: 1.02s
469:	learn: 

691:	learn: 0.2816419	total: 1.26s	remaining: 563ms
692:	learn: 0.2815488	total: 1.27s	remaining: 561ms
693:	learn: 0.2814801	total: 1.27s	remaining: 560ms
694:	learn: 0.2813751	total: 1.27s	remaining: 558ms
695:	learn: 0.2812941	total: 1.27s	remaining: 556ms
696:	learn: 0.2811917	total: 1.27s	remaining: 554ms
697:	learn: 0.2810888	total: 1.27s	remaining: 552ms
698:	learn: 0.2809637	total: 1.28s	remaining: 550ms
699:	learn: 0.2808304	total: 1.28s	remaining: 548ms
700:	learn: 0.2807166	total: 1.28s	remaining: 546ms
701:	learn: 0.2806476	total: 1.28s	remaining: 544ms
702:	learn: 0.2805675	total: 1.28s	remaining: 543ms
703:	learn: 0.2804365	total: 1.28s	remaining: 541ms
704:	learn: 0.2803486	total: 1.29s	remaining: 539ms
705:	learn: 0.2802238	total: 1.29s	remaining: 537ms
706:	learn: 0.2801601	total: 1.29s	remaining: 535ms
707:	learn: 0.2800778	total: 1.29s	remaining: 533ms
708:	learn: 0.2799876	total: 1.29s	remaining: 531ms
709:	learn: 0.2798064	total: 1.29s	remaining: 529ms
710:	learn: 

925:	learn: 0.2576522	total: 1.65s	remaining: 132ms
926:	learn: 0.2575902	total: 1.66s	remaining: 130ms
927:	learn: 0.2574829	total: 1.66s	remaining: 129ms
928:	learn: 0.2574185	total: 1.66s	remaining: 127ms
929:	learn: 0.2573422	total: 1.66s	remaining: 125ms
930:	learn: 0.2572950	total: 1.66s	remaining: 123ms
931:	learn: 0.2571313	total: 1.67s	remaining: 122ms
932:	learn: 0.2570199	total: 1.67s	remaining: 120ms
933:	learn: 0.2569500	total: 1.67s	remaining: 118ms
934:	learn: 0.2568359	total: 1.67s	remaining: 116ms
935:	learn: 0.2567454	total: 1.67s	remaining: 114ms
936:	learn: 0.2566867	total: 1.68s	remaining: 113ms
937:	learn: 0.2566014	total: 1.68s	remaining: 111ms
938:	learn: 0.2565163	total: 1.68s	remaining: 109ms
939:	learn: 0.2564809	total: 1.68s	remaining: 107ms
940:	learn: 0.2563226	total: 1.68s	remaining: 105ms
941:	learn: 0.2562414	total: 1.68s	remaining: 104ms
942:	learn: 0.2561833	total: 1.69s	remaining: 102ms
943:	learn: 0.2561145	total: 1.69s	remaining: 100ms
944:	learn: 

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


GaussianProcessClassifier
Accuracy: 0.8341708542713567

RidgeClassifier
Accuracy: 0.8117510630073445

ExtraTreesClassifier
Accuracy: 0.8376497873985311

GradientBoostingClassifier
Accuracy: 0.8322381136451488

XGBClassifier
Accuracy: 0.843448009277155

LGBMClassifier
Accuracy: 0.843448009277155

Learning rate set to 0.024713
0:	learn: 0.6787889	total: 2.48ms	remaining: 2.48s
1:	learn: 0.6664013	total: 4.25ms	remaining: 2.12s
2:	learn: 0.6539556	total: 6.32ms	remaining: 2.1s
3:	learn: 0.6425252	total: 8.03ms	remaining: 2s
4:	learn: 0.6303662	total: 9.7ms	remaining: 1.93s
5:	learn: 0.6198391	total: 11.8ms	remaining: 1.96s
6:	learn: 0.6100482	total: 13.5ms	remaining: 1.91s
7:	learn: 0.6011226	total: 15.1ms	remaining: 1.87s
8:	learn: 0.5918307	total: 16.9ms	remaining: 1.86s
9:	learn: 0.5826086	total: 18.9ms	remaining: 1.87s
10:	learn: 0.5743112	total: 20.6ms	remaining: 1.85s
11:	learn: 0.5666740	total: 22.3ms	remaining: 1.83s
12:	learn: 0.5599496	total: 24.2ms	remaining: 1.84s
13:	learn: 0

226:	learn: 0.3499439	total: 439ms	remaining: 1.49s
227:	learn: 0.3496942	total: 441ms	remaining: 1.49s
228:	learn: 0.3495577	total: 442ms	remaining: 1.49s
229:	learn: 0.3494002	total: 444ms	remaining: 1.49s
230:	learn: 0.3492574	total: 446ms	remaining: 1.48s
231:	learn: 0.3490553	total: 448ms	remaining: 1.48s
232:	learn: 0.3489385	total: 449ms	remaining: 1.48s
233:	learn: 0.3487440	total: 451ms	remaining: 1.48s
234:	learn: 0.3485929	total: 453ms	remaining: 1.47s
235:	learn: 0.3484102	total: 455ms	remaining: 1.47s
236:	learn: 0.3481405	total: 456ms	remaining: 1.47s
237:	learn: 0.3480205	total: 458ms	remaining: 1.47s
238:	learn: 0.3478201	total: 460ms	remaining: 1.46s
239:	learn: 0.3476272	total: 462ms	remaining: 1.46s
240:	learn: 0.3474342	total: 463ms	remaining: 1.46s
241:	learn: 0.3471249	total: 465ms	remaining: 1.46s
242:	learn: 0.3468140	total: 467ms	remaining: 1.45s
243:	learn: 0.3466474	total: 469ms	remaining: 1.45s
244:	learn: 0.3464697	total: 470ms	remaining: 1.45s
245:	learn: 

461:	learn: 0.3114056	total: 826ms	remaining: 962ms
462:	learn: 0.3112771	total: 828ms	remaining: 960ms
463:	learn: 0.3111157	total: 830ms	remaining: 958ms
464:	learn: 0.3109835	total: 831ms	remaining: 956ms
465:	learn: 0.3108262	total: 833ms	remaining: 954ms
466:	learn: 0.3107108	total: 835ms	remaining: 953ms
467:	learn: 0.3106144	total: 837ms	remaining: 951ms
468:	learn: 0.3104950	total: 838ms	remaining: 949ms
469:	learn: 0.3103578	total: 840ms	remaining: 947ms
470:	learn: 0.3102483	total: 842ms	remaining: 945ms
471:	learn: 0.3100995	total: 843ms	remaining: 943ms
472:	learn: 0.3099820	total: 845ms	remaining: 941ms
473:	learn: 0.3098796	total: 846ms	remaining: 939ms
474:	learn: 0.3097749	total: 848ms	remaining: 937ms
475:	learn: 0.3096215	total: 850ms	remaining: 935ms
476:	learn: 0.3095334	total: 851ms	remaining: 933ms
477:	learn: 0.3094098	total: 853ms	remaining: 931ms
478:	learn: 0.3092401	total: 854ms	remaining: 929ms
479:	learn: 0.3090647	total: 856ms	remaining: 927ms
480:	learn: 

697:	learn: 0.2810888	total: 1.21s	remaining: 525ms
698:	learn: 0.2809637	total: 1.22s	remaining: 524ms
699:	learn: 0.2808304	total: 1.22s	remaining: 522ms
700:	learn: 0.2807166	total: 1.22s	remaining: 520ms
701:	learn: 0.2806476	total: 1.22s	remaining: 518ms
702:	learn: 0.2805675	total: 1.22s	remaining: 517ms
703:	learn: 0.2804365	total: 1.22s	remaining: 515ms
704:	learn: 0.2803486	total: 1.23s	remaining: 513ms
705:	learn: 0.2802238	total: 1.23s	remaining: 511ms
706:	learn: 0.2801601	total: 1.23s	remaining: 510ms
707:	learn: 0.2800778	total: 1.23s	remaining: 508ms
708:	learn: 0.2799876	total: 1.23s	remaining: 506ms
709:	learn: 0.2798064	total: 1.23s	remaining: 504ms
710:	learn: 0.2796648	total: 1.24s	remaining: 503ms
711:	learn: 0.2795312	total: 1.24s	remaining: 501ms
712:	learn: 0.2793876	total: 1.24s	remaining: 499ms
713:	learn: 0.2792833	total: 1.24s	remaining: 497ms
714:	learn: 0.2791980	total: 1.24s	remaining: 496ms
715:	learn: 0.2790591	total: 1.24s	remaining: 494ms
716:	learn: 

928:	learn: 0.2574185	total: 1.6s	remaining: 122ms
929:	learn: 0.2573422	total: 1.6s	remaining: 121ms
930:	learn: 0.2572950	total: 1.6s	remaining: 119ms
931:	learn: 0.2571313	total: 1.6s	remaining: 117ms
932:	learn: 0.2570199	total: 1.61s	remaining: 115ms
933:	learn: 0.2569500	total: 1.61s	remaining: 114ms
934:	learn: 0.2568359	total: 1.61s	remaining: 112ms
935:	learn: 0.2567454	total: 1.61s	remaining: 110ms
936:	learn: 0.2566867	total: 1.61s	remaining: 109ms
937:	learn: 0.2566014	total: 1.61s	remaining: 107ms
938:	learn: 0.2565163	total: 1.62s	remaining: 105ms
939:	learn: 0.2564809	total: 1.62s	remaining: 103ms
940:	learn: 0.2563226	total: 1.62s	remaining: 102ms
941:	learn: 0.2562414	total: 1.62s	remaining: 99.9ms
942:	learn: 0.2561833	total: 1.62s	remaining: 98.2ms
943:	learn: 0.2561145	total: 1.63s	remaining: 96.4ms
944:	learn: 0.2560057	total: 1.63s	remaining: 94.7ms
945:	learn: 0.2559184	total: 1.63s	remaining: 93ms
946:	learn: 0.2557628	total: 1.63s	remaining: 91.2ms
947:	learn: 

In [62]:
X_train

Unnamed: 0,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,...,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Tenure_MonthlyCharges,TotalCharges_Log,Average_Usage,tenure_bins_encoded
4959,0,0,1,1,-1.277445,1,0,2,1,1,...,1,0,1,1,32,-0.998474,1.739448,-6.484847,3,0
1748,0,0,1,0,0.880735,1,0,2,1,1,...,1,2,0,1,30,-0.520998,-1.202945,-0.736051,3,0
7005,1,0,1,0,-0.381597,1,0,2,1,1,...,1,1,0,1,15,-0.792858,0.533164,-1.574352,3,0
1577,0,0,1,1,-0.625919,1,0,1,0,0,...,2,1,0,0,1254,-0.255161,-0.746265,-0.294587,4,0
6031,0,1,1,1,-0.829521,0,1,0,0,0,...,0,0,1,3,132,-0.850345,0.956160,-1.899422,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8260,0,0,0,0,0.173010,1,2,1,0,1,...,0,0,1,0,961,0.392979,0.101235,0.132488,1,0
4089,0,0,0,0,-0.951682,1,2,1,0,0,...,0,0,1,2,802,-0.695620,-0.235645,-1.189480,0,0
78,1,1,0,0,-0.096554,1,0,0,2,2,...,2,0,1,2,802,-0.075884,-0.023908,-0.078918,4,0
9000,1,0,0,0,0.501347,1,1,1,1,1,...,0,0,1,1,1222,0.874428,0.563918,0.545461,3,0


In [50]:
# cant get the accuracy above 85% so im trying deep learning approches

In [None]:
from tensorflow import keras
from tensorflow.keras import layers

In [None]:
input_shape = (23,)
model = keras.Sequential([
    layers.Dense(128, activation='relu', input_shape=input_shape),
    layers.BatchNormalization(),
    layers.Dense(256, activation='relu'),
    layers.BatchNormalization(),
    layers.Dense(256, activation='relu'),
    layers.BatchNormalization(),
    layers.Dense(128, activation='relu'),
    layers.BatchNormalization(),
    layers.Dense(64, activation='relu'),
    layers.BatchNormalization(),
    layers.Dense(2, activation='softmax')
])

In [66]:
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.fit(X_train, y_train, batch_size=32, epochs=30, validation_data=(X_test, y_test))
_, accuracy = model.evaluate(X_test, y_test)
print("Accuracy:", accuracy)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Accuracy: 0.5071511268615723
