In [1]:
# standard library
import os
import warnings
from collections import Counter

# third-party: data handling and plotting
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

#relevant ML libraries
from imblearn.over_sampling import ADASYN
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn import preprocessing
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold,StratifiedKFold,RepeatedStratifiedKFold
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_curve
from sklearn.metrics import precision_score, recall_score, f1_score
import sklearn.metrics as metrics
from sklearn.neighbors import LocalOutlierFactor

#ML models
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.svm import SVC
from sklearn.linear_model import RidgeClassifier

warnings.filterwarnings('ignore')

In [2]:
# Load the churn CSV (path is relative to the notebook's working directory).
df = pd.read_csv(filepath_or_buffer='Telco-Customer-Churn.csv')

In [3]:
df.head()

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,...,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,...,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,7795-CFOCW,Male,0,No,No,45,No,No phone service,DSL,Yes,...,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,No
4,9237-HQITU,Female,0,No,No,2,Yes,No,Fiber optic,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes


In [3]:
# Build the modelling frame by discarding five columns that are not used as features.
data = df.drop(
    columns=['customerID', 'PhoneService', 'SeniorCitizen', 'StreamingMovies', 'StreamingTV']
)

In [4]:
data.head()

Unnamed: 0,gender,Partner,Dependents,tenure,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,Female,Yes,No,1,No phone service,DSL,No,Yes,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,Male,No,No,34,No,DSL,Yes,No,Yes,No,One year,No,Mailed check,56.95,1889.5,No
2,Male,No,No,2,No,DSL,Yes,Yes,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,Male,No,No,45,No phone service,DSL,Yes,No,Yes,Yes,One year,No,Bank transfer (automatic),42.3,1840.75,No
4,Female,No,No,2,No,Fiber optic,No,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes


In [5]:
data.isnull().sum()

gender              0
Partner             0
Dependents          0
tenure              0
MultipleLines       0
InternetService     0
OnlineSecurity      0
OnlineBackup        0
DeviceProtection    0
TechSupport         0
Contract            0
PaperlessBilling    0
PaymentMethod       0
MonthlyCharges      0
TotalCharges        0
Churn               0
dtype: int64

In [6]:
# Convert TotalCharges to a numeric dtype; errors='coerce' replaces any value
# that cannot be parsed with NaN instead of raising.
data = data.assign(
    TotalCharges=pd.to_numeric(data['TotalCharges'], errors='coerce')
)

In [7]:
data.dtypes

gender               object
Partner              object
Dependents           object
tenure                int64
MultipleLines        object
InternetService      object
OnlineSecurity       object
OnlineBackup         object
DeviceProtection     object
TechSupport          object
Contract             object
PaperlessBilling     object
PaymentMethod        object
MonthlyCharges      float64
TotalCharges        float64
Churn                object
dtype: object

In [8]:
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

In [9]:
# Target: the Churn column. Features: every other column of `data`.
Y = data['Churn']
X = data.drop(columns=['Churn'])

In [29]:
X.columns

Index(['gender', 'Partner', 'Dependents', 'tenure', 'MultipleLines',
       'InternetService', 'OnlineSecurity', 'OnlineBackup', 'DeviceProtection',
       'TechSupport', 'Contract', 'PaperlessBilling', 'PaymentMethod',
       'MonthlyCharges', 'TotalCharges'],
      dtype='object')

In [28]:
X['PaymentMethod'].unique()

array(['Electronic check', 'Mailed check', 'Bank transfer (automatic)',
       'Credit card (automatic)'], dtype=object)

In [31]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    random_state=123,
    test_size=0.2,
)

In [32]:
from sklearn.impute import SimpleImputer

# Numeric branch: fill missing values with the column mean, then standardise.
numeric_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler()),
])

# Categorical branch: fill missing values with the most frequent category, then
# one-hot encode. handle_unknown='ignore' makes a category that was not present
# during fit encode as all zeros; with the default ('error') the saved pipeline
# would raise at prediction time on any unseen value.
categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('one_hot_encoder', OneHotEncoder(handle_unknown='ignore')),
])

In [33]:
# Column groups routed to each preprocessing branch.
data_numeric = ['tenure', 'MonthlyCharges', 'TotalCharges']
data_categorical = [
    'gender',
    'Partner',
    'Dependents',
    'MultipleLines',
    'InternetService',
    'OnlineSecurity',
    'OnlineBackup',
    'DeviceProtection',
    'TechSupport',
    'Contract',
    'PaperlessBilling',
    'PaymentMethod',
]

# Send the numeric columns through numeric_transformer and the categorical
# columns through categorical_transformer.
preprocessor = ColumnTransformer(
    transformers=[
        ('numeric', numeric_transformer, data_numeric),
        ('categoric', categorical_transformer, data_categorical),
    ]
)

In [38]:
#the estimator
# End-to-end pipeline: preprocessing followed by gradient boosting.
# random_state is pinned (same seed as the train/test split) so that refitting
# the notebook yields the same model.
final_pipeline = Pipeline(steps=[
    ('processor', preprocessor),
    ('estimator', GradientBoostingClassifier(random_state=123)),
])

In [39]:
#create model
# Fit preprocessing and estimator on the training split. Pipeline.fit returns
# the pipeline itself, so ml_model and final_pipeline refer to the same object.
final_pipeline.fit(X_train, y_train)
ml_model = final_pipeline

In [44]:
#serialization: pickle is used to save and reload the fitted pipeline
import pickle

In [45]:
# Persist the fitted pipeline to disk. The context manager guarantees the file
# is flushed and closed even if pickling fails part-way.
with open('ml_model.pkl', 'wb') as model_file:
    pickle.dump(ml_model, model_file)

In [None]:
#open a model saved in pickle file format
# Reload the pipeline from 'ml_model.pkl', the file this notebook writes with
# pickle.dump (the previous "filename.pkl" placeholder does not exist).
# NOTE: pickle.load can execute arbitrary code; only load files you created yourself.
with open("ml_model.pkl", "rb") as f:
    clf = pickle.load(f)

# Gradio app

In [None]:
# setup variables and constants
# DIRPATH is the directory containing this file. os.path.realpath resolves
# symlinks (os.path.realname does not exist). __file__ is not defined inside a
# notebook kernel, so fall back to the current working directory there.
try:
    DIRPATH = os.path.dirname(os.path.realpath(__file__))
except NameError:
    DIRPATH = os.getcwd()

# Expected location of the serialized model relative to DIRPATH.
# NOTE(review): the training cells write 'ml_model.pkl' to the working directory,
# not to assets/ml/ — confirm the file is copied there before loading.
ml_core_fp = os.path.join(DIRPATH, 'assets', 'ml', 'ml_model.pkl')

In [None]:
#execution
# NOTE(review): load_ml_components is not defined in the visible part of this
# notebook; presumably it returns a dict with a 'pipeline' entry — confirm.
ml_components_dict = load_ml_components(fp=ml_core_fp)
end2end_pipeline = ml_components_dict['pipeline']
loaded_component_names = list(ml_components_dict.keys())
print(f"\n['info']) ML components loaded:{loaded_component_names}")

In [None]:
#interface
# Build a Gradio interface with one text input and one numeric output.
# Fixes: the class is gr.Interface (capital I) and the keyword is `examples`.
# NOTE(review): interface_function is not defined in the visible part of this
# notebook; it must exist before this cell runs — confirm where it comes from.
demo = gr.Interface(
    fn=interface_function,
    inputs=['text'],
    outputs='number',
    examples=[],
)

# The module name to test against is '__main__'; the previous 'main' comparison
# could never be true, so the app was never launched.
# NOTE(review): with debug=True this call may block the cell while the server runs.
if __name__ == '__main__':
    demo.launch(debug=True)

In [32]:
import gradio as gr

ModuleNotFoundError: No module named 'gradio'

In [30]:
def make_prediction(gender, Partner, Dependents, tenure, MultipleLines,
       InternetService, OnlineSecurity, OnlineBackup, DeviceProtection,
       TechSupport, Contract, PaperlessBilling, PaymentMethod,
       MonthlyCharges, TotalCharges):
    """Predict churn for a single customer using the pickled pipeline.

    Each argument is the value of the training feature with the same name.
    The pipeline stored in ``ml_model.pkl`` is loaded on every call.

    Returns:
        'Customer Will Churn' when the model predicts 'Yes', otherwise
        'Customer Will Not Churn'.

    Raises:
        FileNotFoundError: if ``ml_model.pkl`` is not in the working directory.
    """
    features = {
        'gender': gender,
        'Partner': Partner,
        'Dependents': Dependents,
        'tenure': tenure,
        'MultipleLines': MultipleLines,
        'InternetService': InternetService,
        'OnlineSecurity': OnlineSecurity,
        'OnlineBackup': OnlineBackup,
        'DeviceProtection': DeviceProtection,
        'TechSupport': TechSupport,
        'Contract': Contract,
        'PaperlessBilling': PaperlessBilling,
        'PaymentMethod': PaymentMethod,
        'MonthlyCharges': MonthlyCharges,
        'TotalCharges': TotalCharges,
    }
    # The pipeline's ColumnTransformer selects columns by name, so the input
    # must be a one-row DataFrame with the training column names; a bare nested
    # list has no column names to select from.
    row = pd.DataFrame([features], columns=list(features))

    # NOTE: pickle.load can execute arbitrary code; only load a model file you
    # produced yourself.
    with open("ml_model.pkl", "rb") as f:
        model = pickle.load(f)

    predt = model.predict(row)
    # predict returns one label per row; inspect the single row's label.
    if predt[0] == 'Yes':
        return 'Customer Will Churn'
    return 'Customer Will Not Churn'

In [31]:
#create the input components for gradio
# One component per make_prediction argument, in the same order. Dropdowns use
# the top-level gr.Dropdown class, consistent with gr.Number / gr.Textbox below;
# the legacy gr.inputs namespace is deprecated and absent from newer Gradio releases.
gender_input = gr.Dropdown(choices=['Female', 'Male'])
Partner_input = gr.Dropdown(choices=['Yes', 'No'])
Dependents_input = gr.Dropdown(choices=['Yes', 'No'])
tenure_input = gr.Number()
MultipleLines_input = gr.Dropdown(choices=['No phone service', 'No', 'Yes'])
InternetService_input = gr.Dropdown(choices=['DSL', 'Fiber optic', 'No'])
OnlineSecurity_input = gr.Dropdown(choices=['No', 'Yes', 'No internet service'])
OnlineBackup_input = gr.Dropdown(choices=['Yes', 'No', 'No internet service'])
DeviceProtection_input = gr.Dropdown(choices=['No', 'Yes', 'No internet service'])
TechSupport_input = gr.Dropdown(choices=['No', 'Yes', 'No internet service'])
Contract_input = gr.Dropdown(choices=['Month-to-month', 'One year', 'Two year'])
PaperlessBilling_input = gr.Dropdown(choices=['Yes', 'No'])
PaymentMethod_input = gr.Dropdown(choices=['Electronic check', 'Mailed check', 'Bank transfer (automatic)', 'Credit card (automatic)'])
MonthlyCharges_input = gr.Number()
TotalCharges_input = gr.Number()

# Text component that displays the string returned by make_prediction.
output = gr.Textbox()
      

NameError: name 'gr' is not defined

In [None]:
# Wire the input components to make_prediction. The list order must match the
# function's positional parameters. Fixes two misspelled names (Partner_inpu,
# PaperlessBilling_inpu) that raised NameError.
app = gr.Interface(
    fn=make_prediction,
    inputs=[
        gender_input,
        Partner_input,
        Dependents_input,
        tenure_input,
        MultipleLines_input,
        InternetService_input,
        OnlineSecurity_input,
        OnlineBackup_input,
        DeviceProtection_input,
        TechSupport_input,
        Contract_input,
        PaperlessBilling_input,
        PaymentMethod_input,
        MonthlyCharges_input,
        TotalCharges_input,
    ],
    outputs=output,
)

In [None]:
# Start the Gradio app built above, using the default launch settings.
app.launch()

In [None]:
# Map each categorical feature to the list of distinct values found in df.
# Fixes: the loop wrote to `drop_down_choices`, which was never created, and
# iterated over `categorical_var`, which this notebook does not define; the
# categorical column list defined earlier is `data_categorical`.
drop_down_choices = {var: list(df[var].unique()) for var in data_categorical}
# Keep the originally declared name bound to the same mapping.
drop_down = drop_down_choices