In [1]:
import pandas as pd
import seaborn as sns
import numpy as np
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import balanced_accuracy_score, roc_auc_score, make_scorer
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import confusion_matrix
from sklearn.metrics import plot_confusion_matrix

In [2]:
# Load the raw churn export; the path is resolved relative to the notebook's working directory.
df = pd.read_csv(filepath_or_buffer='../data/raw/churn.csv')

In [3]:
# Columns discarded before modelling. Labels still contain spaces here because
# the underscore renaming happens in a later cell.
unused_columns = [
    'Churn Label', 'City', 'Churn Score', 'CLTV', 'Churn Reason',
    'Count', 'Country', 'State', 'CustomerID', 'Lat Long',
]
df.drop(columns=unused_columns, inplace=True)

In [4]:
#replace whitespaces in City values with _
#df['City'].replace(' ', '_', regex=True, inplace=True)

In [5]:
# Normalise column labels: each space is replaced by an underscore (literal match, no regex).
df.columns = df.columns.str.replace(' ', '_', regex=False)

In [6]:
# Rows whose Total_Charges is exactly a single-space string are overwritten with 0
# so that the column can be parsed as numeric afterwards.
blank_total_charges = df['Total_Charges'] == " "
df.loc[blank_total_charges, 'Total_Charges'] = 0

In [7]:
# Parse Total_Charges into a numeric dtype; any value that cannot be parsed raises.
df['Total_Charges'] = pd.to_numeric(df['Total_Charges'], errors='raise')

In [8]:
# Frame-wide regex replacement: every space inside string cell values becomes '_'.
df.replace(to_replace=' ', value='_', regex=True, inplace=True)

In [9]:
# Separate the target column from the predictors.
y = df['Churn_Value']
X = df.drop(columns='Churn_Value')

In [10]:
# Categorical predictors to one-hot encode. drop_first=True omits the first
# level of each column, so k levels yield k-1 indicator columns.
categorical_columns = [
    'Gender',
    'Senior_Citizen',
    'Partner',
    'Dependents',
    'Phone_Service',
    'Multiple_Lines',
    'Internet_Service',
    'Online_Security',
    'Online_Backup',
    'Device_Protection',
    'Tech_Support',
    'Streaming_TV',
    'Streaming_Movies',
    'Contract',
    'Paperless_Billing',
    'Payment_Method',
]
X = pd.get_dummies(X, columns=categorical_columns, drop_first=True)
X.head()

Unnamed: 0,Zip_Code,Latitude,Longitude,Tenure_Months,Monthly_Charges,Total_Charges,Gender_Male,Senior_Citizen_Yes,Partner_Yes,Dependents_Yes,...,Streaming_TV_No_internet_service,Streaming_TV_Yes,Streaming_Movies_No_internet_service,Streaming_Movies_Yes,Contract_One_year,Contract_Two_year,Paperless_Billing_Yes,Payment_Method_Credit_card_(automatic),Payment_Method_Electronic_check,Payment_Method_Mailed_check
0,90003,33.964131,-118.272783,2,53.85,108.15,1,0,0,0,...,0,0,0,0,0,0,1,0,0,1
1,90005,34.059281,-118.30742,2,70.7,151.65,0,0,0,1,...,0,0,0,0,0,0,1,0,1,0
2,90006,34.048013,-118.293953,8,99.65,820.5,0,0,0,1,...,0,1,0,1,0,0,1,0,1,0
3,90010,34.062125,-118.315709,28,104.8,3046.05,0,0,1,1,...,0,1,0,1,0,0,1,0,1,0
4,90015,34.039224,-118.266293,49,103.7,5036.3,1,0,0,1,...,0,1,0,1,0,0,1,0,0,0


In [11]:
# Dimensions (rows, columns) of the encoded feature matrix.
X.shape

(7043, 33)

In [12]:
# Column labels of the encoded feature matrix, in order.
list(X.columns)

['Zip_Code',
 'Latitude',
 'Longitude',
 'Tenure_Months',
 'Monthly_Charges',
 'Total_Charges',
 'Gender_Male',
 'Senior_Citizen_Yes',
 'Partner_Yes',
 'Dependents_Yes',
 'Phone_Service_Yes',
 'Multiple_Lines_No_phone_service',
 'Multiple_Lines_Yes',
 'Internet_Service_Fiber_optic',
 'Internet_Service_No',
 'Online_Security_No_internet_service',
 'Online_Security_Yes',
 'Online_Backup_No_internet_service',
 'Online_Backup_Yes',
 'Device_Protection_No_internet_service',
 'Device_Protection_Yes',
 'Tech_Support_No_internet_service',
 'Tech_Support_Yes',
 'Streaming_TV_No_internet_service',
 'Streaming_TV_Yes',
 'Streaming_Movies_No_internet_service',
 'Streaming_Movies_Yes',
 'Contract_One_year',
 'Contract_Two_year',
 'Paperless_Billing_Yes',
 'Payment_Method_Credit_card_(automatic)',
 'Payment_Method_Electronic_check',
 'Payment_Method_Mailed_check']

In [13]:
# Report the installed MLflow version.
# NOTE(review): this import sits mid-notebook rather than in the import cell at the top.
import mlflow
print(mlflow.__version__)

2.0.1


In [14]:
# Stratified hold-out split on the target, using the default test fraction and a fixed seed.
split_parts = train_test_split(X, y, stratify=y, random_state=42)
X_train, X_test, y_train, y_test = split_parts

In [15]:
# Snapshot of the training column labels as a plain Python list.
features = X_train.columns.tolist()

In [16]:
# Show the captured feature names.
# NOTE(review): in file order this list is taken before the parentheses are stripped from
# X_train's columns, so it still holds 'Payment_Method_Credit_card_(automatic)'.
features

['Zip_Code',
 'Latitude',
 'Longitude',
 'Tenure_Months',
 'Monthly_Charges',
 'Total_Charges',
 'Gender_Male',
 'Senior_Citizen_Yes',
 'Partner_Yes',
 'Dependents_Yes',
 'Phone_Service_Yes',
 'Multiple_Lines_No_phone_service',
 'Multiple_Lines_Yes',
 'Internet_Service_Fiber_optic',
 'Internet_Service_No',
 'Online_Security_No_internet_service',
 'Online_Security_Yes',
 'Online_Backup_No_internet_service',
 'Online_Backup_Yes',
 'Device_Protection_No_internet_service',
 'Device_Protection_Yes',
 'Tech_Support_No_internet_service',
 'Tech_Support_Yes',
 'Streaming_TV_No_internet_service',
 'Streaming_TV_Yes',
 'Streaming_Movies_No_internet_service',
 'Streaming_Movies_Yes',
 'Contract_One_year',
 'Contract_Two_year',
 'Paperless_Billing_Yes',
 'Payment_Method_Credit_card_(automatic)',
 'Payment_Method_Electronic_check',
 'Payment_Method_Mailed_check']

In [22]:
# Strip parentheses from the feature labels so that
# 'Payment_Method_Credit_card_(automatic)' becomes 'Payment_Method_Credit_card_automatic'.
# The characters are matched literally (regex=False): as regular expressions a lone
# '(' or ')' is an invalid pattern and fails to compile on pandas versions that
# treat single-character patterns as regex.
# Both splits are renamed so train and test keep identical column labels; the
# operation is idempotent, so re-running the cell is harmless.
for split_frame in (X_train, X_test):
    split_frame.columns = (
        split_frame.columns
        .str.replace('(', '', regex=False)
        .str.replace(')', '', regex=False)
    )

In [23]:
# Inspect the training column labels after the parentheses were stripped.
list(X_train.columns)

['Zip_Code',
 'Latitude',
 'Longitude',
 'Tenure_Months',
 'Monthly_Charges',
 'Total_Charges',
 'Gender_Male',
 'Senior_Citizen_Yes',
 'Partner_Yes',
 'Dependents_Yes',
 'Phone_Service_Yes',
 'Multiple_Lines_No_phone_service',
 'Multiple_Lines_Yes',
 'Internet_Service_Fiber_optic',
 'Internet_Service_No',
 'Online_Security_No_internet_service',
 'Online_Security_Yes',
 'Online_Backup_No_internet_service',
 'Online_Backup_Yes',
 'Device_Protection_No_internet_service',
 'Device_Protection_Yes',
 'Tech_Support_No_internet_service',
 'Tech_Support_Yes',
 'Streaming_TV_No_internet_service',
 'Streaming_TV_Yes',
 'Streaming_Movies_No_internet_service',
 'Streaming_Movies_Yes',
 'Contract_One_year',
 'Contract_Two_year',
 'Paperless_Billing_Yes',
 'Payment_Method_Credit_card_automatic',
 'Payment_Method_Electronic_check',
 'Payment_Method_Mailed_check']

In [25]:
# Peek at the first two training rows.
X_train.head(2)

Unnamed: 0,Zip_Code,Latitude,Longitude,Tenure_Months,Monthly_Charges,Total_Charges,Gender_Male,Senior_Citizen_Yes,Partner_Yes,Dependents_Yes,...,Streaming_TV_No_internet_service,Streaming_TV_Yes,Streaming_Movies_No_internet_service,Streaming_Movies_Yes,Contract_One_year,Contract_Two_year,Paperless_Billing_Yes,Payment_Method_Credit_card_automatic,Payment_Method_Electronic_check,Payment_Method_Mailed_check
6764,90063,34.044271,-118.185237,72,53.65,3784.0,0,0,1,1,...,0,1,0,1,0,1,0,1,0,0
1257,95951,39.732767,-122.042298,4,46.0,193.6,0,0,0,0,...,0,0,0,0,0,0,1,0,0,1


In [61]:
# Single-row placeholder frame with letter-named columns 'a'..'z', 'aa'..'ee'.
# The values are listed once per column: 'c' holds 5 and 'd' holds 6, which is
# what a dict literal repeating those keys evaluates to (the last value wins).
# NOTE(review): this gives 31 columns, while the encoded feature matrix above has 33 —
# confirm whether 'ff' and 'gg' were meant to be included.
placeholder_columns = list('abcdefghijklmnopqrstuvwxyz') + ['aa', 'bb', 'cc', 'dd', 'ee']
placeholder_values = [1, 2, 5, 6, 7, 8, 8] + list(range(9, 33))
data_in = pd.DataFrame(
    {column: [value] for column, value in zip(placeholder_columns, placeholder_values)}
)


In [None]:

        # NOTE(review): unexecuted scratch fragment. It is indented as if lifted from a
        # function body, and its values are bare names (Zip_Code, Latitude, ...) that no
        # visible cell defines at notebook level, so it cannot run as a standalone cell.
        # It maps letter keys 'a'..'gg' to one-element lists, one per feature.
        data_in = {'a': [Zip_Code], 'b': [Latitude], 'c': [Longitude], 'd':[Tenure_Months], 'e':[Monthly_Charges], 'f':[Total_Charges], 'g':[Gender_Male],
                    'h':[Senior_Citizen_Yes], 'i':[Partner_Yes], 'j':[Dependents_Yes], 'k':[Phone_Service_Yes], 'l':[Multiple_Lines_No_phone_service],
                    'm':[Multiple_Lines_Yes], 'n':[Internet_Service_Fiber_optic], 'o':[Internet_Service_No], 'p':[Online_Security_No_internet_service],
                    'q':[Online_Security_Yes], 'r':[Online_Backup_No_internet_service], 's':[Online_Backup_Yes], 't':[Device_Protection_No_internet_service],
                    'u':[Device_Protection_Yes], 'v':[Tech_Support_No_internet_service], 'w':[Tech_Support_Yes], 'x':[Streaming_TV_No_internet_service],
                    'y':[Streaming_TV_Yes], 'z':[Streaming_Movies_No_internet_service], 'aa':[Streaming_Movies_Yes], 'bb':[Contract_One_year], 'cc':[Contract_Two_year],
                    'dd':[Paperless_Billing_Yes], 'ee':[Payment_Method_Credit_card_automatic], 'ff':[Payment_Method_Electronic_check], 'gg':[Payment_Method_Mailed_check],
                   }

In [None]:
 # NOTE(review): unexecuted scratch fragment — the tail of a parameter/argument list
 # (33 feature names followed by a closing parenthesis) with no opening call or def,
 # so it is not valid Python on its own.
 Zip_Code, Latitude, Longitude, Tenure_Months, Monthly_Charges, Total_Charges, Gender_Male,
                    Senior_Citizen_Yes, Partner_Yes, Dependents_Yes, Phone_Service_Yes, Multiple_Lines_No_phone_service,
                    Multiple_Lines_Yes, Internet_Service_Fiber_optic, Internet_Service_No, Online_Security_No_internet_service,
                    Online_Security_Yes, Online_Backup_No_internet_service, Online_Backup_Yes, Device_Protection_No_internet_service,
                    Device_Protection_Yes, Tech_Support_No_internet_service, Tech_Support_Yes, Streaming_TV_No_internet_service,
                    Streaming_TV_Yes, Streaming_Movies_No_internet_service, Streaming_Movies_Yes, Contract_One_year, Contract_Two_year,
                    Paperless_Billing_Yes, Payment_Method_Credit_card_automatic, Payment_Method_Electronic_check, Payment_Method_Mailed_check)

In [62]:
# Display the current contents of data_in.
data_in

Unnamed: 0,a,b,c,d,e,f,g,h,i,j,...,v,w,x,y,z,aa,bb,cc,dd,ee
0,1,2,5,6,7,8,8,9,10,11,...,23,24,25,26,27,28,29,30,31,32


In [50]:
# NOTE(review): the saved output below is a Name/Age frame that no cell visible in this
# notebook creates — it reflects kernel state from a cell that is no longer present.
df

Unnamed: 0,Name,Age
0,Tom,20
1,Joseph,21
2,Krish,19
3,John,18


In [67]:
# NOTE(review): this read failed with FileNotFoundError in the saved output. The path is
# relative, machine-specific and has no file extension — confirm the intended file location.
final_df = pd.read_csv('HP 1O3O G2/telecom_customer_churn_prediction/data/final')

FileNotFoundError: [Errno 2] No such file or directory: 'HP 1O3O G2/telecom_customer_churn_prediction/data/final'

In [75]:
import numpy
# Rebind data_in to a small one-dimensional integer array.
data_in = numpy.asarray([1, 2, 3])

In [89]:
# Check the runtime type of data_in.
type(data_in)

numpy.ndarray

In [94]:
# A list holding one inner list, i.e. a single "row" of three values.
single_row = [2, 3, 4]
a = [single_row]

In [103]:
# First element of the first row, after converting the nested list to an array.
np.array(a)[0, 0]

2

In [120]:
# Rebind `a` to a small mapping of two scalar values.
a = dict(a=3, b=5)

In [121]:
# Check the runtime type of `a`.
type(a)

dict

In [135]:
# Build a one-row frame from the two scalars in `a`, as a single record with columns 'y' and 'z'.
# NOTE(review): this rebinds `df`, which earlier cells use for the churn dataset.
df = pd.DataFrame([{'y': a['a'], 'z': a['b']}])

In [136]:
# Display the frame currently bound to `df`.
df

Unnamed: 0,y,z
0,3,5


In [119]:
# NOTE(review): `r` is not assigned in any cell visible in this notebook; this cell
# depends on leftover kernel state and will fail on a fresh run.
pd.DataFrame(r)

Unnamed: 0,a,b
0,c,d


In [106]:
# NOTE(review): the saved output (columns a, b with values 3, 4) matches neither assignment
# of `a` visible in this notebook — presumably `a` held a different value when this ran.
pd.DataFrame(a)

Unnamed: 0,a,b
0,3,4


In [139]:
#!pip install pydantic
# NOTE(review): this import sits mid-notebook rather than in the import cell at the top.
from pydantic import BaseModel

In [148]:
class ChurnFeature(BaseModel):
    """Request schema describing one customer as an encoded feature row.

    The 33 fields carry the same names, in the same order, as the training
    columns after parentheses were stripped from the labels. Every
    ``*_Yes`` / ``*_No*`` / category field is an integer indicator produced
    by one-hot encoding.
    """

    # --- numeric attributes ---
    Zip_Code: int
    Latitude: float
    Longitude: float
    Tenure_Months: int
    Monthly_Charges: float
    Total_Charges: float

    # --- demographic indicators ---
    Gender_Male: int
    Senior_Citizen_Yes: int
    Partner_Yes: int
    Dependents_Yes: int

    # --- phone / internet service indicators ---
    Phone_Service_Yes: int
    Multiple_Lines_No_phone_service: int
    Multiple_Lines_Yes: int
    Internet_Service_Fiber_optic: int
    Internet_Service_No: int

    # --- add-on service indicators ---
    Online_Security_No_internet_service: int
    Online_Security_Yes: int
    Online_Backup_No_internet_service: int
    Online_Backup_Yes: int
    Device_Protection_No_internet_service: int
    Device_Protection_Yes: int
    Tech_Support_No_internet_service: int
    Tech_Support_Yes: int
    Streaming_TV_No_internet_service: int
    Streaming_TV_Yes: int
    Streaming_Movies_No_internet_service: int
    Streaming_Movies_Yes: int

    # --- contract and billing indicators ---
    Contract_One_year: int
    Contract_Two_year: int
    Paperless_Billing_Yes: int
    Payment_Method_Credit_card_automatic: int
    Payment_Method_Electronic_check: int
    Payment_Method_Mailed_check: int

In [149]:
from fastapi import FastAPI
app = FastAPI()


@app.post('/predict')
def predict_churn_api(feature: ChurnFeature):
    """Turn a validated feature payload into a single-row, column-oriented mapping.

    Args:
        feature: Request body validated against ``ChurnFeature``.

    Returns:
        dict: ``{field_name: [value]}`` for every field of ``feature``, in the
        model's declaration order. Each value is wrapped in a one-element
        list, so the mapping can be handed to ``pd.DataFrame`` as one row.

    TODO: no model is loaded or invoked anywhere in this cell; until one is
    wired in, the endpoint answers with the prepared feature row instead of a
    prediction.
    """
    # `.dict()` is deprecated on pydantic v2 models in favour of `model_dump()`;
    # use whichever the installed version provides.
    data = feature.model_dump() if hasattr(feature, 'model_dump') else feature.dict()

    # Wrap every field uniformly. The hand-written mapping this replaces left
    # 'Streaming_TV_No_internet_service' as a bare scalar, unlike the other fields.
    dic = {name: [value] for name, value in data.items()}

    # Return the mapping so the endpoint no longer responds with null.
    return dic

In [151]:
# NOTE(review): `dic` is a local variable inside predict_churn_api, so it does not exist
# at notebook level — hence the NameError in the saved output.
dic

NameError: name 'dic' is not defined

In [161]:
# NOTE(review): `b` is not assigned in any cell visible in this notebook; the saved output
# shows it held [[1], [2], [3]] in the kernel, so this cell will fail on a fresh run.
pd.DataFrame([[a],[b]])

Unnamed: 0,0
0,"{'a': 3, 'b': 5}"
1,"[[1], [2], [3]]"
