In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.preprocessing import MinMaxScaler , LabelEncoder
import matplotlib.pyplot as plt
import seaborn as sns
import scipy
import plotly.express as px
import plotly.graph_objects as go   

In [2]:
#Read CSV file
df=pd.read_csv("WA_Fn-UseC_-Telco-Customer-Churn.csv" , index_col='customerID')

In [3]:
df

Unnamed: 0_level_0,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
customerID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,Yes,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,No,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,Yes,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
7795-CFOCW,Male,0,No,No,45,No,No phone service,DSL,Yes,No,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.30,1840.75,No
9237-HQITU,Female,0,No,No,2,Yes,No,Fiber optic,No,No,No,No,No,No,Month-to-month,Yes,Electronic check,70.70,151.65,Yes
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6840-RESVB,Male,0,Yes,Yes,24,Yes,Yes,DSL,Yes,No,Yes,Yes,Yes,Yes,One year,Yes,Mailed check,84.80,1990.5,No
2234-XADUH,Female,0,Yes,Yes,72,Yes,Yes,Fiber optic,No,Yes,Yes,No,Yes,Yes,One year,Yes,Credit card (automatic),103.20,7362.9,No
4801-JZAZL,Female,0,Yes,Yes,11,No,No phone service,DSL,Yes,No,No,No,No,No,Month-to-month,Yes,Electronic check,29.60,346.45,No
8361-LTMKD,Male,1,Yes,No,4,Yes,Yes,Fiber optic,No,No,No,No,No,No,Month-to-month,Yes,Mailed check,74.40,306.6,Yes


In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 7043 entries, 7590-VHVEG to 3186-AJIEK
Data columns (total 20 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   gender            7043 non-null   object 
 1   SeniorCitizen     7043 non-null   int64  
 2   Partner           7043 non-null   object 
 3   Dependents        7043 non-null   object 
 4   tenure            7043 non-null   int64  
 5   PhoneService      7043 non-null   object 
 6   MultipleLines     7043 non-null   object 
 7   InternetService   7043 non-null   object 
 8   OnlineSecurity    7043 non-null   object 
 9   OnlineBackup      7043 non-null   object 
 10  DeviceProtection  7043 non-null   object 
 11  TechSupport       7043 non-null   object 
 12  StreamingTV       7043 non-null   object 
 13  StreamingMovies   7043 non-null   object 
 14  Contract          7043 non-null   object 
 15  PaperlessBilling  7043 non-null   object 
 16  PaymentMethod     7043 non-null 

In [5]:
#Change Total charges to numeric 
df['TotalCharges'] = pd.to_numeric(df['TotalCharges'], errors='coerce')

In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 7043 entries, 7590-VHVEG to 3186-AJIEK
Data columns (total 20 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   gender            7043 non-null   object 
 1   SeniorCitizen     7043 non-null   int64  
 2   Partner           7043 non-null   object 
 3   Dependents        7043 non-null   object 
 4   tenure            7043 non-null   int64  
 5   PhoneService      7043 non-null   object 
 6   MultipleLines     7043 non-null   object 
 7   InternetService   7043 non-null   object 
 8   OnlineSecurity    7043 non-null   object 
 9   OnlineBackup      7043 non-null   object 
 10  DeviceProtection  7043 non-null   object 
 11  TechSupport       7043 non-null   object 
 12  StreamingTV       7043 non-null   object 
 13  StreamingMovies   7043 non-null   object 
 14  Contract          7043 non-null   object 
 15  PaperlessBilling  7043 non-null   object 
 16  PaymentMethod     7043 non-null 

In [7]:
#Fill mising values of Total Charges with Avreage of total charges 
Charges_mean = df['TotalCharges'].mean()
df['TotalCharges'].fillna(Charges_mean, inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['TotalCharges'].fillna(Charges_mean, inplace=True)


In [8]:
df

Unnamed: 0_level_0,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
customerID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,Yes,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,No,Yes,No,No,No,One year,No,Mailed check,56.95,1889.50,No
3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,Yes,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
7795-CFOCW,Male,0,No,No,45,No,No phone service,DSL,Yes,No,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.30,1840.75,No
9237-HQITU,Female,0,No,No,2,Yes,No,Fiber optic,No,No,No,No,No,No,Month-to-month,Yes,Electronic check,70.70,151.65,Yes
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6840-RESVB,Male,0,Yes,Yes,24,Yes,Yes,DSL,Yes,No,Yes,Yes,Yes,Yes,One year,Yes,Mailed check,84.80,1990.50,No
2234-XADUH,Female,0,Yes,Yes,72,Yes,Yes,Fiber optic,No,Yes,Yes,No,Yes,Yes,One year,Yes,Credit card (automatic),103.20,7362.90,No
4801-JZAZL,Female,0,Yes,Yes,11,No,No phone service,DSL,Yes,No,No,No,No,No,Month-to-month,Yes,Electronic check,29.60,346.45,No
8361-LTMKD,Male,1,Yes,No,4,Yes,Yes,Fiber optic,No,No,No,No,No,No,Month-to-month,Yes,Mailed check,74.40,306.60,Yes


In [9]:
#Specify dtype = object columns to one hot encode
df.select_dtypes('object').columns

Index(['gender', 'Partner', 'Dependents', 'PhoneService', 'MultipleLines',
       'InternetService', 'OnlineSecurity', 'OnlineBackup', 'DeviceProtection',
       'TechSupport', 'StreamingTV', 'StreamingMovies', 'Contract',
       'PaperlessBilling', 'PaymentMethod', 'Churn'],
      dtype='object')

In [10]:
df_encoded = pd.get_dummies(df, columns = ['gender', 'Partner', 'Dependents', 'PhoneService', 'MultipleLines',
       'InternetService', 'OnlineSecurity', 'OnlineBackup', 'DeviceProtection',
       'TechSupport', 'StreamingTV', 'StreamingMovies',
       'PaperlessBilling', 'PaymentMethod'])
df_encoded

Unnamed: 0_level_0,SeniorCitizen,tenure,Contract,MonthlyCharges,TotalCharges,Churn,gender_Female,gender_Male,Partner_No,Partner_Yes,...,StreamingTV_Yes,StreamingMovies_No,StreamingMovies_No internet service,StreamingMovies_Yes,PaperlessBilling_No,PaperlessBilling_Yes,PaymentMethod_Bank transfer (automatic),PaymentMethod_Credit card (automatic),PaymentMethod_Electronic check,PaymentMethod_Mailed check
customerID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
7590-VHVEG,0,1,Month-to-month,29.85,29.85,No,True,False,False,True,...,False,True,False,False,False,True,False,False,True,False
5575-GNVDE,0,34,One year,56.95,1889.50,No,False,True,True,False,...,False,True,False,False,True,False,False,False,False,True
3668-QPYBK,0,2,Month-to-month,53.85,108.15,Yes,False,True,True,False,...,False,True,False,False,False,True,False,False,False,True
7795-CFOCW,0,45,One year,42.30,1840.75,No,False,True,True,False,...,False,True,False,False,True,False,True,False,False,False
9237-HQITU,0,2,Month-to-month,70.70,151.65,Yes,True,False,True,False,...,False,True,False,False,False,True,False,False,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6840-RESVB,0,24,One year,84.80,1990.50,No,False,True,False,True,...,True,False,False,True,False,True,False,False,False,True
2234-XADUH,0,72,One year,103.20,7362.90,No,True,False,False,True,...,True,False,False,True,False,True,False,True,False,False
4801-JZAZL,0,11,Month-to-month,29.60,346.45,No,True,False,False,True,...,False,True,False,False,False,True,False,False,True,False
8361-LTMKD,1,4,Month-to-month,74.40,306.60,Yes,False,True,False,True,...,False,True,False,False,False,True,False,False,False,True


In [11]:
replaced = {"Month-to-month": "1" , "One year": "2" , "Two year": "3"}

In [12]:
df_encoded['Contract'] = df_encoded['Contract'].replace(replaced)

In [13]:
df_encoded

Unnamed: 0_level_0,SeniorCitizen,tenure,Contract,MonthlyCharges,TotalCharges,Churn,gender_Female,gender_Male,Partner_No,Partner_Yes,...,StreamingTV_Yes,StreamingMovies_No,StreamingMovies_No internet service,StreamingMovies_Yes,PaperlessBilling_No,PaperlessBilling_Yes,PaymentMethod_Bank transfer (automatic),PaymentMethod_Credit card (automatic),PaymentMethod_Electronic check,PaymentMethod_Mailed check
customerID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
7590-VHVEG,0,1,1,29.85,29.85,No,True,False,False,True,...,False,True,False,False,False,True,False,False,True,False
5575-GNVDE,0,34,2,56.95,1889.50,No,False,True,True,False,...,False,True,False,False,True,False,False,False,False,True
3668-QPYBK,0,2,1,53.85,108.15,Yes,False,True,True,False,...,False,True,False,False,False,True,False,False,False,True
7795-CFOCW,0,45,2,42.30,1840.75,No,False,True,True,False,...,False,True,False,False,True,False,True,False,False,False
9237-HQITU,0,2,1,70.70,151.65,Yes,True,False,True,False,...,False,True,False,False,False,True,False,False,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6840-RESVB,0,24,2,84.80,1990.50,No,False,True,False,True,...,True,False,False,True,False,True,False,False,False,True
2234-XADUH,0,72,2,103.20,7362.90,No,True,False,False,True,...,True,False,False,True,False,True,False,True,False,False
4801-JZAZL,0,11,1,29.60,346.45,No,True,False,False,True,...,False,True,False,False,False,True,False,False,True,False
8361-LTMKD,1,4,1,74.40,306.60,Yes,False,True,False,True,...,False,True,False,False,False,True,False,False,False,True


In [14]:

df_encoded['Contract'] = pd.to_numeric(df_encoded['Contract'])

In [15]:
df_encoded

Unnamed: 0_level_0,SeniorCitizen,tenure,Contract,MonthlyCharges,TotalCharges,Churn,gender_Female,gender_Male,Partner_No,Partner_Yes,...,StreamingTV_Yes,StreamingMovies_No,StreamingMovies_No internet service,StreamingMovies_Yes,PaperlessBilling_No,PaperlessBilling_Yes,PaymentMethod_Bank transfer (automatic),PaymentMethod_Credit card (automatic),PaymentMethod_Electronic check,PaymentMethod_Mailed check
customerID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
7590-VHVEG,0,1,1,29.85,29.85,No,True,False,False,True,...,False,True,False,False,False,True,False,False,True,False
5575-GNVDE,0,34,2,56.95,1889.50,No,False,True,True,False,...,False,True,False,False,True,False,False,False,False,True
3668-QPYBK,0,2,1,53.85,108.15,Yes,False,True,True,False,...,False,True,False,False,False,True,False,False,False,True
7795-CFOCW,0,45,2,42.30,1840.75,No,False,True,True,False,...,False,True,False,False,True,False,True,False,False,False
9237-HQITU,0,2,1,70.70,151.65,Yes,True,False,True,False,...,False,True,False,False,False,True,False,False,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6840-RESVB,0,24,2,84.80,1990.50,No,False,True,False,True,...,True,False,False,True,False,True,False,False,False,True
2234-XADUH,0,72,2,103.20,7362.90,No,True,False,False,True,...,True,False,False,True,False,True,False,True,False,False
4801-JZAZL,0,11,1,29.60,346.45,No,True,False,False,True,...,False,True,False,False,False,True,False,False,True,False
8361-LTMKD,1,4,1,74.40,306.60,Yes,False,True,False,True,...,False,True,False,False,False,True,False,False,False,True


In [16]:
df_encoded=df_encoded.drop_duplicates()

In [17]:
df_encoded

Unnamed: 0_level_0,SeniorCitizen,tenure,Contract,MonthlyCharges,TotalCharges,Churn,gender_Female,gender_Male,Partner_No,Partner_Yes,...,StreamingTV_Yes,StreamingMovies_No,StreamingMovies_No internet service,StreamingMovies_Yes,PaperlessBilling_No,PaperlessBilling_Yes,PaymentMethod_Bank transfer (automatic),PaymentMethod_Credit card (automatic),PaymentMethod_Electronic check,PaymentMethod_Mailed check
customerID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
7590-VHVEG,0,1,1,29.85,29.85,No,True,False,False,True,...,False,True,False,False,False,True,False,False,True,False
5575-GNVDE,0,34,2,56.95,1889.50,No,False,True,True,False,...,False,True,False,False,True,False,False,False,False,True
3668-QPYBK,0,2,1,53.85,108.15,Yes,False,True,True,False,...,False,True,False,False,False,True,False,False,False,True
7795-CFOCW,0,45,2,42.30,1840.75,No,False,True,True,False,...,False,True,False,False,True,False,True,False,False,False
9237-HQITU,0,2,1,70.70,151.65,Yes,True,False,True,False,...,False,True,False,False,False,True,False,False,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6840-RESVB,0,24,2,84.80,1990.50,No,False,True,False,True,...,True,False,False,True,False,True,False,False,False,True
2234-XADUH,0,72,2,103.20,7362.90,No,True,False,False,True,...,True,False,False,True,False,True,False,True,False,False
4801-JZAZL,0,11,1,29.60,346.45,No,True,False,False,True,...,False,True,False,False,False,True,False,False,True,False
8361-LTMKD,1,4,1,74.40,306.60,Yes,False,True,False,True,...,False,True,False,False,False,True,False,False,False,True


In [18]:
label_encoder = LabelEncoder()
df_encoded['Churn'] = label_encoder.fit_transform(df_encoded['Churn'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_encoded['Churn'] = label_encoder.fit_transform(df_encoded['Churn'])


In [19]:
df_encoded

Unnamed: 0_level_0,SeniorCitizen,tenure,Contract,MonthlyCharges,TotalCharges,Churn,gender_Female,gender_Male,Partner_No,Partner_Yes,...,StreamingTV_Yes,StreamingMovies_No,StreamingMovies_No internet service,StreamingMovies_Yes,PaperlessBilling_No,PaperlessBilling_Yes,PaymentMethod_Bank transfer (automatic),PaymentMethod_Credit card (automatic),PaymentMethod_Electronic check,PaymentMethod_Mailed check
customerID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
7590-VHVEG,0,1,1,29.85,29.85,0,True,False,False,True,...,False,True,False,False,False,True,False,False,True,False
5575-GNVDE,0,34,2,56.95,1889.50,0,False,True,True,False,...,False,True,False,False,True,False,False,False,False,True
3668-QPYBK,0,2,1,53.85,108.15,1,False,True,True,False,...,False,True,False,False,False,True,False,False,False,True
7795-CFOCW,0,45,2,42.30,1840.75,0,False,True,True,False,...,False,True,False,False,True,False,True,False,False,False
9237-HQITU,0,2,1,70.70,151.65,1,True,False,True,False,...,False,True,False,False,False,True,False,False,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6840-RESVB,0,24,2,84.80,1990.50,0,False,True,False,True,...,True,False,False,True,False,True,False,False,False,True
2234-XADUH,0,72,2,103.20,7362.90,0,True,False,False,True,...,True,False,False,True,False,True,False,True,False,False
4801-JZAZL,0,11,1,29.60,346.45,0,True,False,False,True,...,False,True,False,False,False,True,False,False,True,False
8361-LTMKD,1,4,1,74.40,306.60,1,False,True,False,True,...,False,True,False,False,False,True,False,False,False,True


In [20]:
df_encoded['Churn'] = pd.to_numeric(df_encoded['Churn'], errors='coerce')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_encoded['Churn'] = pd.to_numeric(df_encoded['Churn'], errors='coerce')


In [21]:
df_encoded

Unnamed: 0_level_0,SeniorCitizen,tenure,Contract,MonthlyCharges,TotalCharges,Churn,gender_Female,gender_Male,Partner_No,Partner_Yes,...,StreamingTV_Yes,StreamingMovies_No,StreamingMovies_No internet service,StreamingMovies_Yes,PaperlessBilling_No,PaperlessBilling_Yes,PaymentMethod_Bank transfer (automatic),PaymentMethod_Credit card (automatic),PaymentMethod_Electronic check,PaymentMethod_Mailed check
customerID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
7590-VHVEG,0,1,1,29.85,29.85,0,True,False,False,True,...,False,True,False,False,False,True,False,False,True,False
5575-GNVDE,0,34,2,56.95,1889.50,0,False,True,True,False,...,False,True,False,False,True,False,False,False,False,True
3668-QPYBK,0,2,1,53.85,108.15,1,False,True,True,False,...,False,True,False,False,False,True,False,False,False,True
7795-CFOCW,0,45,2,42.30,1840.75,0,False,True,True,False,...,False,True,False,False,True,False,True,False,False,False
9237-HQITU,0,2,1,70.70,151.65,1,True,False,True,False,...,False,True,False,False,False,True,False,False,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6840-RESVB,0,24,2,84.80,1990.50,0,False,True,False,True,...,True,False,False,True,False,True,False,False,False,True
2234-XADUH,0,72,2,103.20,7362.90,0,True,False,False,True,...,True,False,False,True,False,True,False,True,False,False
4801-JZAZL,0,11,1,29.60,346.45,0,True,False,False,True,...,False,True,False,False,False,True,False,False,True,False
8361-LTMKD,1,4,1,74.40,306.60,1,False,True,False,True,...,False,True,False,False,False,True,False,False,False,True


In [22]:
df_churn = df_encoded[['Churn']]

In [23]:
df_churn

Unnamed: 0_level_0,Churn
customerID,Unnamed: 1_level_1
7590-VHVEG,0
5575-GNVDE,0
3668-QPYBK,1
7795-CFOCW,0
9237-HQITU,1
...,...
6840-RESVB,0
2234-XADUH,0
4801-JZAZL,0
8361-LTMKD,1


In [24]:
df_encoded = df_encoded.drop('Churn' , axis=1)

In [25]:
df_encoded

Unnamed: 0_level_0,SeniorCitizen,tenure,Contract,MonthlyCharges,TotalCharges,gender_Female,gender_Male,Partner_No,Partner_Yes,Dependents_No,...,StreamingTV_Yes,StreamingMovies_No,StreamingMovies_No internet service,StreamingMovies_Yes,PaperlessBilling_No,PaperlessBilling_Yes,PaymentMethod_Bank transfer (automatic),PaymentMethod_Credit card (automatic),PaymentMethod_Electronic check,PaymentMethod_Mailed check
customerID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
7590-VHVEG,0,1,1,29.85,29.85,True,False,False,True,True,...,False,True,False,False,False,True,False,False,True,False
5575-GNVDE,0,34,2,56.95,1889.50,False,True,True,False,True,...,False,True,False,False,True,False,False,False,False,True
3668-QPYBK,0,2,1,53.85,108.15,False,True,True,False,True,...,False,True,False,False,False,True,False,False,False,True
7795-CFOCW,0,45,2,42.30,1840.75,False,True,True,False,True,...,False,True,False,False,True,False,True,False,False,False
9237-HQITU,0,2,1,70.70,151.65,True,False,True,False,True,...,False,True,False,False,False,True,False,False,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6840-RESVB,0,24,2,84.80,1990.50,False,True,False,True,False,...,True,False,False,True,False,True,False,False,False,True
2234-XADUH,0,72,2,103.20,7362.90,True,False,False,True,False,...,True,False,False,True,False,True,False,True,False,False
4801-JZAZL,0,11,1,29.60,346.45,True,False,False,True,False,...,False,True,False,False,False,True,False,False,True,False
8361-LTMKD,1,4,1,74.40,306.60,False,True,False,True,True,...,False,True,False,False,False,True,False,False,False,True


In [26]:
df_encoded.select_dtypes('number').columns

Index(['SeniorCitizen', 'tenure', 'Contract', 'MonthlyCharges',
       'TotalCharges'],
      dtype='object')

In [27]:
df_encoded.info()

<class 'pandas.core.frame.DataFrame'>
Index: 7021 entries, 7590-VHVEG to 3186-AJIEK
Data columns (total 43 columns):
 #   Column                                   Non-Null Count  Dtype  
---  ------                                   --------------  -----  
 0   SeniorCitizen                            7021 non-null   int64  
 1   tenure                                   7021 non-null   int64  
 2   Contract                                 7021 non-null   int64  
 3   MonthlyCharges                           7021 non-null   float64
 4   TotalCharges                             7021 non-null   float64
 5   gender_Female                            7021 non-null   bool   
 6   gender_Male                              7021 non-null   bool   
 7   Partner_No                               7021 non-null   bool   
 8   Partner_Yes                              7021 non-null   bool   
 9   Dependents_No                            7021 non-null   bool   
 10  Dependents_Yes                        

In [28]:
# Identify numerical columns
numerical_cols = df_encoded.select_dtypes(include=['int64', 'float64']).columns

# Initialize the MinMaxScaler
scaler = MinMaxScaler()

# Normalize only the numerical columns
df_encoded[numerical_cols] = scaler.fit_transform(df_encoded[numerical_cols])

# Display the updated DataFrame
df_encoded


Unnamed: 0_level_0,SeniorCitizen,tenure,Contract,MonthlyCharges,TotalCharges,gender_Female,gender_Male,Partner_No,Partner_Yes,Dependents_No,...,StreamingTV_Yes,StreamingMovies_No,StreamingMovies_No internet service,StreamingMovies_Yes,PaperlessBilling_No,PaperlessBilling_Yes,PaymentMethod_Bank transfer (automatic),PaymentMethod_Credit card (automatic),PaymentMethod_Electronic check,PaymentMethod_Mailed check
customerID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
7590-VHVEG,0.0,0.013889,0.0,0.115423,0.001275,True,False,False,True,True,...,False,True,False,False,False,True,False,False,True,False
5575-GNVDE,0.0,0.472222,0.5,0.385075,0.215867,False,True,True,False,True,...,False,True,False,False,True,False,False,False,False,True
3668-QPYBK,0.0,0.027778,0.0,0.354229,0.010310,False,True,True,False,True,...,False,True,False,False,False,True,False,False,False,True
7795-CFOCW,0.0,0.625000,0.5,0.239303,0.210241,False,True,True,False,True,...,False,True,False,False,True,False,True,False,False,False
9237-HQITU,0.0,0.027778,0.0,0.521891,0.015330,True,False,True,False,True,...,False,True,False,False,False,True,False,False,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6840-RESVB,0.0,0.333333,0.5,0.662189,0.227521,False,True,False,True,False,...,True,False,False,True,False,True,False,False,False,True
2234-XADUH,0.0,1.000000,0.5,0.845274,0.847461,True,False,False,True,False,...,True,False,False,True,False,True,False,True,False,False
4801-JZAZL,0.0,0.152778,0.0,0.112935,0.037809,True,False,False,True,False,...,False,True,False,False,False,True,False,False,True,False
8361-LTMKD,1.0,0.055556,0.0,0.558706,0.033210,False,True,False,True,True,...,False,True,False,False,False,True,False,False,False,True


In [29]:
# Fit and transform the data
df_churn = pd.DataFrame(scaler.fit_transform(df_churn), columns=df_churn.columns)

df_churn

Unnamed: 0,Churn
0,0.0
1,0.0
2,1.0
3,0.0
4,1.0
...,...
7016,0.0
7017,0.0
7018,0.0
7019,1.0


In [30]:
df_encoded.to_csv('data_no_Churn(Yusuf Muhammed Bedir).csv', index=True)

In [31]:
df_churn.to_csv('Churn(Yusuf Muhammed Bedir).csv', index=True)