In [1]:
import pandas as pd
import numpy as np

df = pd.read_csv("C:\\Users\\DELL\\Documents\\Projects\\Churn Analysis\\Telco_customer_churn.csv")
df.head()
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7043 entries, 0 to 7042
Data columns (total 33 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   CustomerID         7043 non-null   object 
 1   Count              7043 non-null   int64  
 2   Country            7043 non-null   object 
 3   State              7043 non-null   object 
 4   City               7043 non-null   object 
 5   Zip Code           7043 non-null   int64  
 6   Lat Long           7043 non-null   object 
 7   Latitude           7043 non-null   float64
 8   Longitude          7043 non-null   float64
 9   Gender             7043 non-null   object 
 10  Senior Citizen     7043 non-null   object 
 11  Partner            7043 non-null   object 
 12  Dependents         7043 non-null   object 
 13  Tenure Months      7043 non-null   int64  
 14  Phone Service      7043 non-null   object 
 15  Multiple Lines     7043 non-null   object 
 16  Internet Service   7043 

In [2]:
# Convert TotalCharges to numeric
df['Total Charges'] = pd.to_numeric(df['Total Charges'], errors='coerce')

# Fill missing values
df['Total Charges'].fillna(df['Total Charges'].median(), inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Total Charges'].fillna(df['Total Charges'].median(), inplace=True)


In [3]:
# Convert Churn to binary
df['Churn Label'] = df['Churn Label'].map({'Yes':1, 'No':0})

In [5]:
df['Churn Label'].value_counts(dropna=False)


Churn Label
0    5174
1    1869
Name: count, dtype: int64

In [7]:
df['RevenueRisk'] = df['Monthly Charges'] * df['Churn Label']

In [8]:
df['TenureGroup'] = pd.cut(df['Tenure Months'],
                           bins=[0,12,24,48,72],
                           labels=['0-1yr','1-2yr','2-4yr','4-6yr'])


In [9]:
df['HighValue'] = np.where(df['Monthly Charges'] > df['Monthly Charges'].median(), 1, 0)

In [10]:
total_revenue = df['Monthly Charges'].sum()
churn_rate = df['Churn Label'].mean()*100
revenue_at_risk = df[df['Churn Label']==1]['Monthly Charges'].sum()

total_revenue, churn_rate, revenue_at_risk

(np.float64(456116.6), np.float64(26.536987079369588), np.float64(139130.85))

In [11]:
df.groupby('Contract')['Churn Label'].mean()
df.groupby('Internet Service')['Churn Label'].mean()
df.groupby('Payment Method')['Churn Label'].mean()

Payment Method
Bank transfer (automatic)    0.167098
Credit card (automatic)      0.152431
Electronic check             0.452854
Mailed check                 0.191067
Name: Churn Label, dtype: float64

In [12]:
df['Churn Label'].value_counts(dropna=False)


Churn Label
0    5174
1    1869
Name: count, dtype: int64

In [13]:
df['Churn Value'].value_counts(dropna=False)


Churn Value
0    5174
1    1869
Name: count, dtype: int64

In [14]:
df['RiskSegment'] = 'Low Risk'

df.loc[(df['Churn Label']==1) & 
       (df['Monthly Charges'] > df['Monthly Charges'].median()),
       'RiskSegment'] = 'High Risk'


In [15]:
df.groupby('RiskSegment')['Monthly Charges'].sum()


RiskSegment
High Risk    110385.4
Low Risk     345731.2
Name: Monthly Charges, dtype: float64

In [16]:
df.groupby('TenureGroup')['Churn Label'].mean()


  df.groupby('TenureGroup')['Churn Label'].mean()


TenureGroup
0-1yr    0.476782
1-2yr    0.287109
2-4yr    0.203890
4-6yr    0.095132
Name: Churn Label, dtype: float64

In [23]:
df.groupby('RiskSegment').agg({
    'CustomerID':'count',
    'Monthly Charges':'sum',
    'Churn Label':'mean'
})


Unnamed: 0_level_0,CustomerID,Monthly Charges,Churn Label
RiskSegment,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
High Risk,1237,110385.4,1.0
Low Risk,5806,345731.2,0.108853


In [25]:
df.to_csv(r"C:\\Users\\DELL\\Documents\\Projects\\Churn Analysis\\cleaned_churn_data.csv", index=False)