In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from keras.layers import Dense, Dropout
from keras.models import Sequential
from keras.utils import set_random_seed
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split

In [2]:
# Read the customer churn CSV into the `customer` DataFrame used by later cells.
# NOTE(review): this absolute path looks like a Colab Google Drive mount --
# confirm the drive is mounted and the file exists before running.
customer = pd.read_csv('/content/drive/MyDrive/Colab/customer_churn.csv')

# Quick inspection, printed in this order:
#   1. the first few rows
#   2. the number of missing values per column
#   3. descriptive statistics
print(
    customer.head(),
    customer.isnull().sum(),
    customer.describe(),
    sep='\n',
)

FileNotFoundError: [Errno 2] No such file or directory: '/content/drive/MyDrive/Colab/customer_churn.csv'

In [None]:
# Data Manipulation
#a
# Number of male customers. Series.sum() counts the True entries of the boolean
# mask in one vectorized call instead of iterating it with the builtin sum().
print((customer['gender'] == "Male").sum())

#b
# Number of customers whose internet service is DSL.
print((customer['InternetService'] == "DSL").sum())

#c
# Female senior citizens who pay by mailed check.
new_customer = customer[(customer['gender'] == 'Female') &
                        (customer['SeniorCitizen'] == 1) &
                        (customer['PaymentMethod'] == 'Mailed check')]

print(new_customer.head())

#d
# Convert TotalCharges to numbers in place on `customer`; any value that cannot
# be parsed becomes NaN (errors='coerce'). Re-running this line on an already
# numeric column leaves it unchanged.
customer['TotalCharges'] = pd.to_numeric(customer['TotalCharges'], errors='coerce')

# Customers with tenure below 10 OR total charges below 500. A NaN TotalCharges
# compares as False, so such rows are kept only when the tenure condition holds.
new_customer = customer[(customer['tenure'] < 10) | (customer['TotalCharges'] < 500)]

print(new_customer.head())

In [None]:
# Data Visualization
#a
# Pie chart showing the share of each Churn value.
churn_counts = customer["Churn"].value_counts()
names = churn_counts.index.tolist()
sizes = churn_counts.tolist()

plt.pie(sizes, labels=names, autopct="%0.1f%%")
plt.title('Churn Distribution')
plt.show()

#b
# Bar chart with one bar per InternetService category, sized by its row count.
service_counts = customer['InternetService'].value_counts()
service_names = service_counts.index.tolist()
service_sizes = service_counts.tolist()

plt.bar(service_names, service_sizes, color='orange')
plt.title('Distribution of Internet Service')
plt.xlabel('Categories of Internet Service')
plt.ylabel('Count of categories')
plt.show()

In [None]:
# Model Building

#a
# Baseline: predict Churn from tenure alone with a small dense network.

# Seed the Python, NumPy and Keras backend random generators so weight
# initialisation and batch shuffling repeat each time this cell is run.
set_random_seed(42)

x = customer[['tenure']]

# Encode the label as 1 ('Yes') / 0 ('No'), assuming the column holds exactly
# those two strings (any other value would map to NaN). The single-column frame
# is built from the mapped Series rather than by assigning into a slice of
# `customer`, which can raise pandas' SettingWithCopyWarning.
y = customer['Churn'].map({'Yes': 1, 'No': 0}).to_frame()

# Hold out 30% of the rows; random_state pins which rows land in each split.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.30, random_state=42)

# 1 input -> 12 -> 8 -> 1 sigmoid unit producing a churn probability.
model = Sequential()
model.add(Dense(12, input_dim=1, activation='relu'))
model.add(Dense(8, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# The held-out split doubles as validation data, so the validation curve and
# the confusion matrix below are computed on the same rows.
history = model.fit(x_train, y_train, epochs=150, validation_data=(x_test, y_test))

# Threshold the predicted probabilities at 0.5 to obtain 0/1 class labels.
y_pred = (model.predict(x_test) > 0.5).astype(int)
print(confusion_matrix(y_test, y_pred))

# Training vs. validation accuracy per epoch.
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.title('Model Accuracy')
plt.legend(['Train', 'Validation'])
plt.show()

In [None]:
#b
# Same architecture as a 12 -> 8 -> 1 dense network, with dropout after each
# hidden layer. This cell does not build its own split: it uses whatever
# x_train / x_test / y_train / y_test are already in the kernel.

# Seed the Python, NumPy and Keras backend random generators so the dropout
# masks, initial weights and batch order repeat each time this cell is run.
set_random_seed(42)

model = Sequential()
model.add(Dense(12, input_dim=1, activation='relu'))
model.add(Dropout(0.3))  # drop 30% of the first hidden layer's outputs during training
model.add(Dense(8, activation='relu'))
model.add(Dropout(0.2))  # drop 20% of the second hidden layer's outputs during training
model.add(Dense(1, activation='sigmoid'))

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# The held-out split doubles as validation data, so the validation curve and
# the confusion matrix below are computed on the same rows.
history = model.fit(x_train, y_train, epochs=150, validation_data=(x_test, y_test))

# Threshold the predicted probabilities at 0.5 to obtain 0/1 class labels.
y_pred = (model.predict(x_test) > 0.5).astype(int)
print(confusion_matrix(y_test, y_pred))

# Training vs. validation accuracy per epoch.
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.title('Model Accuracy with Dropout')
plt.legend(['Train', 'Validation'])
plt.show()

In [None]:
#c
# Predict Churn from three numeric features with the same 12 -> 8 -> 1 network.

# Seed the Python, NumPy and Keras backend random generators so weight
# initialisation and batch shuffling repeat each time this cell is run.
set_random_seed(42)

feature_cols = ['MonthlyCharges', 'tenure', 'TotalCharges']

# Build the modelling frame without touching `customer`:
#   * TotalCharges is coerced to numeric here, so this cell does not rely on an
#     earlier cell having converted it; unparseable values become NaN.
#   * Rows with a missing feature are dropped, because a NaN input propagates
#     through the network and turns the loss into NaN.
model_data = (
    customer[feature_cols + ['Churn']]
    .assign(TotalCharges=lambda frame: pd.to_numeric(frame['TotalCharges'], errors='coerce'))
    .dropna(subset=feature_cols)
)

x = model_data[feature_cols]

# Encode the label as 1 ('Yes') / 0 ('No'), assuming the column holds exactly
# those two strings (any other value would map to NaN). Built from the mapped
# Series so nothing is assigned into a slice of another frame, and taken from
# `model_data` so x and y cover the same rows.
y = model_data['Churn'].map({'Yes': 1, 'No': 0}).to_frame()

# Hold out 30% of the rows; random_state pins which rows land in each split.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.30, random_state=42)

# NOTE(review): the features are fed to the network unscaled; if their ranges
# differ a lot, standardising them first would likely help training -- confirm.
model = Sequential()
model.add(Dense(12, input_dim=3, activation='relu'))
model.add(Dense(8, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# The held-out split doubles as validation data, so the validation curve and
# the confusion matrix below are computed on the same rows.
history = model.fit(x_train, y_train, epochs=150, validation_data=(x_test, y_test))

# Threshold the predicted probabilities at 0.5 to obtain 0/1 class labels.
y_pred = (model.predict(x_test) > 0.5).astype(int)
print(confusion_matrix(y_test, y_pred))

# Training vs. validation accuracy per epoch.
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.title('Model Accuracy with Multiple Features')
plt.legend(['Train', 'Validation'])
plt.show()