**Mounting Drive to Access the Dataset** (done by: Amna Shahid -Team AI)

In [1]:
# Colab helper: mounts Google Drive at the path below so that files stored
# there (the dataset) can be read like local files.
from google.colab import drive

MOUNT_POINT = '/content/drive'
drive.mount(MOUNT_POINT)

Mounted at /content/drive


**Libraries**

In [6]:
# Importing required libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

**Data Pre-Processing**

In [7]:
# Read the training CSV from the mounted Drive folder into a DataFrame
DATASET_PATH = '/content/drive/MyDrive/AI Fellowship/train.csv'
df = pd.read_csv(DATASET_PATH)

In [8]:
# Handling missing values
# Report how many nulls each column contains
missing_per_column = df.isna().sum()
print(missing_per_column)

# Numerical columns; any gaps in them are replaced by that column's median
num_cols = [
    'AccountAge', 'MonthlyCharges', 'TotalCharges', 'ViewingHoursPerWeek',
    'AverageViewingDuration', 'ContentDownloadsPerMonth', 'UserRating',
    'SupportTicketsPerMonth', 'WatchlistSize',
]

column_medians = df[num_cols].median()
df[num_cols] = df[num_cols].fillna(column_medians)

AccountAge                  0
MonthlyCharges              0
TotalCharges                0
SubscriptionType            0
PaymentMethod               0
PaperlessBilling            0
ContentType                 0
MultiDeviceAccess           0
DeviceRegistered            0
ViewingHoursPerWeek         0
AverageViewingDuration      0
ContentDownloadsPerMonth    0
GenrePreference             0
UserRating                  0
SupportTicketsPerMonth      0
Gender                      0
WatchlistSize               0
ParentalControl             0
SubtitlesEnabled            0
CustomerID                  0
Churn                       0
dtype: int64


In [9]:
# Encoding variables to have numerical values
# Yes/No flags -> 1/0 (any value other than 'Yes' becomes 0).
bin_cols = ['PaperlessBilling', 'MultiDeviceAccess', 'ParentalControl', 'SubtitlesEnabled']
for col in bin_cols:
    # Skip columns that are already numeric: re-running this cell would
    # otherwise compare 0/1 against 'Yes' and silently turn every flag into 0.
    if not pd.api.types.is_numeric_dtype(df[col]):
        df[col] = (df[col] == 'Yes').astype(int)

categorical_columns = ['SubscriptionType', 'PaymentMethod', 'ContentType', 'DeviceRegistered',
                       'GenrePreference', 'Gender']

# Keep one fitted LabelEncoder per column (keyed by column name). Refitting a
# single shared encoder would discard every mapping except the last column's,
# making it impossible to encode new data the same way or to invert the codes.
label_encoders = {}
for col in categorical_columns:
    encode_labels = LabelEncoder()
    df[col] = encode_labels.fit_transform(df[col])
    label_encoders[col] = encode_labels

In [10]:
# Standardise the numerical features: StandardScaler subtracts each column's
# mean and divides by its standard deviation (zero mean, unit variance).
# NOTE(review): the scaler is fitted on the whole dataset here, before the
# train/test split further down, so test rows influence the scaling statistics.
scale = StandardScaler()
scale.fit(df[num_cols])
df[num_cols] = scale.transform(df[num_cols])

In [11]:
# Separate the target column (y) from the model inputs (X);
# CustomerID is dropped from X together with the target itself.
y = df['Churn']
X = df.drop(columns=['CustomerID', 'Churn'])

In [12]:
# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

# Show the (rows, columns) shape of each split
print(f"Training set size: {X_train.shape}")
print(f"Test set size: {X_test.shape}")


Training set size: (195029, 19)
Test set size: (48758, 19)


**Logistic Regression Model Training** (For churn prediction for Subscription-Based Services)

In [13]:
# Import libraries
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [14]:
# Create the classifier and fit it on the training split in one step
# (fit() returns the estimator itself)
logreg = LogisticRegression(random_state=42).fit(X_train, y_train)

# Predicted class labels for the held-out test rows
y_pred = logreg.predict(X_test)

In [16]:
from sklearn.metrics import precision_score, recall_score, f1_score

# Performance evaluation on the test split:
# compute every metric first, then report them in one place.

# Share of test rows whose predicted label matches the true label
accuracy = accuracy_score(y_test, y_pred)

# Confusion matrix: rows are the true classes, columns the predicted classes
conf_matrix = confusion_matrix(y_test, y_pred)

# Precision, recall and F1 for the positive class (binary averaging)
precision, recall, f1 = (
    metric(y_test, y_pred, average='binary')
    for metric in (precision_score, recall_score, f1_score)
)

# Print the results (scores rounded to 3 decimal places)
print(f"Accuracy: {accuracy:.3f}")
print("Confusion Matrix:")
print(conf_matrix)
print(f"Precision: {precision:.3f}")
print(f"Recall: {recall:.3f}")
print(f"F1-Score: {f1:.3f}")

Accuracy: 0.824
Confusion Matrix:
[[39188   780]
 [ 7788  1002]]
Precision: 0.562
Recall: 0.114
F1-Score: 0.190


**Saving the Model to Be Used by the App**

In [17]:
import joblib

# Persist the fitted logistic regression model so it can be reloaded later
MODEL_FILE = 'logreg.pkl'
joblib.dump(logreg, MODEL_FILE)


['logreg.pkl']

**AI-Based Platform Development**

In [18]:
pip install streamlit

Collecting streamlit
  Downloading streamlit-1.38.0-py2.py3-none-any.whl.metadata (8.5 kB)
Collecting tenacity<9,>=8.1.0 (from streamlit)
  Downloading tenacity-8.5.0-py3-none-any.whl.metadata (1.2 kB)
Collecting gitpython!=3.1.19,<4,>=3.0.7 (from streamlit)
  Downloading GitPython-3.1.43-py3-none-any.whl.metadata (13 kB)
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Collecting watchdog<5,>=2.1.5 (from streamlit)
  Downloading watchdog-4.0.2-py3-none-manylinux2014_x86_64.whl.metadata (38 kB)
Collecting gitdb<5,>=4.0.1 (from gitpython!=3.1.19,<4,>=3.0.7->streamlit)
  Downloading gitdb-4.0.11-py3-none-any.whl.metadata (1.2 kB)
Collecting smmap<6,>=3.0.1 (from gitdb<5,>=4.0.1->gitpython!=3.1.19,<4,>=3.0.7->streamlit)
  Downloading smmap-5.0.1-py3-none-any.whl.metadata (4.3 kB)
Downloading streamlit-1.38.0-py2.py3-none-any.whl (8.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.7/8.7 MB[0m [31m41.4 MB

In [20]:
import streamlit as st
import joblib
import pandas as pd

# Loading the previously saved model
loaded_model = joblib.load('logreg.pkl')

st.title('Customer Churn Prediction')

# The model must receive every feature it was trained on, in the training
# order. Passing only two values makes predict() fail with a feature-count
# ValueError, so the input form is built from the model's own feature list.
has_feature_names = hasattr(loaded_model, 'feature_names_in_')
if has_feature_names:
    feature_names = list(loaded_model.feature_names_in_)
else:
    # Model was fitted on a plain array: fall back to positional labels.
    feature_names = [f'feature_{i}' for i in range(loaded_model.n_features_in_)]

# Only the model itself is stored in logreg.pkl; the encoders and scaler used
# during training are not, so values have to be entered already preprocessed.
st.caption(
    'Enter every feature in the same form used for training: '
    'categorical fields as their encoded integers, Yes/No fields as 1/0, '
    'and numerical fields as standardised values.'
)

# user input: one numeric field per model feature
user_values = {name: st.number_input(name, value=0.0) for name in feature_names}

# form for input submission
if st.button('Predict'):
    data = pd.DataFrame([user_values], columns=feature_names)
    # Pass a bare array when the model knows no column names, to avoid a
    # feature-name mismatch warning from scikit-learn.
    model_input = data if has_feature_names else data.to_numpy()
    prediction = loaded_model.predict(model_input)
    st.write('Prediction:', 'Churn' if prediction[0] == 1 else 'No Churn')


2024-09-20 16:41:36.273 
  command:

    streamlit run /usr/local/lib/python3.10/dist-packages/colab_kernel_launcher.py [ARGUMENTS]
2024-09-20 16:41:36.291 Session state does not function when running a script without `streamlit run`


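The app can be started with the command `streamlit run Final_Project_AI.py` (the command is lowercase) after downloading this notebook as a `.py` file and installing the required libraries (streamlit, joblib, pandas, scikit-learn), for example from a VSCode terminal. The saved model file `logreg.pkl` must be in the same folder as the script.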