# Gaussian Discriminant Analysis

In [1]:
# Import required libraries

import numpy as np
import pandas as pd

%matplotlib inline 
import matplotlib.pyplot as plt

from scipy.special import expit
from scipy.stats import multivariate_normal
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split



In [2]:
# Load the telecom churn dataset from disk and preview its first five rows
data = pd.read_csv(filepath_or_buffer="data/telecom_churn.csv")
data.head(n=5)

Unnamed: 0,Churn,AccountWeeks,ContractRenewal,DataPlan,DataUsage,CustServCalls,DayMins,DayCalls,MonthlyCharge,OverageFee,RoamMins
0,0,128,1,1,2.7,1,265.1,110,89.0,9.87,10.0
1,0,107,1,1,3.7,1,161.6,123,82.0,9.78,13.7
2,0,137,1,0,0.0,0,243.4,114,52.0,6.06,12.2
3,0,84,0,0,0.0,2,299.4,71,57.0,3.1,6.6
4,0,75,0,0,0.0,3,166.7,113,41.0,7.42,10.1


In [3]:
# Count the missing entries in each column (all zeros means nothing to impute)
data.isnull().sum(axis=0)

Churn              0
AccountWeeks       0
ContractRenewal    0
DataPlan           0
DataUsage          0
CustServCalls      0
DayMins            0
DayCalls           0
MonthlyCharge      0
OverageFee         0
RoamMins           0
dtype: int64

In [11]:
# Separate the target column ("Churn") from the remaining feature columns
y = data["Churn"].values
X = data.drop(columns=["Churn"]).values

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Display the shapes of the four resulting arrays as a sanity check
tuple(part.shape for part in (X_train, y_train, X_test, y_test))

((2666, 10), (2666,), (667, 10), (667,))

In [18]:
import numpy as np
from scipy.stats import multivariate_normal

class GDA:
    """Gaussian Discriminant Analysis for binary (0/1) labels.

    Each class is modelled as a multivariate Gaussian with its own mean and a
    covariance matrix shared by both classes; predictions come from Bayes' rule.

    Attributes set by ``fit``:
        phi:   fraction of training samples with label 1 (prior P(y=1)).
        mu0:   mean feature vector of the samples with label 0.
        mu1:   mean feature vector of the samples with label 1.
        sigma: shared (n, n) covariance matrix, normalised by the sample count m.
    """

    def fit(self, X, y):
        """Estimate phi, mu0, mu1 and the shared covariance from training data.

        Args:
            X: array-like of shape (m, n) holding m samples with n features.
            y: array-like of m labels; every label must be 0 or 1 and both
                classes must be present.

        Raises:
            ValueError: if X is not 2-D, if X and y disagree on the number of
                samples, or if y is not made of exactly the labels 0 and 1.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y).ravel()

        if X.ndim != 2:
            raise ValueError("X must be a 2-D array of shape (m, n)")
        m = X.shape[0]
        if y.shape[0] != m:
            raise ValueError("X and y must contain the same number of samples")
        # A missing class would give a NaN mean, and labels other than 0/1
        # would silently distort phi and sigma, so fail loudly instead.
        if not np.array_equal(np.unique(y), [0, 1]):
            raise ValueError("y must contain both classes, labelled 0 and 1")

        is_positive = y == 1

        # 1. Compute ϕ (class prior of y = 1)
        self.phi = np.mean(is_positive)

        # 2. Compute μ0 and μ1 (per-class feature means)
        self.mu0 = X[~is_positive].mean(axis=0)
        self.mu1 = X[is_positive].mean(axis=0)

        # 3. Compute Σ (shared covariance): centre every sample on the mean of
        #    its own class, then average the outer products in one matrix product.
        centered = X - np.where(is_positive[:, None], self.mu1, self.mu0)
        self.sigma = centered.T @ centered / m

    def predict_proba(self, X):
        """Return the posterior probability P(y=1 | x) for each sample in X.

        The posterior is evaluated in log space: multiplying raw densities
        underflows to 0/0 (NaN) for samples far from both class means.
        """
        # Log-likelihoods under each class-conditional Gaussian
        log_lik1 = multivariate_normal.logpdf(X, mean=self.mu1, cov=self.sigma)
        log_lik0 = multivariate_normal.logpdf(X, mean=self.mu0, cov=self.sigma)

        # Bayes' rule as log-odds: log[p(x|1)ϕ] - log[p(x|0)(1-ϕ)]
        log_prior_odds = np.log(self.phi) - np.log1p(-self.phi)
        log_odds = (log_lik1 - log_lik0) + log_prior_odds

        # The logistic function maps log-odds back to P(y=1|x) without overflow
        return expit(log_odds)

    def predict(self, X):
        """Return hard 0/1 labels by thresholding P(y=1 | x) at 0.5."""
        return (self.predict_proba(X) >= 0.5).astype(int)


In [19]:
# 1. Train the from-scratch GDA model on the training split
model = GDA()
model.fit(X_train, y_train)

# 2. Predict churn labels for the held-out test split
y_pred = model.predict(X_test)

# 3. Evaluate: per-class precision, recall and F1 on the test split
print(classification_report(y_test, y_pred))


              precision    recall  f1-score   support

           0       0.87      0.96      0.91       566
           1       0.50      0.23      0.31       101

    accuracy                           0.85       667
   macro avg       0.69      0.59      0.61       667
weighted avg       0.82      0.85      0.82       667



## Sklearn Analysis

In [22]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

# 1. Train scikit-learn's LDA on the same training split
lda = LinearDiscriminantAnalysis()
lda.fit(X_train, y_train)

# 2. Predict churn labels for the held-out test split
y_pred = lda.predict(X_test)

# 3. Evaluate with the same report used for the GDA model
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.87      0.96      0.91       566
           1       0.50      0.23      0.31       101

    accuracy                           0.85       667
   macro avg       0.69      0.59      0.61       667
weighted avg       0.82      0.85      0.82       667



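Both models produce the same classification report on the test set. This is expected: GDA with a shared covariance matrix is the same model as linear discriminant analysis, so our from-scratch implementation agrees with scikit-learn's `LinearDiscriminantAnalysis`.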