In [1]:
import io

In [2]:
import numpy as np

In [3]:
import pandas as pd

In [4]:
import seaborn as sns

In [5]:
from sklearn.svm import SVC

In [6]:
import matplotlib.pyplot as plt

In [7]:
from sklearn.pipeline import Pipeline

In [8]:
from sklearn.decomposition import PCA

In [9]:
from sklearn.compose import ColumnTransformer

In [10]:
from sklearn.model_selection import train_test_split

In [11]:
from sklearn.preprocessing import StandardScaler, OneHotEncoder

In [12]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, roc_auc_score, confusion_matrix

In [13]:
class SourceReference:
    def __init__(self, id, type):
        self.id = id
        self.type = type

source_reference = [
    SourceReference(id='data.csv', type='text/csv')
]

In [14]:
try:
    df = pd.read_csv('data.csv')
    print("CSV content loaded successfully.")
    print(df.head())
except Exception as e:
    print(f"Error loading CSV: {e}")

CSV content loaded successfully.
  State  Account length  Area code International plan Voice mail plan  \
0    LA             117        408                 No              No   
1    IN              65        415                 No              No   
2    NY             161        415                 No              No   
3    SC             111        415                 No              No   
4    HI              49        510                 No              No   

   Number vmail messages  Total day minutes  Total day calls  \
0                      0              184.5               97   
1                      0              129.1              137   
2                      0              332.9               67   
3                      0              110.4              103   
4                      0              119.3              117   

   Total day charge  Total eve minutes  Total eve calls  Total eve charge  \
0             31.37              351.6               80           

In [15]:
df = pd.read_csv('data.csv')

In [16]:
print("Dataset Info:")
df.info()
print("\nFirst 5 rows of the dataset:")
print(df.head())
print("\nValue counts for 'churn' column:")
print(df['churn'].value_counts())

Dataset Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 667 entries, 0 to 666
Data columns (total 20 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   State                   667 non-null    object 
 1   Account length          667 non-null    int64  
 2   Area code               667 non-null    int64  
 3   International plan      667 non-null    object 
 4   Voice mail plan         667 non-null    object 
 5   Number vmail messages   667 non-null    int64  
 6   Total day minutes       667 non-null    float64
 7   Total day calls         667 non-null    int64  
 8   Total day charge        667 non-null    float64
 9   Total eve minutes       667 non-null    float64
 10  Total eve calls         667 non-null    int64  
 11  Total eve charge        667 non-null    float64
 12  Total night minutes     667 non-null    float64
 13  Total night calls       667 non-null    int64  
 14  Total night charge      667 

KeyError: 'churn'

In [None]:
X = df.drop('churn', axis=1)
y = df['churn']

In [None]:
y = y.map({'True.': 1, 'False.': 0})

In [None]:
categorical_features = X.select_dtypes(include=['object']).columns
numerical_features = X.select_dtypes(include=['int64', 'float64']).columns

In [None]:
numerical_transformer = StandardScaler()

In [None]:
categorical_transformer = OneHotEncoder(handle_unknown='ignore')

In [None]:
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_transformer, numerical_features),
        ('cat', categorical_transformer, categorical_features)
    ])

In [None]:
pipeline_linear_svm = Pipeline(steps=[('preprocessor', preprocessor),
                                      ('svm', SVC(kernel='linear', random_state=42, probability=True))])

In [None]:
pipeline_rbf_svm = Pipeline(steps=[('preprocessor', preprocessor),
                                   ('svm', SVC(kernel='rbf', random_state=42, probability=True))])

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

In [None]:
print("\nTraining Linear SVM...")
pipeline_linear_svm.fit(X_train, y_train)
print("Linear SVM trained.")

In [None]:
print("\nTraining RBF SVM...")
pipeline_rbf_svm.fit(X_train, y_train)
print("RBF SVM trained.")

In [None]:
def evaluate_model(model, X_test, y_test, model_name):
    y_pred = model.predict(X_test)
    y_prob = model.predict_proba(X_test)[:, 1]

    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    auc = roc_auc_score(y_test, y_prob)
    cm = confusion_matrix(y_test, y_pred)

    print(f"\n--- {model_name} Performance ---")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"AUC: {auc:.4f}")
    print("Confusion Matrix:")
    print(cm)

    plt.figure(figsize=(6, 4))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', cbar=False,
                xticklabels=['Predicted No Churn (0)', 'Predicted Churn (1)'],
                yticklabels=['Actual No Churn (0)', 'Actual Churn (1)'])
    plt.title(f'{model_name} Confusion Matrix')
    plt.xlabel('Predicted Label')
    plt.ylabel('True Label')
    plt.show()

In [None]:
evaluate_model(pipeline_linear_svm, X_test, y_test, "Linear SVM")
evaluate_model(pipeline_rbf_svm, X_test, y_test, "RBF SVM")

In [None]:
print("\nGenerating Decision Boundary Visualization (using PCA)...")
preprocessor_for_pca = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_features),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)
    ])
X_processed_for_pca = preprocessor_for_pca.fit_transform(X)

In [None]:
pca = PCA(n_components=2, random_state=42)
X_pca = pca.fit_transform(X_processed_for_pca)

In [None]:
svm_visual = SVC(kernel='rbf', random_state=42, gamma='scale')
svm_visual.fit(X_pca, y)

In [None]:
x_min, x_max = X_pca[:, 0].min() - 1, X_pca[:, 0].max() + 1
y_min, y_max = X_pca[:, 1].min() - 1, X_pca[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.02),
                     np.arange(y_min, y_max, 0.02))

In [None]:
Z = svm_visual.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)

In [None]:
plt.figure(figsize=(10, 7))
plt.contourf(xx, yy, Z, alpha=0.8, cmap=plt.cm.coolwarm)
plt.scatter(X_pca[:, 0], X_pca[:, 1], c=y, cmap=plt.cm.coolwarm, s=20, edgecolors='k')
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.title('SVM Decision Boundary (RBF Kernel, PCA-reduced Data)')
plt.colorbar(label='Churn (0=No, 1=Yes)')
plt.show()

In [None]:
print("\nSVM Classification task completed.")