In [None]:
import io

In [None]:
import numpy as np

In [None]:
import pandas as pd

In [None]:
import seaborn as sns

In [None]:
from sklearn.svm import SVC

In [None]:
import matplotlib.pyplot as plt

In [None]:
from sklearn.pipeline import Pipeline

In [None]:
from sklearn.decomposition import PCA

In [None]:
from sklearn.compose import ColumnTransformer

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
from sklearn.preprocessing import StandardScaler, OneHotEncoder

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, roc_auc_score, confusion_matrix

In [None]:
# Reference to the CSV that will be fetched below.
# NOTE(review): `content_fetcher` is not imported in the visible cells, so it is
# presumably injected by the hosting environment; confirm before Restart & Run All.
source_reference = [content_fetcher.SourceReference(id='data.csv', type='text/csv')]

In [None]:
# Fetch the raw CSV text and parse it into the DataFrame `df` used by every
# later cell.
try:
    csv_content = content_fetcher.fetch(query="data.csv content", source_references=source_reference)
    print("CSV content fetched successfully.")

    # Parse the fetched text in memory by wrapping it in a file-like StringIO
    df = pd.read_csv(io.StringIO(csv_content))

except Exception as e:
    print(f"Error fetching or loading CSV: {e}")
    # Re-raise instead of calling exit(): exit() stops the interpreter and
    # discards the traceback, whereas re-raising halts execution at this cell
    # with the original exception type and full context.
    raise


In [None]:
# Structural overview of the loaded frame: schema, a row sample, class balance.
print("Dataset Info:")
df.info()

print("\nFirst 5 rows of the dataset:")
print(df.head(5))

print("\nValue counts for 'churn' column:")
churn_counts = df['churn'].value_counts()
print(churn_counts)

In [None]:
# Separate the raw target column from the feature matrix (every other column).
y = df['churn']
X = df.drop(columns='churn')

In [None]:
# Encode the target as 1 (churn) / 0 (no churn).
# The mapping reads from df['churn'] rather than from `y`, so re-running this
# cell is idempotent; mapping the already-encoded 0/1 values again would turn
# every label into NaN.
# Labels are normalised first (surrounding whitespace, trailing '.', case), so
# 'True.', ' True.', 'True' and boolean True all encode to 1.
y = (
    df['churn']
    .astype(str)
    .str.strip()
    .str.rstrip('.')
    .str.lower()
    .map({'true': 1, 'false': 0})
)

# Fail fast with the offending values instead of letting NaN labels surface
# later as an opaque error in the split or the classifier.
if y.isna().any():
    unknown_labels = df.loc[y.isna(), 'churn'].unique().tolist()
    raise ValueError(f"Unrecognised values in 'churn' column: {unknown_labels}")

In [None]:
# Partition the feature columns by dtype so that every column lands in exactly
# one group. A fixed whitelist of 'int64'/'float64'/'object' would leave out
# columns of any other dtype (e.g. int32, bool, category or a dedicated string
# dtype), and columns in neither list are not passed to any transformer.
categorical_features = X.select_dtypes(exclude='number').columns
numerical_features = X.select_dtypes(include='number').columns

In [None]:
# Scaler applied to the numerical columns inside `preprocessor`.
numerical_transformer = StandardScaler()

In [None]:
# One-hot encoder for the categorical columns; handle_unknown='ignore' lets
# transform() accept categories that were not seen during fit instead of raising.
categorical_transformer = OneHotEncoder(handle_unknown='ignore')

In [None]:
# Route each column group through its own transformer and concatenate the results.
preprocessor = ColumnTransformer(transformers=[
    ('num', numerical_transformer, numerical_features),
    ('cat', categorical_transformer, categorical_features),
])

In [None]:
# Preprocessing followed by a linear-kernel SVC.
# probability=True is required because evaluate_model calls predict_proba.
pipeline_linear_svm = Pipeline(
    steps=[
        ('preprocessor', preprocessor),
        ('svm', SVC(kernel='linear', probability=True, random_state=42)),
    ]
)

In [None]:
# Preprocessing followed by an RBF-kernel SVC.
# probability=True is required because evaluate_model calls predict_proba.
pipeline_rbf_svm = Pipeline(
    steps=[
        ('preprocessor', preprocessor),
        ('svm', SVC(kernel='rbf', probability=True, random_state=42)),
    ]
)

In [None]:
# Hold out 20% of the rows for evaluation; stratifying on y keeps the churn
# ratio the same in both splits, and the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

In [None]:
# Fit the whole linear pipeline (preprocessing + SVC) on the training split only.
print("\nTraining Linear SVM...")
pipeline_linear_svm.fit(X_train, y_train)
print("Linear SVM trained.")

In [None]:
# Fit the whole RBF pipeline (preprocessing + SVC) on the training split only.
print("\nTraining RBF SVM...")
pipeline_rbf_svm.fit(X_train, y_train)
print("RBF SVM trained.")

In [None]:
def evaluate_model(model, X_test, y_test, model_name):
    """Print classification metrics for a fitted model and plot its confusion matrix.

    Parameters
    ----------
    model
        Fitted estimator exposing ``predict`` and ``predict_proba``.
    X_test
        Feature rows to score.
    y_test
        True labels for ``X_test``; the plot's tick labels assume a binary
        0 (no churn) / 1 (churn) encoding.
    model_name : str
        Label used in the printed header and in the plot title.

    Returns
    -------
    None
        Results are printed and shown as a heatmap.
    """
    predicted_labels = model.predict(X_test)
    # The second predict_proba column is used as the positive-class score for AUC
    positive_scores = model.predict_proba(X_test)[:, 1]

    scores = {
        "Accuracy": accuracy_score(y_test, predicted_labels),
        "Precision": precision_score(y_test, predicted_labels),
        "Recall": recall_score(y_test, predicted_labels),
        "AUC": roc_auc_score(y_test, positive_scores),
    }
    cm = confusion_matrix(y_test, predicted_labels)

    print(f"\n--- {model_name} Performance ---")
    for metric_name, metric_value in scores.items():
        print(f"{metric_name}: {metric_value:.4f}")
    print("Confusion Matrix:")
    print(cm)

    # Draw the matrix on an explicit Axes; rows are true labels, columns predictions
    fig, ax = plt.subplots(figsize=(6, 4))
    sns.heatmap(
        cm,
        annot=True,
        fmt='d',
        cmap='Blues',
        cbar=False,
        xticklabels=['Predicted No Churn (0)', 'Predicted Churn (1)'],
        yticklabels=['Actual No Churn (0)', 'Actual Churn (1)'],
        ax=ax,
    )
    ax.set_title(f'{model_name} Confusion Matrix')
    ax.set_xlabel('Predicted Label')
    ax.set_ylabel('True Label')
    plt.show()

In [None]:
# Score both fitted pipelines on the same held-out split, linear kernel first.
for fitted_pipeline, display_name in (
    (pipeline_linear_svm, "Linear SVM"),
    (pipeline_rbf_svm, "RBF SVM"),
):
    evaluate_model(fitted_pipeline, X_test, y_test, display_name)

In [None]:
print("\nGenerating Decision Boundary Visualization (using PCA)...")
# Separate preprocessor fitted on the full feature matrix, used only for the
# 2-D visualisation below.
# sparse_threshold=0 forces a dense array: the one-hot columns can otherwise make
# ColumnTransformer return a SciPy sparse matrix, which PCA does not accept in
# scikit-learn releases that lack sparse-input support.
preprocessor_for_pca = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_features),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)
    ],
    sparse_threshold=0,
)
X_processed_for_pca = preprocessor_for_pca.fit_transform(X)

In [None]:
# Project the preprocessed features onto their first two principal components
# so the decision regions can be drawn in a plane.
pca = PCA(
    n_components=2,
    random_state=42,
)
X_pca = pca.fit_transform(X_processed_for_pca)

In [None]:
# Visualisation-only classifier trained on the 2-D PCA projection of all rows;
# it is separate from the pipelines evaluated above.
svm_visual = SVC(kernel='rbf', gamma='scale', random_state=42)
svm_visual.fit(X_pca, y)

In [None]:
# Bounding box of the projected points, padded by 1 unit on every side.
x_min = X_pca[:, 0].min() - 1
x_max = X_pca[:, 0].max() + 1
y_min = X_pca[:, 1].min() - 1
y_max = X_pca[:, 1].max() + 1

# Regular grid over the box with a 0.02 step along both components.
xx, yy = np.meshgrid(
    np.arange(x_min, x_max, 0.02),
    np.arange(y_min, y_max, 0.02),
)

In [None]:
# Classify every grid node, then fold the flat predictions back into the grid's shape.
Z = svm_visual.predict(np.column_stack((xx.ravel(), yy.ravel()))).reshape(xx.shape)

In [None]:
# Filled contours show the predicted class regions; the scatter overlays the
# projected samples coloured by their true label.
fig, ax = plt.subplots(figsize=(10, 7))
ax.contourf(xx, yy, Z, alpha=0.8, cmap=plt.cm.coolwarm)
sample_points = ax.scatter(
    X_pca[:, 0],
    X_pca[:, 1],
    c=y,
    cmap=plt.cm.coolwarm,
    s=20,
    edgecolors='k',
)
ax.set_xlabel('Principal Component 1')
ax.set_ylabel('Principal Component 2')
ax.set_title('SVM Decision Boundary (RBF Kernel, PCA-reduced Data)')
# The colour bar is tied to the scatter, i.e. to the true labels
fig.colorbar(sample_points, ax=ax, label='Churn (0=No, 1=Yes)')
plt.show()

In [None]:
# Final status line marking the end of the notebook run.
print("\nSVM Classification task completed.")