Question 6: Python Program – SVM with Linear Kernel (Iris Dataset)


In [1]:
from sklearn.datasets import load_iris
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load dataset
iris = load_iris()
X, y = iris.data, iris.target

# Split dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Train SVM with linear kernel
svm_linear = SVC(kernel="linear", random_state=42)
svm_linear.fit(X_train, y_train)

# Predictions
y_pred = svm_linear.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print("Model Accuracy:", accuracy)
print("Support Vectors:", svm_linear.support_vectors_)


Model Accuracy: 1.0
Support Vectors: [[4.8 3.4 1.9 0.2]
 [5.1 3.3 1.7 0.5]
 [4.5 2.3 1.3 0.3]
 [5.6 3.  4.5 1.5]
 [5.4 3.  4.5 1.5]
 [6.7 3.  5.  1.7]
 [5.9 3.2 4.8 1.8]
 [5.1 2.5 3.  1.1]
 [6.  2.7 5.1 1.6]
 [6.3 2.5 4.9 1.5]
 [6.1 2.9 4.7 1.4]
 [6.5 2.8 4.6 1.5]
 [6.9 3.1 4.9 1.5]
 [6.3 2.3 4.4 1.3]
 [6.3 2.8 5.1 1.5]
 [6.3 2.7 4.9 1.8]
 [6.  3.  4.8 1.8]
 [6.  2.2 5.  1.5]
 [6.2 2.8 4.8 1.8]
 [6.5 3.  5.2 2. ]
 [7.2 3.  5.8 1.6]
 [5.6 2.8 4.9 2. ]
 [5.9 3.  5.1 1.8]
 [4.9 2.5 4.5 1.7]]


Question 7: Python Program – Gaussian Naïve Bayes (Breast Cancer Dataset)

In [2]:
from sklearn.datasets import load_breast_cancer
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import classification_report

# Load dataset
data = load_breast_cancer()
X, y = data.data, data.target

# Train Gaussian Naïve Bayes
gnb = GaussianNB()
gnb.fit(X, y)

# Predictions
y_pred = gnb.predict(X)

# Classification report
print(classification_report(y, y_pred, target_names=data.target_names))


              precision    recall  f1-score   support

   malignant       0.95      0.89      0.92       212
      benign       0.94      0.97      0.95       357

    accuracy                           0.94       569
   macro avg       0.94      0.93      0.94       569
weighted avg       0.94      0.94      0.94       569



Question 8: Python Program – SVM with GridSearchCV (Wine Dataset)

In [3]:
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVC

# Load dataset
wine = load_wine()
X, y = wine.data, wine.target

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Parameter grid
param_grid = {'C': [0.1, 1, 10], 'gamma': [0.001, 0.01, 0.1], 'kernel': ['rbf']}

# GridSearch
grid = GridSearchCV(SVC(), param_grid, cv=5)
grid.fit(X_train, y_train)

print("Best Parameters:", grid.best_params_)
print("Best Accuracy:", grid.best_score_)


Best Parameters: {'C': 10, 'gamma': 0.001, 'kernel': 'rbf'}
Best Accuracy: 0.6946666666666667


Question 9: Python Program – Naïve Bayes on Text Dataset (20 Newsgroups)

In [4]:
from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import label_binarize

# Load dataset
data = fetch_20newsgroups(subset='train', categories=['sci.space', 'rec.autos'], remove=('headers','footers','quotes'))
X, y = data.data, data.target

# Text vectorization
vectorizer = TfidfVectorizer()
X_vec = vectorizer.fit_transform(X)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_vec, y, test_size=0.3, random_state=42)

# Train Naive Bayes
nb = MultinomialNB()
nb.fit(X_train, y_train)

# Predictions
y_pred = nb.predict_proba(X_test)

# ROC-AUC
y_test_bin = label_binarize(y_test, classes=[0,1])
roc_auc = roc_auc_score(y_test_bin, y_pred[:,1])

print("ROC-AUC Score:", roc_auc)


ROC-AUC Score: 0.9777370185314023


Question 10: Email Spam Classification (Code)

In [6]:
# Q10 - Naive Bayes Classification with safe probability handling

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report, roc_auc_score

# -----------------------------
# Sample dataset (replace with your own CSV if needed)
# -----------------------------
# For demo: binary classification dataset
from sklearn.datasets import make_classification
X, y = make_classification(n_samples=200, n_features=5, n_classes=2, random_state=42)

# -----------------------------
# Train/Test Split
# -----------------------------
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)

# -----------------------------
# Train Naive Bayes
# -----------------------------
nb = GaussianNB()
nb.fit(X_train, y_train)

# -----------------------------
# Predictions
# -----------------------------
y_pred = nb.predict(X_test)

# Safe probability handling
if len(nb.classes_) > 1 and 1 in nb.classes_:
    class_index = list(nb.classes_).index(1)  # find where class '1' is stored
    y_prob = nb.predict_proba(X_test)[:, class_index]
else:
    # If only one class was seen in training
    y_prob = np.zeros(len(X_test))

# -----------------------------
# Evaluation
# -----------------------------
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))

# ROC AUC only works if both classes are present in y_test
if len(np.unique(y_test)) > 1:
    print("ROC AUC:", roc_auc_score(y_test, y_prob))
else:
    print("ROC AUC: Cannot be computed (only one class in y_test)")



Accuracy: 0.85

Classification Report:
               precision    recall  f1-score   support

           0       0.86      0.83      0.85        30
           1       0.84      0.87      0.85        30

    accuracy                           0.85        60
   macro avg       0.85      0.85      0.85        60
weighted avg       0.85      0.85      0.85        60

ROC AUC: 0.9266666666666665
