In [7]:
# STEP 1: Load the dataset from a CSV file
import pandas as pd

# Location of the input CSV -- point this at your own copy of the data.
file_path = '/content/breast-cancer.csv'

# Read the whole file into a DataFrame and preview its first five rows.
df = pd.read_csv(filepath_or_buffer=file_path)
df.head(n=5)



Unnamed: 0,id,diagnosis,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,...,radius_worst,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst
0,842302,M,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,...,25.38,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189
1,842517,M,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,...,24.99,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902
2,84300903,M,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,...,23.57,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758
3,84348301,M,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,...,14.91,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173
4,84358402,M,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,...,22.54,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678


In [8]:
# STEP 2: Preprocess Data (Adjust according to your dataset)
# Example: Breast Cancer dataset

# Drop irrelevant or non-numeric columns.
# errors='ignore' makes this a no-op for any listed column that is absent,
# e.g. when the cell is executed a second time.
df = df.drop(['id', 'Unnamed: 32'], axis=1, errors='ignore')

# Convert target to binary numeric (M -> 1, B -> 0).
# Encode only while the column still holds the raw labels: Series.map turns
# every value that is missing from the mapping into NaN, so re-running this
# cell on an already-encoded 0/1 column would blank the whole target and the
# dropna() below would then remove every row.
if not pd.api.types.is_numeric_dtype(df['diagnosis']):
    df['diagnosis'] = df['diagnosis'].map({'M': 1, 'B': 0})

# Drop rows with missing values (this also removes rows whose label was not
# 'M' or 'B', since map() leaves those as NaN).
df = df.dropna()

# Features and Target
X = df.drop('diagnosis', axis=1)
y = df['diagnosis']


In [9]:
# STEP 3: Train-Test Split and Scaling
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.3,
    random_state=42,
)

# Learn the scaling statistics from the training rows only, then apply that
# same fitted transform to both partitions.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)



In [10]:
# STEP 4: Train SVM with Linear and RBF Kernels
from sklearn.svm import SVC

# Linear-kernel classifier. fit() returns the estimator itself, so
# construction and training can be chained in one expression.
svm_linear = SVC(kernel='linear', C=1.0).fit(X_train_scaled, y_train)

# RBF-kernel classifier trained on the same scaled training features.
svm_rbf = SVC(kernel='rbf', C=1.0, gamma='scale').fit(X_train_scaled, y_train)

# Echo the fitted RBF model as the cell's output.
svm_rbf


In [11]:
# STEP 5: Evaluate Accuracy and Report
from sklearn.metrics import accuracy_score, classification_report

# Predict the held-out labels with each fitted model (linear first, then RBF).
y_pred_linear, y_pred_rbf = (
    clf.predict(X_test_scaled) for clf in (svm_linear, svm_rbf)
)

# Overall test accuracy of both models.
print("Linear SVM Accuracy:", accuracy_score(y_true=y_test, y_pred=y_pred_linear))
print("RBF SVM Accuracy:", accuracy_score(y_true=y_test, y_pred=y_pred_rbf))

# Per-class precision / recall / F1 for the RBF model only.
print("\nRBF SVM Report:\n", classification_report(y_true=y_test, y_pred=y_pred_rbf))


Linear SVM Accuracy: 0.9766081871345029
RBF SVM Accuracy: 0.9766081871345029

RBF SVM Report:
               precision    recall  f1-score   support

           0       0.98      0.98      0.98       108
           1       0.97      0.97      0.97        63

    accuracy                           0.98       171
   macro avg       0.97      0.97      0.97       171
weighted avg       0.98      0.98      0.98       171



In [12]:
# STEP 6: Hyperparameter Tuning with GridSearchCV
from sklearn.model_selection import GridSearchCV

# Candidate settings for the RBF SVM: three values of C crossed with three of gamma.
param_grid = dict(
    C=[0.1, 1, 10],
    gamma=['scale', 0.01, 0.001],
)

# 5-fold cross-validated search over every combination, run on the scaled
# training data. fit() returns the search object itself.
grid = GridSearchCV(
    estimator=SVC(kernel='rbf'),
    param_grid=param_grid,
    cv=5,
).fit(X_train_scaled, y_train)

# Report the winning combination and how the tuned search scores on the test set.
print("Best Parameters:", grid.best_params_)
print("Grid Search Accuracy:", grid.score(X_test_scaled, y_test))


Best Parameters: {'C': 1, 'gamma': 'scale'}
Grid Search Accuracy: 0.9766081871345029


## 💡 Interview Questions & Answers

---

**1. What is a support vector?**  
A support vector is a data point that lies closest to the decision boundary and affects the position and orientation of the hyperplane.

---

**2. What does the C parameter do?**  
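It controls the trade-off between margin width and classification error on the training data.  
- **Low C**: wider margin, tolerates more training misclassifications (stronger regularization)  
- **High C**: narrower margin, fewer training misclassifications (higher risk of overfitting)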

---

**3. What are kernels in SVM?**  
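Kernels are functions that measure the similarity (inner product) of two points in an implicit, often higher-dimensional, feature space, so data that is not linearly separable in the input space can become separable there without computing the transformation explicitly.  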
**Examples**: Linear, Polynomial, RBF

---

**4. Difference between Linear and RBF Kernel?**

| Linear Kernel | RBF Kernel |
|---------------|------------|
| Works well when data is linearly separable | Suitable for non-linear data |
| Faster training | More flexible and powerful |
| Fewer hyperparameters | Needs tuning for C and gamma |

---

**5. What are the advantages of SVM?**  
- Works well in high-dimensional spaces  
- Effective when there is a clear margin of separation  
- Memory efficient (uses only support vectors)

---

**6. Can SVMs be used for regression?**  
Yes, using **Support Vector Regression (SVR)**, which applies the same ideas as SVM classification but fits a function that keeps as many points as possible within an ε-wide margin (tube) around its predictions.

---

**7. What happens when data is not linearly separable?**  
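SVM uses the **kernel trick** (e.g., with an RBF kernel) to implicitly map the data into a higher-dimensional space where it can become linearly separable, and a soft margin (controlled by **C**) tolerates the points that still cannot be separated.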

---

**8. How is overfitting handled in SVM?**  
- Through regularization via **C parameter**  
- By selecting appropriate **kernel functions**  
- Using **cross-validation** to generalize well
