In [37]:
import io

import numpy as np
import pandas as pd
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC


In [38]:
# Read the CSV at `file_path` into a DataFrame; every later cell works on `data`
file_path = 'wisc_bc_ContinuousVar.csv'
data = pd.read_csv(filepath_or_buffer=file_path)


In [39]:
# Display basic information and the first few rows of the dataset.
# DataFrame.info() writes its report to a buffer (stdout by default) and
# returns None, so binding its return value and printing it would emit a
# stray "None". Capture the report text explicitly instead.
info_buffer = io.StringIO()
data.info(buf=info_buffer)
data_info = info_buffer.getvalue()
first_few_rows = data.head()
# The captured report already ends with a newline, so suppress print's own.
print(data_info, end='')
print(first_few_rows)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 569 entries, 0 to 568
Data columns (total 32 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   id                       569 non-null    int64  
 1   diagnosis                569 non-null    object 
 2   radius_mean              569 non-null    float64
 3   texture_mean             569 non-null    float64
 4   perimeter_mean           569 non-null    float64
 5   area_mean                569 non-null    float64
 6   smoothness_mean          569 non-null    float64
 7   compactness_mean         569 non-null    float64
 8   concavity_mean           569 non-null    float64
 9   concave points_mean      569 non-null    float64
 10  symmetry_mean            569 non-null    float64
 11  fractal_dimension_mean   569 non-null    float64
 12  radius_se                569 non-null    float64
 13  texture_se               569 non-null    float64
 14  perimeter_se             5

In [40]:
# Count missing entries per column, then keep only the columns that have any
null_values = data.isna().sum()
null_values_summary = null_values.loc[null_values.gt(0)]

# Bare expression so the notebook displays the (possibly empty) Series
null_values_summary


Series([], dtype: int64)

In [41]:
# Data Preprocessing
# Remove the 'id' column because it does not affect output.
# errors='ignore' keeps this cell re-runnable: on a second run 'id' is already
# gone and a plain drop would raise KeyError.
data = data.drop(columns=['id'], errors='ignore')

# Convert the 'diagnosis' column to binary format (M -> 1, B -> 0).
diagnosis_codes = {'M': 1, 'B': 0}

# Map only while the column still holds raw labels. Re-mapping an already
# encoded column would turn every 0/1 into NaN, because neither is a key of
# `diagnosis_codes`.
if not data['diagnosis'].isin(list(diagnosis_codes.values())).all():
    encoded_diagnosis = data['diagnosis'].map(diagnosis_codes)
    # Series.map yields NaN for any label missing from the mapping; fail here
    # rather than letting NaN targets reach model training.
    if encoded_diagnosis.isna().any():
        unexpected = data.loc[encoded_diagnosis.isna(), 'diagnosis'].unique().tolist()
        raise ValueError(f"Unexpected values in 'diagnosis' column: {unexpected}")
    data['diagnosis'] = encoded_diagnosis.astype(int)


In [42]:
# Separate the target column from the predictor columns
y = data['diagnosis']
X = data.drop(columns='diagnosis')


In [43]:

# Hold out 30% of the rows for testing; the fixed random_state keeps the
# split the same on every run
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)


In [44]:
# Standardize the features: learn the scaling statistics from the training
# split only, then apply that same fitted scaler to both splits
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [45]:
# Model Training: fit a support vector classifier, constructed with no
# arguments, on the scaled training split
svm_model = SVC()
svm_model.fit(X=X_train, y=y_train)

In [46]:
# Model Evaluation: predict on the held-out split and summarise the results
y_pred = svm_model.predict(X_test)
conf_matrix = confusion_matrix(y_true=y_test, y_pred=y_pred)
report = classification_report(y_true=y_test, y_pred=y_pred)

# Confusion matrix first, then the per-class report, each on its own line
print(conf_matrix, report, sep='\n')


[[106   2]
 [  2  61]]
              precision    recall  f1-score   support

           0       0.98      0.98      0.98       108
           1       0.97      0.97      0.97        63

    accuracy                           0.98       171
   macro avg       0.97      0.97      0.97       171
weighted avg       0.98      0.98      0.98       171

