In [29]:
# Import the loader for scikit-learn's bundled breast cancer dataset
from sklearn.datasets import load_breast_cancer
# Load the dataset. Later cells read the feature matrix from `cancer.data`
# and the class labels from `cancer.target`.
cancer = load_breast_cancer()

In [30]:
# Hold out part of the data so the model can be evaluated on samples it never saw
from sklearn.model_selection import train_test_split

# No size arguments are given, so train_test_split falls back to its default proportions
# (by default 25% of the samples go to the test set).
# random_state=0 pins the shuffle, so re-running the notebook reproduces the same split.
x_train, x_test, y_train, y_test = train_test_split(
    cancer.data,
    cancer.target,
    random_state=0,
)

In [31]:
# Support Vector Classifier from scikit-learn
from sklearn.svm import SVC

# Fit an SVC on the raw (unscaled) training features; every hyperparameter except gamma
# keeps its default value.
# gamma is the kernel coefficient: larger values make each training point's influence more
# local and the decision boundary more complex.
# gamma='auto' is not tuned to the data: it is the fixed value 1 / n_features, regardless of
# how large the feature values are.
svc = (
    SVC(gamma='auto')
    .fit(x_train, y_train)
)

In [32]:
# Report the model's accuracy, one value per line: training split first, then test split
print(
    svc.score(x_train, y_train),  # accuracy on the data the model was fitted on
    svc.score(x_test, y_test),    # accuracy on the held-out data
    sep="\n",
)

1.0
0.6293706293706294


In [33]:
# Per-feature minimum over the training samples (axis=0 reduces across rows, leaving one value per column).
# Only the training split is used, so the test data does not influence the scaling statistics.
min_on_training = x_train.min(axis=0)

In [34]:
# Per-feature spread of the training data: column maximum minus column minimum.
# This equals the maximum of the min-shifted data, but avoids building the shifted copy.
range_on_training = x_train.max(axis=0) - min_on_training

In [35]:
# Min-max scale both splits using statistics computed on the training data only,
# so nothing about the test set leaks into preprocessing.
#
# A feature that is constant in the training split has a range of 0; dividing by it would
# put NaN/inf into that column, which scikit-learn estimators reject at fit time.
# Replace zero ranges with 1 so such a column simply maps to 0 on the training data.
# Work on a copy so `range_on_training` itself is left untouched and the cell stays idempotent.
safe_range = range_on_training.copy()
safe_range[safe_range == 0] = 1

# Training data: subtract the per-feature min and divide by the per-feature range,
# which places every training feature in [0, 1].
x_train_scaled = (x_train - min_on_training) / safe_range

# Test data: apply the *same* training min and range (never recompute them on the test set).
# Test values may therefore fall slightly outside [0, 1].
x_test_scaled = (x_test - min_on_training) / safe_range

In [36]:
# Retrain with the same settings (gamma='auto'), this time on the min-max scaled features.
# fit() trains the estimator in place and returns it, so `svc` is the fitted model afterwards.
svc = SVC(gamma='auto')
svc.fit(x_train_scaled, y_train)

# Report accuracy on the scaled data, one value per line: training split first, then test split
print(
    svc.score(x_train_scaled, y_train),  # scaled training data
    svc.score(x_test_scaled, y_test),    # scaled test data
    sep="\n",
)

0.9483568075117371
0.951048951048951


In [37]:
# Experiment with the regularization parameter C on the scaled data.
# C sets how heavily misclassified training points are penalized: a large C weakens the
# regularization, so the model fits the training data more tightly (lower bias, higher variance).
# fit() trains the estimator in place and returns it, so `svc` is the fitted model afterwards.
svc = SVC(C=1000, gamma='auto')
svc.fit(x_train_scaled, y_train)

# Report accuracy with C=1000, one value per line: training split first, then test split
print(
    svc.score(x_train_scaled, y_train),  # scaled training data
    svc.score(x_test_scaled, y_test),    # scaled test data
    sep="\n",
)

0.9882629107981221
0.972027972027972
