In [8]:
# Import the necessary modules from scikit-learn
from sklearn.datasets import load_breast_cancer  # For loading the breast cancer dataset
from sklearn.preprocessing import StandardScaler  # For standardizing the dataset
from sklearn.decomposition import PCA  # For performing Principal Component Analysis
from sklearn.model_selection import train_test_split  # For splitting the dataset into training and testing sets
from sklearn.linear_model import LogisticRegression  # For implementing the logistic regression model
from sklearn.metrics import accuracy_score  # For calculating the accuracy of the model

# Load the breast cancer dataset from sklearn
data = load_breast_cancer()  # `data` is a Bunch object containing the dataset and related information

# Separate the features and the target variable
X = data.data  # `X` contains the features of the dataset (independent variables)
y = data.target  # `y` contains the target variable (dependent variable - malignant or benign)

# Standardize the features by removing the mean and scaling to unit variance
scaler = StandardScaler()  # Create a StandardScaler object for standardization
X_scaled = scaler.fit_transform(X)  # Fit the scaler to the data and transform it

# Perform Principal Component Analysis (PCA) to reduce the dataset to 2 principal components
pca = PCA(n_components=2)  # Create a PCA object to reduce the dataset to 2 components
X_pca = pca.fit_transform(X_scaled)  # Fit the PCA model and apply the dimensionality reduction

# Display the explained variance ratio of the two principal components
explained_variance_ratio = pca.explained_variance_ratio_  # Store the explained variance ratio of the components

# Now we'll implement logistic regression using the 2 PCA components
# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_pca, y, test_size=0.3, random_state=42)  # 70% training, 30% testing

# Train a logistic regression model on the training data
log_reg = LogisticRegression()  # Create a LogisticRegression object
log_reg.fit(X_train, y_train)  # Fit the logistic regression model to the training data

# Predict the labels for the test set
y_pred = log_reg.predict(X_test)  # Use the trained model to make predictions on the test data

# Calculate the accuracy of the logistic regression model
accuracy = accuracy_score(y_test, y_pred)  # Calculate the accuracy of the model by comparing predictions to true labels

# Convert the explained variance ratio to percentages
explained_variance_ratio_percentage = explained_variance_ratio * 100  # Multiply by 100 to convert to percentage

# Convert the accuracy to percentage
accuracy_percentage = accuracy * 100  # Multiply by 100 to convert to percentage

# Print the explained variance ratio and the accuracy of the logistic regression model in percentage format
explained_variance_ratio_percentage, accuracy_percentage 


(array([44.27202561, 18.97118204]), 97.07602339181285)