# Principal Componenet Analysis

### 1. Import the requested libraries

In [1]:
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

### 2. Load the Breast Cancer Dataset

In [2]:
data = load_breast_cancer()
X = data.data
y = data.target

### 3. Standardize the Data

In [3]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

### 4. Apply PCA to Reduce Dimensions

In [4]:
pca = PCA(n_components=5)
X_pca = pca.fit_transform(X_scaled)

### 5. Create a DataFrame with the PCA Components

In [5]:
pca_df = pd.DataFrame(X_pca, columns=[f'PC{i+1}' for i in range(5)])
pca_df['target'] = y

### 6. Output the Transformed Dataset

In [6]:
print(pca_df.head())

        PC1        PC2       PC3       PC4       PC5  target
0  9.192837   1.948583 -1.123167  3.633718 -1.195118       0
1  2.387802  -3.768172 -0.529293  1.118269  0.621778       0
2  5.733896  -1.075174 -0.551747  0.912087 -0.177083       0
3  7.122953  10.275589 -3.232790  0.152547 -2.960878       0
4  3.935302  -1.948072  1.389767  2.940645  0.546752       0


### 7. Explain the cumulative variance

In [7]:
explained_variance = pca.explained_variance_ratio_
print(explained_variance.sum())

0.8473427431672579


## Recurisive Feature Elimination 

### 1. Import necessary libraries

In [8]:
from sklearn.feature_selection import RFE
from sklearn.linear_model import LogisticRegression

### 2. Initialize a Logistic Regression model

In [9]:
model = LogisticRegression(max_iter=1000)

### 3. Apply RFE for feature selection

In [10]:
rfe = RFE(model, n_features_to_select=5)
fit = rfe.fit(X, y)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

### 4. Check selected features

In [11]:
print(f"Selected Features: {fit.support_}")
print(f"Feature Ranking: {fit.ranking_}") 

Selected Features: [ True False False False False False False False False False False  True
 False False False False False False False False  True False False False
 False  True  True False False False]
Feature Ranking: [ 1 12 18 25 11  5  4  8  9 26 17  1  7 14 22 16 21 20 19 23  1 10 13 24
  6  1  1  2  3 15]


In [12]:
X

array([[1.799e+01, 1.038e+01, 1.228e+02, ..., 2.654e-01, 4.601e-01,
        1.189e-01],
       [2.057e+01, 1.777e+01, 1.329e+02, ..., 1.860e-01, 2.750e-01,
        8.902e-02],
       [1.969e+01, 2.125e+01, 1.300e+02, ..., 2.430e-01, 3.613e-01,
        8.758e-02],
       ...,
       [1.660e+01, 2.808e+01, 1.083e+02, ..., 1.418e-01, 2.218e-01,
        7.820e-02],
       [2.060e+01, 2.933e+01, 1.401e+02, ..., 2.650e-01, 4.087e-01,
        1.240e-01],
       [7.760e+00, 2.454e+01, 4.792e+01, ..., 0.000e+00, 2.871e-01,
        7.039e-02]])