In [None]:
from sklearn.datasets import load_breast_cancer
# Load the breast-cancer dataset bundled with scikit-learn. Later cells read its
# .data, .target, .feature_names and .DESCR attributes.
cancer = load_breast_cancer()

In [None]:
# Print the description text attached to the dataset object.
print(cancer.DESCR)

In [None]:
# Number of samples whose target label is 0 (the class this notebook treats as malignant).
class0_mask = cancer.target == 0
print(int(class0_mask.sum()))

In [None]:
import pandas as pd

# Wrap the raw feature matrix in a DataFrame so columns can be addressed by feature name.
cancer_df = pd.DataFrame(
    data=cancer.data,
    columns=cancer.feature_names,
)

In [None]:
# Per-column summary statistics for the feature table (rendered as the cell output).
cancer_df.describe()

In [None]:
import numpy as np

In [None]:
import matplotlib.pyplot as plt

# Overlay per-class histograms of every feature to see which ones separate the two classes.
n_features = cancer.data.shape[1]
n_cols = 3
n_rows = -(-n_features // n_cols)  # ceiling division so every feature gets a panel

fig, axes = plt.subplots(n_rows, n_cols, figsize=(12, 9))
malignant = cancer.data[cancer.target == 0]
maligant = malignant  # original (misspelled) name kept as an alias in case other cells use it
benign = cancer.data[cancer.target == 1]
ax = axes.ravel()

for i in range(n_features):
    # Bin edges are computed on the full column so both classes share the same bins
    # and the two histograms are directly comparable.
    bins = np.histogram_bin_edges(cancer.data[:, i], bins=40)
    ax[i].hist(malignant[:, i], bins=bins, color='r', alpha=.5, label='malignant')
    ax[i].hist(benign[:, i], bins=bins, color='g', alpha=.3, label='benign')
    ax[i].set_title(cancer.feature_names[i], fontsize=9)
    ax[i].xaxis.set_visible(False)
    ax[i].set_yticks(())

# Hide any panels left over when the feature count is not a multiple of n_cols.
for unused in ax[n_features:]:
    unused.set_visible(False)

# Legend entries come from the label= passed to each hist() call, so they cannot
# drift out of sync with the order in which the classes are plotted.
ax[0].legend(loc='best', fontsize=8)

plt.tight_layout()

plt.show()

In [None]:
# Two bubble charts side by side. Each tuple describes one panel:
# (x column, y column, column driving marker size, colour, x-axis text, y-axis text).
panels = [
    ('worst symmetry', 'worst texture', 'worst area', 'magenta', 'Worst Symmetry', 'Worst Texture'),
    ('mean radius', 'mean concave points', 'mean area', 'purple', 'mean radius', 'mean concave points'),
]

for position, (x_col, y_col, size_col, colour, x_text, y_text) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.scatter(
        cancer_df[x_col],
        cancer_df[y_col],
        s=cancer_df[size_col] * 0.05,  # marker size scales with the area feature
        color=colour,
        label='check',
        alpha=0.3,
    )
    plt.xlabel(x_text, fontsize=12)
    plt.ylabel(y_text, fontsize=12)

plt.tight_layout()
plt.show()

In [None]:
from sklearn.preprocessing import StandardScaler

# Standardise the feature matrix before PCA: learn the scaling parameters from the
# data, then apply them to that same data.
scalar = StandardScaler().fit(cancer.data)
X_scaled = scalar.transform(cancer.data)

In [None]:
# Sanity-check the standardisation. The global mean/std over the whole matrix can look
# right even when individual columns are off, so also report the per-feature extremes.
feature_means = X_scaled.mean(axis=0)
feature_stds = X_scaled.std(axis=0)

print(np.mean(X_scaled),np.std(X_scaled))
print('largest |per-feature mean|:', np.abs(feature_means).max())
print('per-feature std range:', feature_stds.min(), feature_stds.max())

In [None]:
from sklearn.decomposition import PCA

# Project the standardised data onto its first three principal components.
# random_state is pinned because, depending on the scikit-learn version, the default
# svd_solver='auto' may select the randomized solver for this data shape, which would
# otherwise let the results vary slightly from run to run.
pca = PCA(n_components=3, random_state=42)
X_pca = pca.fit_transform(X_scaled)

In [None]:
# Show the shape of the projected data; the second dimension should match the
# n_components requested when the PCA was built.
print('shape ', X_pca.shape)

In [None]:
# Peek at the first ten projected samples (rendered as the cell output).
X_pca[:10]

In [None]:
# Share of the total variance captured by each retained component.
variance_ratio = pca.explained_variance_ratio_
print('Explained variation per principal component {}'.format(variance_ratio))

In [None]:
# Scatter of the first two principal components, drawn as one series per target class.
Xax = X_pca[:, 0]
Yax = X_pca[:, 1]
labels = cancer.target

# Per-class styling, keyed by target value.
cdict = {0: 'red', 1: 'green'}
labl = {0: 'Malignant', 1: 'Benign'}
marker = {0: '*', 1: 'o'}
alpha = {0: .3, 1: 0.5}

fig, ax = plt.subplots(figsize=(7, 5))
fig.patch.set_facecolor('white')

for cls in np.unique(labels):
    members = labels == cls  # boolean mask selecting the samples of this class
    ax.scatter(
        Xax[members],
        Yax[members],
        c=cdict[cls],
        s=40,
        label=labl[cls],
        marker=marker[cls],
        alpha=alpha[cls],
    )

ax.set_xlabel("PC 1", fontsize=14)
ax.set_ylabel("PC 2", fontsize=14)

ax.legend()
plt.show()

In [None]:
# Heat map of the PCA loadings: one row per fitted component, one column per original feature.
n_components = pca.components_.shape[0]

plt.matshow(pca.components_, cmap='viridis')
# Row ticks are derived from the fitted PCA so they stay correct if the number of
# components requested upstream is changed.
plt.yticks(
    range(n_components),
    ['{} PC'.format(k + 1) for k in range(n_components)],
    fontsize=10,
)
plt.colorbar()
plt.xticks(range(len(cancer.feature_names)), cancer.feature_names, rotation=65, ha='left')
plt.tight_layout()
plt.show()