In [106]:
from sklearn.datasets import load_breast_cancer
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
import numpy as np
import math
import pandas as pd
import pprint


# Load scikit-learn's bundled breast-cancer dataset and keep the raw pieces
# (feature names, measurement matrix) under the names the helpers below use.
breast = load_breast_cancer()
features = breast.feature_names
breast_data = breast.data

# Unscaled measurements, one column per feature name.
dataset = pd.DataFrame(breast_data, columns=features)

# Standardize every feature before fitting the PCA.
std_sklr = StandardScaler()
x = std_sklr.fit_transform(X=breast_data)

# Pin random_state: with svd_solver='auto' some scikit-learn releases pick the
# randomized solver for an input of this size, which would make the loadings
# vary slightly between Restart & Run All executions.
pca_breast = PCA(n_components=10, random_state=0)
principalComponents_breast = pca_breast.fit_transform(x)
eigen_values = pca_breast.explained_variance_

def get_data():
    """Build the scree-plot table for the fitted ``pca_breast`` model.

    Returns:
        dict: maps the 1-based component number to a dict with two keys:
        ``"variance_percentage"`` (percent of the total variance explained by
        that component) and ``"cumulative_variance"`` (running sum of those
        percentages up to and including that component).
    """
    # Use explained_variance_ratio_ rather than dividing by the sum of the
    # retained eigenvalues: the PCA keeps only a subset of the components, so
    # normalising by the retained ones overstates every percentage and forces
    # the cumulative curve to hit 100% at the last retained component.
    variance_percentage = pca_breast.explained_variance_ratio_ * 100
    cumulative_variance = np.cumsum(variance_percentage)

    return {
        component_number: {
            "variance_percentage": float(percentage),
            "cumulative_variance": float(running_total),
        }
        for component_number, (percentage, running_total) in enumerate(
            zip(variance_percentage, cumulative_variance), start=1
        )
    }

def get_top_pca():
    """Collect every feature's loading on the first two principal components.

    Returns:
        dict: keyed by the feature's 0-based position in ``features``; each
        value is ``{"name": ..., "pc1": ..., "pc2": ...}`` taken from rows 0
        and 1 of ``pca_breast.components_``.
    """
    first_component = pca_breast.components_[0]
    second_component = pca_breast.components_[1]
    return {
        position: {"name": feature_name, "pc1": loading_pc1, "pc2": loading_pc2}
        for position, (feature_name, loading_pc1, loading_pc2) in enumerate(
            zip(features, first_component, second_component)
        )
    }

def get_top_four_features(di = 3):
    """Rank features by their combined loading on the first ``di`` components.

    For each feature, the squared loadings on the first ``di`` rows of
    ``pca_breast.components_`` are summed and the square root is taken.

    Args:
        di: number of leading principal components to include (default 3).

    Returns:
        dict: the four features with the largest score, in descending order,
        mapped to that score.
    """
    squared_loadings = pca_breast.components_[:di] ** 2
    feature_count = len(squared_loadings[0])

    score_by_feature = {}
    for column in range(feature_count):
        # Accumulate down the column, component by component.
        column_total = 0
        for component_row in squared_loadings:
            column_total += component_row[column]
        score_by_feature[features[column]] = math.sqrt(column_total)

    # Ascending stable sort followed by a reversal (not reverse=True) so that
    # equal scores keep the same relative order as before.
    ranked_names = sorted(score_by_feature, key=score_by_feature.get)
    ranked_names.reverse()

    return {name: score_by_feature[name] for name in ranked_names[:4]}

def get_top_four_matrix(di = 3):
    """Return the raw values of the four top-ranked features for every row.

    Args:
        di: forwarded to ``get_top_four_features`` (number of leading
            principal components used for the ranking).

    Returns:
        dict: keyed by the 0-based row position in ``dataset``; each value
        maps a selected feature name to that row's value for the feature.
    """
    selected_columns = list(get_top_four_features(di))
    value_rows = dataset[selected_columns].to_numpy()
    return {
        row_number: dict(zip(selected_columns, row_values))
        for row_number, row_values in enumerate(value_rows)
    }

{0: {'mean fractal dimension': 0.07871,
     'smoothness error': 0.006399,
     'texture error': 0.9053,
     'worst fractal dimension': 0.1189},
 1: {'mean fractal dimension': 0.05667,
     'smoothness error': 0.005225,
     'texture error': 0.7339,
     'worst fractal dimension': 0.08902},
 2: {'mean fractal dimension': 0.05999,
     'smoothness error': 0.00615,
     'texture error': 0.7869,
     'worst fractal dimension': 0.08758},
 3: {'mean fractal dimension': 0.09744,
     'smoothness error': 0.00911,
     'texture error': 1.156,
     'worst fractal dimension': 0.173},
 4: {'mean fractal dimension': 0.05883,
     'smoothness error': 0.01149,
     'texture error': 0.7813,
     'worst fractal dimension': 0.07678},
 5: {'mean fractal dimension': 0.07613,
     'smoothness error': 0.00751,
     'texture error': 0.8902,
     'worst fractal dimension': 0.1244},
 6: {'mean fractal dimension': 0.05742,
     'smoothness error': 0.004314,
     'texture error': 0.7732,
     'worst fractal di

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst radius,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension
0,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,0.07871,...,25.38,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189
1,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,0.05667,...,24.99,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902
2,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,0.05999,...,23.57,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758
3,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,0.09744,...,14.91,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173
4,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,0.05883,...,22.54,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678


In [78]:
# Despite its name, n_pcs holds the first two rows of pca_breast.components_
# (an array slice), not a count of principal components.
n_pcs = pca_breast.components_[:2]
# Shape of the first of those rows.
n_pcs[0].shape

(30,)

In [63]:
# For each fitted principal component, record the index of the feature with
# the largest absolute loading. Iterate over the component rows directly:
# `n_pcs` is bound to an array slice in another cell, so range(n_pcs) raises
# TypeError when the notebook is run top to bottom.
most_important = [np.abs(component).argmax() for component in pca_breast.components_]

In [64]:
# Display the per-component index of the feature with the largest absolute loading.
most_important

[7, 9, 11, 21, 4, 28, 29, 14, 16, 8]

In [75]:
# Re-binds `features` to the same attribute that the setup cell already assigned.
features = breast.feature_names
# NOTE(review): the saved output for this cell, (569,), disagrees with the 30
# names printed when `features` is iterated in a later cell; it looks stale,
# so re-run to confirm.
features.shape

(569,)

In [83]:
# Print every feature name, one per line.
for i in features:
    print(i)

mean radius
mean texture
mean perimeter
mean area
mean smoothness
mean compactness
mean concavity
mean concave points
mean symmetry
mean fractal dimension
radius error
texture error
perimeter error
area error
smoothness error
compactness error
concavity error
concave points error
symmetry error
fractal dimension error
worst radius
worst texture
worst perimeter
worst area
worst smoothness
worst compactness
worst concavity
worst concave points
worst symmetry
worst fractal dimension


In [96]:
# Per-feature PC1/PC2 loadings. This cell used to repeat the body of
# get_top_pca() line for line; call the helper instead so the logic lives in
# one place.
data_send = get_top_pca()

In [97]:
# Display the per-feature PC1/PC2 loadings built in the previous cell.
data_send

{0: {'name': 'mean radius',
  'pc1': 0.21890244370001577,
  'pc2': -0.233857131747535},
 1: {'name': 'mean texture',
  'pc1': 0.10372457821567597,
  'pc2': -0.05970608829080295},
 2: {'name': 'mean perimeter',
  'pc1': 0.2275372930056408,
  'pc2': -0.21518136139684982},
 3: {'name': 'mean area',
  'pc1': 0.22099498538594436,
  'pc2': -0.23107671128362745},
 4: {'name': 'mean smoothness',
  'pc1': 0.14258969436025495,
  'pc2': 0.18611302267032268},
 5: {'name': 'mean compactness',
  'pc1': 0.23928535395301204,
  'pc2': 0.15189161007283025},
 6: {'name': 'mean concavity',
  'pc1': 0.25840048124874004,
  'pc2': 0.0601653627989567},
 7: {'name': 'mean concave points',
  'pc1': 0.26085375838568264,
  'pc2': -0.03476750049364384},
 8: {'name': 'mean symmetry',
  'pc1': 0.13816695930365536,
  'pc2': 0.19034877037208467},
 9: {'name': 'mean fractal dimension',
  'pc1': 0.06436334637180302,
  'pc2': 0.3665754713785027},
 10: {'name': 'radius error',
  'pc1': 0.20597877585522775,
  'pc2': -0.105