# **TABLERO INTERACTIVO DE ANÁLISIS DE COMPONENTES PRINCIPALES**


**Fuente**: Michael Pyrcz, Associate Professor, University of Texas at Austin.
https://github.com/GeostatsGuy/PythonNumericalDemos/blob/master/Interactive_PCA_Eigen.ipynb

## Importación de librerías

In [1]:
import pandas as pd                                       # DataFrames and plotting
import numpy as np
import matplotlib.pyplot as plt                           # plotting
from matplotlib.colors import ListedColormap              # custom color maps
import matplotlib.ticker as mtick
from matplotlib.patches import Rectangle
import matplotlib as mpl
from mpl_toolkits.axes_grid1 import make_axes_locatable
from numpy.linalg import eig                              # Eigen values and Eigen vectors
from sklearn.decomposition import PCA                     # PCA program from scikit learn (package for machine learning)
from sklearn.preprocessing import StandardScaler          # normalize synthetic data
from ipywidgets import interactive                        # widgets and interactivity
from ipywidgets import widgets                            
from ipywidgets import Layout
from ipywidgets import Label
from ipywidgets import VBox, HBox
import warnings
warnings.filterwarnings('ignore')                         # ignore warnings
plt.rc('axes', axisbelow=True)                            # grids behind plot elements

## Creación del tablero

1. Construir la matriz de covarianza objetivo a partir de la fuerza de correlación elegida en el deslizador.
2. Tomar una muestra de la distribución normal multivariada a partir de la matriz de covarianza.
3. Estandarizar las muestras.
4. Calcular la matriz de covarianza real, garantizando que sea semidefinida positiva.
5. Calcular los valores y vectores propios.
6. Ordenar los pares del punto 5 en forma descendente respecto a los valores propios.
7. Graficar la varianza de los atributos, así como los valores y vectores propios.

In [2]:
# Title banner for the dashboard; the leading spaces are part of the displayed text.
l = widgets.Text(
    value='                                         PCA Eigen Vector / Component Loadings Demo, Prof. Michael Pyrcz, The University of Texas at Austin',
    layout=Layout(width='900px', height='30px'),
)

# Correlation-strength slider (0 to 1 in steps of 0.1).  continuous_update=False
# means the plot is redrawn once per committed value rather than while dragging.
# Sliders expose 'handle_color' as their colour style trait; the previous
# 'button_color' key is not a slider style trait and was silently ignored.
cstr = widgets.FloatSlider(
    min=0.0,
    max=1.0,
    value=0.0,
    step=0.1,
    description=r'$\rho_{strength}$',
    orientation='horizontal',
    style={'description_width': 'initial', 'handle_color': 'green'},
    layout=Layout(width='600px', height='40px'),
    continuous_update=False,
    readout_format='.3f',
)

# Layout: banner on top, slider row underneath.
ui_summary = widgets.HBox([cstr],)
ui_summary1 = widgets.VBox([l, ui_summary],)

def run_plot_summary(cstr):
    """Draw the four-panel PCA summary for one correlation-strength setting.

    A 4-feature, zero-mean Gaussian sample (1000 rows) is drawn from a target
    covariance whose off-diagonal terms for the first three features scale
    with ``cstr``; the fourth feature stays uncorrelated.  The sample is
    standardized, its covariance is re-estimated, and that matrix is
    eigen-decomposed.  Panels:

    * top-left: heat map of the sample covariance, with a colour bar;
    * top-right: eigenvalues sorted in descending order;
    * bottom-left: per-feature variance of the standardized sample;
    * bottom-right: eigenvectors (component loadings), one per column.

    Parameters
    ----------
    cstr : float
        Multiplier applied to the target correlations (0.99, -0.9, -0.7).

    Returns
    -------
    None
        The figure is rendered with ``plt.show()``.
    """
    m = 4                                       # number of features; the correlated trio below needs m >= 3
    lo, hi = -0.5, m - 0.5                      # extent of the m x m cell grids (cells are centred on 0..m-1)
    ticks = list(range(m))                      # cell centres
    tick_labels = [k + 1 for k in ticks]        # 1-based feature / component numbers
    separators = np.arange(0.5, m - 0.5, 1.0)   # boundaries between neighbouring cells

    # Target covariance: identity plus three correlations scaled by the slider value.
    mean = np.zeros(m)
    cov = np.eye(m)
    cov[0, 1] = cov[1, 0] = 0.99 * cstr
    cov[1, 2] = cov[2, 1] = -0.9 * cstr
    cov[0, 2] = cov[2, 0] = -0.7 * cstr

    # At cstr = 1.0 the target matrix is not positive semidefinite (its leading
    # 3x3 determinant is negative), so the analysis below works on the
    # covariance re-estimated from the standardized sample instead.
    data = np.random.multivariate_normal(mean=mean, cov=cov, size=1000)
    data = StandardScaler(copy=True, with_mean=True, with_std=True).fit_transform(data)
    cov_actual = np.cov(data, rowvar=False)

    # eigh is the solver for symmetric matrices: the output is always real and
    # the eigenvalues come back in ascending order, so reversing gives the
    # descending order used by the plots.
    eigen_values, eigen_vectors = np.linalg.eigh(cov_actual)
    sorted_eigen_values = eigen_values[::-1]
    sorted_eigen_vectors = eigen_vectors[:, ::-1]

    fig = plt.figure(figsize=(6, 6))
    gs = fig.add_gridspec(2, 2, width_ratios=(1.0, 1.0))

    plt_center = fig.add_subplot(gs[1, 1])
    plt_x = fig.add_subplot(gs[1, 0], sharey=plt_center)   # feature axis shared with the loadings grid
    plt_y = fig.add_subplot(gs[0, 1], sharex=plt_center)   # component axis shared with the loadings grid
    plt_extra = fig.add_subplot(gs[0, 0])

    # --- bottom-right: eigenvector loadings, column i = component i, row j = feature j
    for i in range(m):
        for j in range(m):
            loading = sorted_eigen_vectors[j, i]
            # Loadings lie in [-1, 1]; rescale to [0, 1] for the colour map.
            plt_center.add_patch(Rectangle((i - 0.5, j - 0.5), 1, 1,
                                           color=plt.cm.RdBu_r((loading + 1.0) / 2.0), fill=True))
            # White text on the saturated cells, default text colour elsewhere.
            text_style = {'color': 'white'} if abs(loading) > 0.5 else {}
            plt_center.annotate(np.round(loading, 1), (i - 0.1, j - 0.05), **text_style)

    plt_center.set_xlim([lo, hi])
    plt_center.set_ylim([lo, hi])
    plt_center.set_xticks(ticks, tick_labels)
    plt_center.set_yticks(ticks, tick_labels)
    for pos in separators:
        plt_center.plot([pos, pos], [lo, hi], c='black', lw=3)
        plt_center.plot([lo, hi], [pos, pos], c='black', lw=1, ls='--')
    plt_center.set_title('Eigen Vectors / Principal Component Loadings')
    plt_center.set_xlabel('Eigen Vector')
    plt_center.set_ylabel('Feature')

    # --- bottom-left: variance of each standardized feature (x axis runs right-to-left)
    plt_x.barh(y=np.arange(m, dtype=float), width=np.var(data, axis=0),
               color='darkorange', edgecolor='black')
    plt_x.set_xlim([3.0, 0])
    plt_x.set_yticks(ticks, tick_labels)
    plt_x.plot([1, 1], [lo, hi], c='black', ls='--')
    plt_x.annotate('Equal Variance', (1.13, 2.6), rotation=90.0, size=9)
    plt_x.set_ylabel('Feature')
    plt_x.set_xlabel('Variance')
    plt_x.set_title('Original Feature Variance')
    plt_x.grid(axis='x', which='minor', color='#EEEEEE', linestyle=':', linewidth=0.5)
    plt_x.grid(axis='x', which='major', color='#DDDDDD', linewidth=0.8)
    plt_x.minorticks_on()
    for pos in separators:
        # Horizontal extent is in variance units and overshoots the axis limits on purpose.
        plt_x.plot([-0.5, 3.5], [pos, pos], c='black', lw=1, ls='--')

    # --- top-right: eigenvalues, i.e. the variance carried by each component
    plt_y.bar(x=np.arange(m, dtype=float), height=sorted_eigen_values,
              color='darkorange', edgecolor='black')
    plt_y.set_ylim([0, 3.0])
    plt_y.set_xticks(ticks, tick_labels)
    plt_y.plot([lo, hi], [1, 1], c='black', ls='--')
    plt_y.annotate('Equal Variance', (2.55, 1.05), size=9)
    plt_y.set_xlabel('Eigen Value')
    plt_y.set_ylabel('Variance')
    plt_y.set_title('Sorted, Projected Feature Variance')
    plt_y.grid(axis='y', which='minor', color='#EEEEEE', linestyle=':', linewidth=0.5)
    plt_y.grid(axis='y', which='major', color='#DDDDDD', linewidth=0.8)
    plt_y.minorticks_on()
    for pos in separators:
        # Vertical extent is in variance units and overshoots the axis limits on purpose.
        plt_y.plot([pos, pos], [-0.5, 3.5], c='black', lw=3)

    # --- top-left: sample covariance heat map (y axis inverted so row 1 is on top)
    for i in range(m):
        for j in range(m):
            plt_extra.add_patch(Rectangle((i - 0.5, j - 0.5), 1, 1,
                                          color=plt.cm.RdBu_r((cov_actual[j, i] + 1.0) / 2.0), fill=True))

    plt_extra.set_xlim([lo, hi])
    plt_extra.set_ylim([hi, lo])
    plt_extra.set_xticks(ticks, tick_labels)
    plt_extra.set_yticks(ticks, tick_labels)
    for pos in separators:
        plt_extra.plot([pos, pos], [lo, hi], c='black', lw=2)
        plt_extra.plot([lo, hi], [pos, pos], c='black', lw=2)
    plt_extra.set_title('Covariance Matrix')

    # Colour bar on the left of the covariance panel, spanning the same [-1, 1] range as the cells.
    cplt_extra = make_axes_locatable(plt_extra).append_axes('left', size='5%', pad=0.3)
    fig.colorbar(mpl.cm.ScalarMappable(norm=mpl.colors.Normalize(vmin=-1.0, vmax=1.0), cmap=plt.cm.RdBu_r),
                 cax=cplt_extra, orientation='vertical')
    cplt_extra.yaxis.set_ticks_position('left')

    plt.subplots_adjust(left=0.0, bottom=0.0, right=1.51, top=1.50, wspace=0.2, hspace=0.2)
    plt.show()
    
# Bind the slider to the plotting callback; the returned Output widget hosts the figure.
interactive_plot_summary = widgets.interactive_output(
    run_plot_summary,
    dict(cstr=cstr),
)
# wait=True postpones the clear until replacement output is available.
interactive_plot_summary.clear_output(wait=True)

## Tablero interactivo

Instrucciones: Agregue correlación/redundancia de datos y observe el impacto en las cargas de los componentes (vectores propios) y la varianza explicada (valores propios).

In [3]:
# Show the banner/slider box first, then the output area that receives the figure.
display(ui_summary1, interactive_plot_summary)  

VBox(children=(Text(value='                                         PCA Eigen Vector / Component Loadings Demo…

Output()

# **VISUALIZACIÓN DE PROYECCIÓN SOBRE VECTORES ORTOGONALES**


Fuente: Michael Pyrcz, Associate Professor, University of Texas at Austin. https://github.com/GeostatsGuy/DataScienceInteractivePython/blob/main/Interactive_PCA.ipynb

## Crear una muestra de datos normales bivariados

In [4]:
# Bivariate Gaussian sample (5000 rows, 2 columns) with unit variances and
# correlation 0.75.  A covariance matrix must be positive semidefinite, i.e.
# |cov| <= sqrt(var1 * var2); an off-diagonal of 0.75 with variances 1 and 0.2
# violates that bound, which leaves the sampler's output undefined.  Unit
# variances also match the "Standardized" axis labels used when plotting.
x = np.random.multivariate_normal([0, 0], [[1.0, 0.75], [0.75, 1.0]], 5000)

## Implementación y visualización de los datos y sus proyecciones sobre vectores ortogonales.

In [5]:
import math
import ipywidgets as widgets
from ipywidgets import Layout
import matplotlib.transforms as transforms
import warnings
warnings.filterwarnings('ignore')

# xdata / ydata are published as module-level names by pc_slider (via the
# ``global`` statements inside it) so the latest projection can be inspected
# from other cells.
def pc_slider(Angle):
    """Show the sample ``x`` with its projections onto a rotated orthogonal basis.

    Left panel: pie chart of the share of total variance carried by each of
    the two projected components.  Right panel: the raw sample (black) with
    the first projected component (red) and the second (blue) drawn along the
    two axes of the rotated basis.

    The projected coordinates are also stored in the module-level ``xdata``
    and ``ydata``.

    Parameters
    ----------
    Angle : int
        Rotation of the basis in degrees; truncated with ``int()`` before use.

    Returns
    -------
    None
        The figure is rendered with ``plt.show()``.
    """
    global xdata
    global ydata

    # NOTE(review): constrained_layout and the subplots_adjust call at the end
    # are competing layout requests; which one takes effect appears to depend
    # on the matplotlib version - confirm the intended layout.
    fig15, (ax15, ax16) = plt.subplots(1, 2, figsize=(15, 6), constrained_layout=True)

    angle_deg = int(Angle)
    theta = math.radians(angle_deg)
    cos_t, sin_t = math.cos(theta), math.sin(theta)

    # Raw sample.
    ax16.plot(x[:, 0], x[:, 1], 'o', c='black', alpha=0.3)

    # Coordinates of every point in the basis rotated by Angle.
    xdata = x[:, 0] * cos_t - x[:, 1] * sin_t
    ydata = x[:, 1] * cos_t + x[:, 0] * sin_t

    # Each component is drawn as points on one coordinate axis and then turned
    # by -Angle so it lies along the corresponding basis direction in data space.
    zeros = np.zeros(len(ydata))
    unrotate = transforms.Affine2D().rotate_deg(-angle_deg) + ax16.transData
    ax16.plot(xdata, zeros, 'o', color='red', alpha=0.3, transform=unrotate)
    ax16.plot(zeros, ydata, 'o', color='blue', alpha=0.3, transform=unrotate)
    ax16.set_xlim(left=-3.5, right=3.5)
    ax16.set_ylim(bottom=-3.5, top=3.5)

    ax16.set_title("Data and Arbitrary Feature Projection Components")
    ax16.set_xlabel('Standardized Porosity')
    ax16.set_ylabel('Standardized TOC')

    # Share of the total (sample, ddof=1) variance held by each component.
    var_x = np.var(xdata, ddof=1)
    var_y = np.var(ydata, ddof=1)
    total_variance = var_x + var_y
    sizes = [var_x / total_variance, var_y / total_variance]

    # NOTE(review): the first (red, xdata) wedge is labelled 'Feature 2 Variance'
    # and the second (blue, ydata) 'Feature 1 Variance' - confirm this order is intended.
    labels = 'Feature 2 Variance', 'Feature 1 Variance'

    n = ax15.pie(sizes, autopct='%1.2f%%', colors=['red', 'blue'], shadow=True, startangle=90)
    wedges = n[0]
    wedges[0].set_alpha(0.5)
    wedges[1].set_alpha(0.5)
    ax15.axis('equal')
    # Legend handles must be artists: pass the wedges, not the numeric sizes.
    ax15.legend(wedges, labels, loc='upper left')
    ax15.set_title('Variance for Arbitrary Feature Projection Components')

    plt.subplots_adjust(left=0.0, bottom=0.0, right=1.0, top=1.0, wspace=0.1, hspace=0.1)
    plt.show()
    
# Title banner shown above the slider (the leading spaces are part of the displayed text).
x0 = widgets.Text(
    value='                                            Interactive Feature Projection - Orthogonal Rotation, Dr. Michael Pyrcz, The University of Texas at Austin',
    layout=Layout(width='950px', height='30px'),
)

# Whole-degree rotation angle from 0 to 180; continuous_update=False limits redraws to committed values.
x1 = widgets.IntSlider(
    min=0,
    max=180,
    value=0,
    description='Angle',
    orientation='horizontal',
    continuous_update=False,
)

# Banner on top, slider underneath.
uik2 = widgets.VBox(children=[x0, x1])

# Bind the slider to the plotting callback; the returned Output widget hosts the figure.
interactive_plot = widgets.interactive_output(pc_slider, dict(Angle=x1))
interactive_plot.clear_output(wait=True)  # reduce flickering by delaying plot updating

display(uik2, interactive_plot)

VBox(children=(Text(value='                                            Interactive Feature Projection - Orthog…

Output()