# TP4 Oja

In [None]:
#IMPORTS INICIALES
import pandas as pd
import numpy as np
import random
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
import seaborn as sns
import matplotlib.pyplot as plt
from oja import getWeights,applyOja

In [None]:
# Configuration properties: random seed, number of epochs and learning rate
ojaProperties = dict(seed=10, epochs=5000, learningRate=0.0001)

In [None]:
# Seed both random number generators with the configured seed
seedValue = ojaProperties['seed']
random.seed(seedValue)
np.random.seed(seedValue)

In [None]:
# Load the CSV: dfWNames keeps every column, df keeps only columns 1 through 7
dfWNames = pd.read_csv('europe.csv')
featureColumns = dfWNames.columns[1:8]
df = dfWNames[featureColumns].copy()
df.head()

In [None]:
# Standardize the country data and draw a boxplot of the standardized columns
scaler = StandardScaler()
standarizedCountriesData = scaler.fit_transform(df)

# Display labels used for the boxplot columns
featureLabels = ["Area", "GDP", "Inflation", "Life. expect", "Military", "Pop. growth", "Unemployment"]
namedBoxplot = pd.DataFrame(standarizedCountriesData, columns=featureLabels)

plt.figure(figsize=(15, 9))
sns.boxplot(data=namedBoxplot)
plt.show()

In [None]:
# Compute the initial weights; getWeights receives the number of data columns
countriesCharacteristicsCount = standarizedCountriesData.shape[1]
w = getWeights(countriesCharacteristicsCount)

# Put the initial weights in a table and display them
initialWeightsData = {"Initial weights": w}
initialWeightsDF = pd.DataFrame(initialWeightsData)
initialWeightsDF.head(countriesCharacteristicsCount)

In [None]:
# Run Oja's rule with the configured epochs and learning rate, starting from w
finalW = applyOja(
    ojaProperties['epochs'],
    standarizedCountriesData,
    w,
    ojaProperties['learningRate'],
)

In [None]:
# Compute the principal components with the library (scikit-learn PCA, 7 components)
pca = PCA(n_components=7)
PC = pca.fit_transform(standarizedCountriesData)

In [None]:
# Compare the weight vector returned by Oja with the library's first component
# (the first row of pca.components_)
finalWeightsData = {}
finalWeightsData["Final weights (Oja)"] = finalW
finalWeightsData["PC1 eigenvector (Library)"] = pca.components_[0]

finalWeightsDF = pd.DataFrame(finalWeightsData)

# Display both weight vectors side by side
finalWeightsDF.head(countriesCharacteristicsCount)


In [None]:
# Compute PC1 for every country (Oja and library)
countriesCount = standarizedCountriesData.shape[0]
print(countriesCount)

# Row labels are read from the first column of the unfiltered table
countries = dfWNames.iloc[:countriesCount, 0].tolist()

# Dot product of the Oja weight vector with each standardized row
pc1 = [np.dot(finalW, np.array(row)) for row in standarizedCountriesData]
ojaFirstPrincipalComponents = np.array(pc1)

countriesData = {
    'PC1 (Oja)': ojaFirstPrincipalComponents,
    'PC1 (Library)': np.array(PC[:, 0]),
}
countriesDF = pd.DataFrame(countriesData, index=countries)

# Display the first principal component of every country
countriesDF.head(countriesCount)

In [None]:
# Horizontal barplots of PC1 per country (Oja and library)

pc1Plots = (
    (ojaFirstPrincipalComponents, sns.color_palette(), "PC1 (Oja)"),
    (PC[:, 0], sns.color_palette("pastel"), "PC1 (Library)"),
)

for pc1Values, pc1Palette, pc1Title in pc1Plots:
    plt.figure(figsize=(15, 9))
    sns.set_style({'axes.grid': True})
    # The hue only colors bars by sign (1 for >= 0, 0 otherwise). dodge=False keeps
    # one full-width bar per country instead of reserving a slot per hue level.
    signHue = [1 if c >= 0 else 0 for c in pc1Values]
    sns.barplot(
        x=pc1Values,
        y=countries,
        hue=signHue,
        palette=pc1Palette,
        orient="h",
        dodge=False,
    ).set(title=pc1Title)
    plt.show()

In [None]:
# Horizontal barplots of the PC1 coefficients (Oja and library)

# Display labels for the coefficients, shared by both plots
coefficientLabels = ["Area", "GDP", "Inflation", "Life. expect", "Military", "Pop. growth", "Unemployment"]

ojaCoefs = np.array(finalW)
libraryCoefs = np.array(pca.components_[0])

coefficientPlots = (
    (ojaCoefs, sns.color_palette(), "PC1 Coefficients (Oja)"),
    (libraryCoefs, sns.color_palette("pastel"), "PC1 Coefficients (Library)"),
)

for coefs, coefsPalette, coefsTitle in coefficientPlots:
    plt.figure(figsize=(15, 9))
    sns.set_style({'axes.grid': True})
    # The hue only colors bars by sign (1 for >= 0, 0 otherwise). dodge=False keeps
    # one full-width bar per coefficient instead of reserving a slot per hue level.
    signHue = [1 if c >= 0 else 0 for c in coefs]
    sns.barplot(
        x=coefs,
        y=coefficientLabels,
        hue=signHue,
        palette=coefsPalette,
        orient="h",
        dodge=False,
    ).set(title=coefsTitle)
    plt.show()

In [None]:
# Boxplot of the absolute error between Oja and the library as a function of the learning rate

currentLearningRate = 0.1
learningRates = []
# Run several iterations, dividing the learning rate by 10 between each one
totalIterations = 4
# NOTE(review): only row 0 is ever written; row 1 stays at zero, so each box in the
# plot spans from 0 to the accumulated error. Confirm this is the intended display.
absErrors = np.zeros((2, totalIterations))
libraryFirstPrincipalComponent = PC[:, 0]

for i in range(totalIterations):
    # Apply Oja with the current learning rate.
    # NOTE(review): the same initial weights w are passed on every run; this assumes
    # applyOja does not modify w in place - TODO confirm.
    ojaW = applyOja(ojaProperties['epochs'], standarizedCountriesData, w, currentLearningRate)

    # First principal component of every country: one matrix-vector product
    # instead of a per-row Python loop
    firstPrincipalComponentOja = np.dot(standarizedCountriesData, ojaW)

    # A principal direction is only defined up to its sign, so compare against both
    # orientations of the library component and keep the smaller accumulated error.
    # np.minimum propagates NaN if a run diverged.
    sameSignError = np.sum(np.abs(firstPrincipalComponentOja - libraryFirstPrincipalComponent))
    flippedSignError = np.sum(np.abs(firstPrincipalComponentOja + libraryFirstPrincipalComponent))
    absErrors[0][i] = np.minimum(sameSignError, flippedSignError)

    # Store the learning rate that was used and update it for the next iteration
    learningRates.append(currentLearningRate)
    currentLearningRate = currentLearningRate / 10

# Finally, show the results in a boxplot with one column per learning rate
errorsBoxplot = pd.DataFrame(data=absErrors, columns=learningRates)
plt.figure(figsize=(15, 9))
sns.boxplot(data=errorsBoxplot)
plt.show()

