# Predicción Extrínseca

In [53]:
import numpy as np

## Dataset

Usamos el siguiente dataset de ejemplo para mostrar la viabilidad del algoritmo de filtrado colaborativo.

In [54]:
# Topic labels; position k in this list names column k of `data`.
temas = 'NAFTA,Real Madrid,Olimpiadas,Comida Mexicana,Paises de europa,Intercambios,Arte Moderna,Cocina,Musica de los 70,Moda,Avalinguo'.split(',')

# Example ratings as CSV text: one line per user, one comma-separated value
# per topic. A 0 is treated downstream as "no rating" (see count_nonzero /
# `!= 0` checks in the functions below).
csv = '''1,0,0,0,1,0,0,1,0,0,0
1,1,0,1,1,0,1,0,0,1,0
0,0,1,0,1,0,0,0,1,0,0
1,0,1,0,0,1,0,0,1,0,0
0,0,0,1,0,0,1,0,0,0,1
0,1,0,0,1,0,0,1,0,1,0'''

# Parse the CSV text into a float64 matrix of shape (users, topics).
data = np.array(
    [[float(cell) for cell in line.split(',')] for line in csv.split('\n')],
    dtype=np.float64,
)

## Correlación

Podemos asumir que, si dos usuarios califican de forma similar las mismas frases, sus calificaciones futuras también serán similares.

Dado esto, podemos crear una matriz de correlación comparando sus calificaciones con la siguiente función:

In [55]:
def correlation(scores, verbose=True):
    """Build a row-by-row correlation matrix from a 2-D score matrix.

    Each row is centred by subtracting that row's mean over its non-zero
    entries (the mean is subtracted from every entry, zeros included).
    Entry ``[x, y]`` of the result is the dot product of centred rows ``x``
    and ``y`` divided by the product of their Euclidean norms.

    Parameters
    ----------
    scores : 2-D numpy array
        One row per entity being compared; zeros are excluded from the
        per-row mean.
    verbose : bool, default True
        When true, print the same diagnostic trace the function has always
        printed (row sums, non-zero counts, means and every pairwise term).

    Returns
    -------
    numpy.ndarray of shape ``(len(scores), len(scores))``
        The correlation matrix. A pair gets 0.0 when the value is undefined,
        i.e. when either row has no non-zero entries or a centred row is all
        zeros. Previously these cases produced NaN through 0/0 divisions.
    """
    scores = np.asarray(scores)
    n = len(scores)

    totals = np.sum(scores, 1)
    counts = np.count_nonzero(scores, 1)
    # Mean of the non-zero entries; rows without any rating get a mean of 0
    # instead of the NaN that 0/0 would produce.
    avg = np.divide(totals, counts, out=np.zeros(n), where=counts != 0)

    if verbose:
        print("SUM ", totals)
        print("NONZERO ", counts)
        print("AVG: ", avg)

    # Centre every row once instead of recomputing it for each (x, y) pair.
    centered = scores - avg[:, np.newaxis]
    sq_norms = np.sum(centered ** 2, 1)

    cor = np.zeros((n, n))
    for x in range(n):
        for y in range(n):
            cov = np.sum(centered[x] * centered[y])
            if verbose:
                print("scores[x] ", scores[x], " avg[x] ", avg[x], " scores[y] ", scores[y], " avg[y] ", avg[y])
                print(cov)
            denom = np.sqrt(sq_norms[x] * sq_norms[y])
            # A zero denominator means at least one centred row is all
            # zeros; leave the entry at 0.0 rather than emitting NaN.
            if denom != 0:
                cor[x, y] = cov / denom

    return cor

In [56]:
def predict(data):
    """Predict a score for every (row, column) cell of ``data``.

    For each column ``i``, every row ``y`` holding a non-zero value adds
    ``data[y, i] * cor[x, y]`` to the prediction ``p[x, i]`` of every row
    ``x``, where ``cor`` is the matrix returned by ``correlation(data)``.
    Column ``i`` is then divided by ``(rows with a non-zero value in i) *
    (number of rows)``, which is the same counter the original triple loop
    accumulated.

    Parameters
    ----------
    data : 2-D numpy array
        Score matrix; a zero entry is treated as "not rated".

    Returns
    -------
    numpy.ndarray with the same shape as ``data``
        The normalised predictions. A column that nobody rated yields 0.0
        instead of NaN (0/0), so it cannot poison a later ``argmax``.
    """
    print(data)
    cor = correlation(data)

    n_rows, n_cols = np.shape(data)
    p = np.zeros((n_rows, n_cols))
    n = np.zeros(n_cols)

    for i in range(n_cols):
        for y in range(n_rows):
            if data[y, i] != 0:
                # Row y's rating, weighted by its correlation to every row x.
                p[:, i] += data[y, i] * cor[:, y]
                # One count per (rater, target row) pair, as before.
                n[i] += n_rows

    # Guarded division: columns with n == 0 keep the initial 0.0.
    return np.divide(p, n, out=np.zeros_like(p), where=n != 0)

In [57]:
def reccomend(data, users, remove_scored):
    """Pick the column index with the highest combined prediction for ``users``.

    Parameters
    ----------
    data : 2-D numpy array
        Score matrix passed straight to ``predict``.
    users : iterable of int
        Row indices whose predicted rows are summed together.
    remove_scored : bool
        When true, ``abs(data)`` is subtracted from the predictions before
        summing, lowering the score of cells that already hold a rating.

    Returns
    -------
    int (numpy integer)
        ``argmax`` of the summed prediction rows; it is also printed.
    """
    predicted = predict(data)

    # Penalise topics that already have a score.
    if remove_scored:
        predicted = predicted - np.abs(data)

    combined = np.zeros(np.shape(data)[1])
    for user in users:
        user_row = predicted[user]
        combined += user_row
        print("p[i] ", user_row)

    best = np.argmax(combined)
    print(best)
    return best

In [58]:
# Recommend one topic for the group made of rows 1, 2 and 3, penalising
# topics they have already scored, and show its label.
print('Tema recomendado:', temas[reccomend(data, users=[1, 2, 3], remove_scored=True)])

[[1. 0. 0. 0. 1. 0. 0. 1. 0. 0. 0.]
 [1. 1. 0. 1. 1. 0. 1. 0. 0. 1. 0.]
 [0. 0. 1. 0. 1. 0. 0. 0. 1. 0. 0.]
 [1. 0. 1. 0. 0. 1. 0. 0. 1. 0. 0.]
 [0. 0. 0. 1. 0. 0. 1. 0. 0. 0. 1.]
 [0. 1. 0. 0. 1. 0. 0. 1. 0. 1. 0.]]
SUM  [3. 6. 3. 4. 3. 4.]
NONZERO  [3 6 3 4 3 4]
AVG:  [1. 1. 1. 1. 1. 1.]
scores[x]  [1. 0. 0. 0. 1. 0. 0. 1. 0. 0. 0.]  avg[x]  1.0  scores[y]  [1. 0. 0. 0. 1. 0. 0. 1. 0. 0. 0.]  avg[y]  1.0
8.0
scores[x]  [1. 0. 0. 0. 1. 0. 0. 1. 0. 0. 0.]  avg[x]  1.0  scores[y]  [1. 1. 0. 1. 1. 0. 1. 0. 0. 1. 0.]  avg[y]  1.0
4.0
scores[x]  [1. 0. 0. 0. 1. 0. 0. 1. 0. 0. 0.]  avg[x]  1.0  scores[y]  [0. 0. 1. 0. 1. 0. 0. 0. 1. 0. 0.]  avg[y]  1.0
6.0
scores[x]  [1. 0. 0. 0. 1. 0. 0. 1. 0. 0. 0.]  avg[x]  1.0  scores[y]  [1. 0. 1. 0. 0. 1. 0. 0. 1. 0. 0.]  avg[y]  1.0
5.0
scores[x]  [1. 0. 0. 0. 1. 0. 0. 1. 0. 0. 0.]  avg[x]  1.0  scores[y]  [0. 0. 0. 1. 0. 0. 1. 0. 0. 0. 1.]  avg[y]  1.0
5.0
scores[x]  [1. 0. 0. 0. 1. 0. 0. 1. 0. 0. 0.]  avg[x]  1.0  scores[y]  [0. 1. 0. 0. 1. 0. 0. 1