In [1]:
import pandas as pd
import numpy as np

import src.latent_factor_xai as lfxai
import concepts

import warnings
from pandas.core.common import SettingWithCopyWarning

warnings.simplefilter(action="ignore", category=SettingWithCopyWarning) 

In [2]:
def getMoviesExplanation(recommendation, lattice):
    """
        Devuelve la lista de peliculas que comparten el nodo padre con la pelicula recomendada
    """
    
    # el nodo padre es aquel (o aquellos nodos) en el que encontramos el mayor numero de atributos para la pelicula recomendada
    # al ser el nodo mas especifico, es el nodo en el que la recomendacion se encuentra que mas atributos tiene 
    # por tanto tenemos que encontrar ese intent (el que tenga mayor numero de atributos donde se encuentra la pelicula 
    # recomendada en el extent, pero cuando esa peli recomendada no este sola) y coger todos los extent de ese nodo
    
    my_movies = list()
    
    # guardamos todos las movies con sus atributos compartidos segun el reticulo
    for extent, intent in lattice:
        my_movies.append((extent,intent)) # movies, attributes
    
    # de la lista total, nos quedamos con aquellos conjuntos de peliculas donde este la recomendacion, y donde 
    # la recomendacion no este sola, porque no tiene sentido
    my_movies_2 = [(x[0],x[1],len(x[1])) for x in my_movies if recommendation in x[0] and len(x[0]) > 1]
    
    # guardamos las listas de peliculas donde haya mas atributos compartidos sin que sea el nodo root
    max_len = 0
    max_list = list()
    for elem in my_movies_2:
        if elem[2] >= max_len:
            for elem_tuple in elem[0]:
                max_list.append(elem_tuple) 
            max_len = elem[2]
    
    # de la lista, eliminamos la recomendacion 
    final_list = [x for x in max_list if x != recommendation]
    
    return final_list

In [3]:
def get_dummie(df, column, sep):
    new_df = df[column].str.get_dummies(sep=sep)
    result = pd.concat([df, new_df], axis=1)        
    #result.drop(columns=[column])
    return result

In [4]:
def dataframe_to_context_matrix(lattice_movies):
    # Generamos la matriz necesaria para concepts
    lattice_movies['title_year'] = lattice_movies['title_year'].apply(lambda val: str(val)) # pasamos años a str
    lista_columns = ['director_name', 'genres', 'stars', 'language', 'country', 'title_year']

    for c in range(len(lista_columns)):
        lattice_movies = get_dummie(lattice_movies, lista_columns[c], sep='|')

    lista_columns_to_drop = ['director_name', 'genres', 'stars', 'language', 'country', 'title_year', 'movie_title', 'duration']
    lattice_movies.drop(columns=lista_columns_to_drop, axis=1, inplace=True)

    result = lattice_movies.replace([0, 1], ['', 'X'])
    return result.set_index(['id'])

In [5]:
def get_lattice(movie_recommended, examples):
    movies_ids = np.append(examples, movie_recommended)
    
    # Obtengo las descripciones de las películas
    lattice_val = movies_attr_df[movies_attr_df['id'].isin(lattice_ids)]
    
    # Lo convierto a una matriz válida para concepts
    lattice_val = dataframe_to_context_matrix(lattice_val)
    
    objects = lattice_movies.index.tolist()
    properties = list(lattice_movies)
    bools = list(lattice_movies.fillna(False).astype(bool).itertuples(index=False, name=None))

    return concepts.Context(objects, properties, bools)

In [6]:
def get_movies_common_attribute(context, movie_recommended):
    movie_rec_attr = list(context.intension([str(movie_recommended)]))
    
    common = []
    for attr in movie_rec_attr:
        common.extend(list(context.extension([attr])))
        
    result = list(set(common))
    result.remove(str(movie_recommended))
    return result

In [7]:
def train_test_split(dataDF, training_percentage):
    '''
    Función que divide el dataset en un conjunto de entrenamiento y
    otro conjunto de evaluación.
    '''
    msk = np.random.rand(len(dataDF)) < float(training_percentage / 100)
    train = dataDF[msk]
    test = dataDF[~msk]
    
    return train, test

## Evaluación

In [8]:
## Preparamos los datos
# Cargamos todos los ratings del experimento
movies_DF = pd.read_csv('data/experiment_data/movies.csv')
ratings_DF = pd.read_csv('data/experiment_data/ratings.csv', usecols=[0,1,2])

# Preparamos el dataset de entrenamiento y evaluación
trainset, testset = train_test_split(ratings_DF, 90)

# Guardamos ambos dataframes en CSV
trainset.to_csv('data/experiment_data/trainset.csv', index=False)
testset.to_csv('data/experiment_data/testset.csv', index=False)

In [9]:
model = lfxai.NMF_XAI()

# Entrenamos el model
model.fit(trainset, movies_DF)

In [10]:
# k, #examples, #common attr, #explanation
values = []

for k in range(1,20):
    num_exceptions = 0
    #precision = []
    #recall = []
    test_id = 0

    for tst in testset.values:
        try:
            pred = model.predict(int(tst[0]), int(tst[1]))
            examples = model.get_examples(user_id=int(tst[0]), movie_id=int(tst[1]), n=k)
            context = model.get_lattice(movie_recommended=int(tst[1]), examples=examples)

            movies_explanation = getMoviesExplanation(str(int(tst[1])), context.lattice)
            movies_common = get_movies_common_attribute(context, movie_recommended=int(tst[1]))

            values.append([k, len(list(examples)), len(movies_common), len(movies_explanation)])
            #print("Examples={} - Common={} - Explanation={}".format(len(list(examples)), len(movies_common), len(movies_explanation)))
            
            #prec = len(movies_explanation)/len(list(examples))
            #rec = len(movies_explanation)/len(movies_common)
            #print("Precision={} - Recall={}".format(prec, rec))
            
            #precision.append(prec)
            #recall.append(rec)            
        except Exception as e:
            num_exceptions += 1
            pass

    print("k={} - Exceptions: {}".format(k, num_exceptions))

    #total_precision.append(sum(precision) / len(precision))
    #total_recall.append(sum(recall) / len(recall))

k=1 - Exceptions: 856
k=2 - Exceptions: 856
k=3 - Exceptions: 859
k=4 - Exceptions: 863
k=5 - Exceptions: 863
k=6 - Exceptions: 866
k=7 - Exceptions: 870
k=8 - Exceptions: 871
k=9 - Exceptions: 873
k=10 - Exceptions: 874
k=11 - Exceptions: 874
k=12 - Exceptions: 876
k=13 - Exceptions: 877
k=14 - Exceptions: 878
k=15 - Exceptions: 881
k=16 - Exceptions: 884
k=17 - Exceptions: 885
k=18 - Exceptions: 888
k=19 - Exceptions: 891


In [15]:
result_df = pd.DataFrame(values, columns=['K', 'Examples', 'Common Attr', 'Final Explanation'])
result_df.head()

Unnamed: 0,K,Examples,Common Attr,Final Explanation
0,1,1,1,1
1,1,1,1,1
2,1,1,1,1
3,1,1,1,1
4,1,1,1,1


In [20]:
result_df['Precision'] = result_df.apply(lambda x: x['Final Explanation']/x['Examples'], axis=1)
result_df['Recall'] = result_df.apply(lambda x: x['Final Explanation']/x['Common Attr'], axis=1)

  result_df['Recall'] = result_df.apply(lambda x: x['Final Explanation']/x['Common Attr'], axis=1)


In [26]:
result_df.groupby(by='K').count()

Unnamed: 0_level_0,Examples,Common Attr,Final Explanation,Precision,Recall
K,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,322,322,322,322,322
2,322,322,322,322,322
3,319,319,319,319,319
4,315,315,315,315,315
5,315,315,315,315,315
6,312,312,312,312,312
7,308,308,308,308,308
8,307,307,307,307,307
9,305,305,305,305,305
10,304,304,304,304,304


In [11]:
pd.DataFrame(list(zip(range(1,20), total_precision, total_recall)), columns =['K', 'Precision', 'Recall'])

NameError: name 'total_precision' is not defined

In [None]:
import matplotlib.pyplot as plt
import numpy as np

In [None]:
fig = plt.figure()
fig.set_figheight(10)
fig.set_figwidth(10)

ax = fig.add_subplot(111)    # The big subplot
ax1 = fig.add_subplot(211)
ax2 = fig.add_subplot(212)

ax.spines['top'].set_color('none')
ax.spines['bottom'].set_color('none')
ax.spines['left'].set_color('none')
ax.spines['right'].set_color('none')
ax.tick_params(labelcolor='w', top=False, bottom=False, left=False, right=False)

precision_points = np.array(total_precision)
recall_points = np.array(total_recall)

ax1.plot(precision_points, c = '#4CAF50')
ax2.plot(recall_points, c = '#D17170')
ax1.set_ylabel('Precision', fontsize=20, fontweight='bold')
ax2.set_ylabel('Recall', fontsize=20, fontweight='bold')

ax.set_xlabel('K retrieved examples', fontsize=20, fontweight='bold')

plt.show()

## Prueba Concepts

Para cargar un retículo, es necesario primero crear un DF donde el índice sea el id de la película, las columnas sean los atributos y los valores sean vacíos o X.

In [None]:
# Cargamos los datos
trainset = pd.read_csv('data/experiment_data/trainset.csv')
testset = pd.read_csv('data/experiment_data/testset.csv')
movies_attr_df = pd.read_csv('data/experiment_data/movies.csv')

In [None]:
# Creamos y entrenamos el modelo
model = lfxai.NMF_XAI()
model.fit(trainset, movies_attr_df)

In [None]:
# Ejemplo de ejecución
user = 8
movie = 25

In [None]:
# Obtenemos los ejemplos de explicación
pred = model.predict(user, movie)
examples = model.get_examples(user, movie, n=10)

In [None]:
examples

In [None]:
# Obtenemos las descripciones de todas las películas
lattice_ids = np.append(examples, movie)
lattice_movies = movies_attr_df[movies_attr_df['id'].isin(lattice_ids)]

In [None]:
# Generamos la matriz necesaria para concepts
lattice_movies['title_year'] = lattice_movies['title_year'].apply(lambda val: str(val)) # pasamos años a str
lista_columns = ['director_name', 'genres', 'stars', 'language', 'country', 'title_year']

for c in range(len(lista_columns)):
    lattice_movies = get_dummie(lattice_movies, lista_columns[c], sep='|')
    
lista_columns_to_drop = ['director_name', 'genres', 'stars', 'language', 'country', 'title_year', 'movie_title', 'duration']
lattice_movies.drop(columns=lista_columns_to_drop, axis=1, inplace=True)

lattice_movies = lattice_movies.replace([0, 1], ['', 'X'])

In [None]:
# Generamos el retículo
lattice_movies.set_index(['id'], inplace=True)
objects = [str(x) for x in lattice_movies.index.tolist()]
properties = list(lattice_movies)
bools = list(lattice_movies.fillna(False).astype(bool).itertuples(index=False, name=None))

context_movies = concepts.Context(objects, properties, bools)

In [None]:
for extent, intent in context_movies.lattice:
    print("{} - {}".format(extent, intent))

In [None]:
getMoviesExplanation(str(movie), context_movies.lattice)

In [None]:
context_movies.lattice.graphviz()