# Recommender Systems - Morphing and Metafeatures

## Imports

In [None]:
import warnings

import implicit.evaluation
import matplotlib.pyplot as plt
import matplotlib.ticker as pltt
import numpy as np
import pandas as pd
import seaborn as sns
from scipy.sparse import csr_matrix
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

from exploration import exploration_util
from recommendation import implicit_util

## MetaLearning

### Space D - Building Trajectories

In [None]:
def _build_trajectory(df_source, df_target, seed):
    """Materialise the full list of intermediates that ``swap_rows`` yields for ``seed``."""
    return list(swap_rows(df_source, df_target, seed=seed))


# The ten public builders below differ only in the seed that fixes the order
# in which rows are swapped, so they all delegate to ``_build_trajectory``.

def traj_1(df_source, df_target):
    """Return the list of intermediate dataframes built with seed 1."""
    return _build_trajectory(df_source, df_target, 1)


def traj_2(df_source, df_target):
    """Return the list of intermediate dataframes built with seed 2."""
    return _build_trajectory(df_source, df_target, 2)


def traj_3(df_source, df_target):
    """Return the list of intermediate dataframes built with seed 3."""
    return _build_trajectory(df_source, df_target, 3)


def traj_4(df_source, df_target):
    """Return the list of intermediate dataframes built with seed 4."""
    return _build_trajectory(df_source, df_target, 4)


def traj_5(df_source, df_target):
    """Return the list of intermediate dataframes built with seed 5."""
    return _build_trajectory(df_source, df_target, 5)


def traj_6(df_source, df_target):
    """Return the list of intermediate dataframes built with seed 6."""
    return _build_trajectory(df_source, df_target, 6)


def traj_7(df_source, df_target):
    """Return the list of intermediate dataframes built with seed 7."""
    return _build_trajectory(df_source, df_target, 7)


def traj_8(df_source, df_target):
    """Return the list of intermediate dataframes built with seed 8."""
    return _build_trajectory(df_source, df_target, 8)


def traj_9(df_source, df_target):
    """Return the list of intermediate dataframes built with seed 9."""
    return _build_trajectory(df_source, df_target, 9)


def traj_10(df_source, df_target):
    """Return the list of intermediate dataframes built with seed 10."""
    return _build_trajectory(df_source, df_target, 10)

In [None]:
def swap_rows(df_source, df_target, seed=None):
    """Yield the intermediate dataframes obtained by morphing source into target.

    Rows are visited in a random order; at each step the row at the current
    position of a working copy of ``df_source`` is overwritten with the row at
    the same position of ``df_target``, and a snapshot of the working copy is
    yielded. The generator therefore produces ``len(df_source)`` dataframes.

    Args:
        df_source: Starting dataframe. It is copied, never modified.
        df_target: Dataframe whose rows are copied in, addressed by position.
            It needs at least ``len(df_source)`` rows, otherwise the
            positional lookup raises ``IndexError``.
        seed: Seed for the row order. The same seed always gives the same
            order; ``None`` gives a fresh random order.

    Yields:
        A copy of the working dataframe after each single-row swap.
    """
    # A private RandomState draws the same permutation that seeding the global
    # generator with ``seed`` would, but it does not reset ``np.random`` for
    # the rest of the process (and is unaffected by other generators being
    # consumed in between).
    rng = np.random.RandomState(seed)
    # Random permutation of the row positions
    indices = rng.permutation(len(df_source))
    df_intermediate = df_source.copy()
    for idx in indices:
        # Replace the row at this position with the matching target row
        df_intermediate.iloc[idx] = df_target.iloc[idx]
        # Snapshot, so later swaps do not alter what was already yielded
        yield df_intermediate.copy()

### Space MF - Extracting MetaFeatures

In [None]:
from scipy.stats import entropy, kurtosis
def gini(x):
    """Return the Gini coefficient of the values in ``x``.

    Defined as half the relative mean absolute difference:
    ``mean(|x_i - x_j|) / (2 * mean(x))`` over all ordered pairs
    (see https://stackoverflow.com/a/39513799).

    The pairwise sum is evaluated through the equivalent sorted-rank identity
    ``sum_i (2*i - n - 1) * x_(i) / (n * sum(x))``, which needs O(n log n)
    time and O(n) memory instead of materialising the n x n matrix of
    differences. Results can differ from the pairwise form in the last
    floating-point digits.

    Args:
        x: Array-like of numbers; multi-dimensional input is flattened.

    Returns:
        The coefficient as a NumPy float. Empty input or values summing to
        zero produce ``nan``/``inf`` through NumPy division, as with the
        pairwise formula.
    """
    values = np.sort(np.asarray(x, dtype=float).ravel())
    n = values.size
    ranks = np.arange(1, n + 1)
    weighted_sum = np.sum((2 * ranks - n - 1) * values)
    return weighted_sum / (n * values.sum())
    
def calculate_metafeatures(df):
    """Compute the meta-features of one dataframe.

    Args:
        df: Dataframe to describe.

    Returns:
        A single-row dataframe (index ``[0]``) with one column per
        meta-feature: entropy, Gini and mean of the per-column non-null
        counts, entropy of the column means, entropy and maximum of the
        per-row non-null counts, total number of distinct values
        (``nrBin``), mean column variance (``attrConc.mean``), number of
        cells equal to zero (``nZeros``) and their share of all cells
        (``sparsity``).

    NOTE: the kurtosis and ``attrEnt.mean`` meta-features are currently
    disabled.
    """
    # Non-null counts are reused by several meta-features, so compute them once.
    column_counts = df.count(axis=0).to_numpy()
    row_counts = df.count(axis=1)

    # Count zeros over every cell explicitly. np.sum(df == 0) is dispatched to
    # DataFrame.sum, which on pandas versions that reduce column-wise returns
    # one count per column; pd.DataFrame(..., index=[0]) then reindexes that
    # Series to label 0 and stores NaN instead of the count.
    n_zeros = int((df == 0).to_numpy().sum())

    metafeatures = {
        'column.count.entropy': entropy(column_counts),
        'column.count.gini': gini(column_counts),
        'column.count.mean': column_counts.mean(),
        'column.mean.entropy': entropy(df.mean(axis=0)),
        'row.count.entropy': entropy(row_counts),
        'row.count.max': row_counts.max(),
        'nrBin': df.nunique().sum(),
        'attrConc.mean': df.var(axis=0).mean(),
        'nZeros': n_zeros,
        # An empty frame has no cells, so its sparsity is undefined.
        'sparsity': n_zeros / df.size if df.size else np.nan,
    }

    return pd.DataFrame(metafeatures, index=[0])

In [None]:
def metafeatures_data(dfs):
    """Stack the meta-features of every dataframe in ``dfs`` into one table.

    Args:
        dfs: Iterable of dataframes; each is passed to
            ``calculate_metafeatures``.

    Returns:
        A dataframe with one row per input dataframe, in input order, with a
        fresh 0..n-1 index.

    Raises:
        ValueError: Raised by ``pd.concat`` when ``dfs`` is empty.
    """
    per_frame = [calculate_metafeatures(frame) for frame in dfs]
    return pd.concat(per_frame, ignore_index=True)

### Space F - Extracting MetaLabels

In [None]:
def evaluation(df):
    """Train and evaluate the recommender models on one ratings dataframe.

    Args:
        df: Dataframe with the columns ``new_member_id``, ``new_recipe_id``
            and ``rating``.

    Returns:
        The first of the four values returned by
        ``implicit_util.train_and_execute_all`` (the evaluation results).
    """
    # Build the sparse user x recipe matrix that holds the ratings
    ratings = df['rating']
    coordinates = (df['new_member_id'], df['new_recipe_id'])
    user_recipe_matrix = csr_matrix((ratings, coordinates))

    # Train/test split. implicit.evaluation.leave_k_out_split is an
    # alternative that forces every user to appear in both sets.
    interactions = user_recipe_matrix.tocsr().tocoo()
    train_matrix, test_matrix = implicit.evaluation.train_test_split(interactions)

    # Users/recipes present in the train set (and in the test set respectively)
    train_user, train_recipe = implicit_util.tuple_to_unique(train_matrix.tocsr().nonzero())
    test_user, test_recipe = implicit_util.tuple_to_unique(test_matrix.tocsr().nonzero())

    # Runs all models except nmslib_als and faiss_als (exception on
    # Windows/Python 3.10). NOTE(review): presumably the list argument names
    # the models to skip; confirm against implicit_util.
    skipped_models = ['nmslib_als', 'faiss_als']
    scores, _recommendations, _similar_items, _similar_users = implicit_util.train_and_execute_all(
        train_matrix, test_matrix, train_user, train_recipe, skipped_models, K=10)
    return scores

In [None]:
def melhor_alg(df):
    """Return the index label of the row with the highest mean across columns.

    The frame is expected to hold one row per algorithm and one column per
    metric (presumably the output of ``evaluation``; verify against caller).

    Side effect: ``df`` gains (or has overwritten) two columns, ``media``
    with the per-row mean and ``melhor_algoritmo`` flagging the winning row.

    Args:
        df: Dataframe of metric values, indexed by algorithm.

    Returns:
        The index label of the row whose mean is largest.
    """
    # Leave out the two columns this function adds, so calling it again on the
    # same frame averages only the metrics and keeps 'media' correct.
    metrics = df.drop(columns=['media', 'melhor_algoritmo'], errors='ignore')
    df['media'] = metrics.mean(axis=1)  # mean of the metric values per algorithm
    melhor_algoritmo = df['media'].idxmax()
    df['melhor_algoritmo'] = df.index == melhor_algoritmo
    return melhor_algoritmo

In [None]:
def obter_metalabels(dfs):
    """Return the best-algorithm label of every dataframe in ``dfs``.

    Each dataframe is run through ``evaluation`` and the result is reduced to
    a single label by ``melhor_alg``.

    Args:
        dfs: Iterable of ratings dataframes.

    Returns:
        A list with one label per input dataframe, in input order.
    """
    return [melhor_alg(evaluation(frame)) for frame in dfs]

In [None]:
def metados(df, labels):
    """Store ``labels`` in the ``Algoritmo`` column of ``df`` and return ``df``.

    The dataframe is modified in place; the returned object is the same one
    that was passed in.
    """
    label_column = 'Algoritmo'
    df[label_column] = labels
    return df

### MetaDataSet

In [None]:
def df_MetaFeatures(Ds, Dt):
    """Build the meta-dataset for the trajectories that morph ``Ds`` into ``Dt``.

    Ten trajectories are generated (``traj_1`` ... ``traj_10``). Every
    intermediate dataframe of every trajectory contributes one row: its
    meta-features plus, in column ``Alg``, the label returned by
    ``obter_metalabels`` for it. Numeric columns are then min-max scaled.

    Args:
        Ds: Source dataframe.
        Dt: Target dataframe.

    Returns:
        A dataframe with the scaled meta-features and the ``Alg`` column.

    Raises:
        ValueError: If the number of meta-feature rows and labels differ.
    """
    trajectory_builders = (
        traj_1, traj_2, traj_3, traj_4, traj_5,
        traj_6, traj_7, traj_8, traj_9, traj_10,
    )

    # Trajectories: all are built first, then described, then labelled.
    trajectories = [build(Ds, Dt) for build in trajectory_builders]

    # Meta-features, still without meta-labels
    df_metafeatures = pd.concat(
        [metafeatures_data(intermediates) for intermediates in trajectories],
        ignore_index=True,
    )

    # Meta-labels, gathered in the same trajectory order as the rows above
    labels = []
    for intermediates in trajectories:
        labels.extend(obter_metalabels(intermediates))

    # Attach the meta-labels to the meta-features
    if len(df_metafeatures) != len(labels):
        raise ValueError("O número de linhas de df_metafeatures e df_total deve ser o mesmo")
    df_metafeatures['Alg'] = labels

    # Normalise the numeric columns only
    scaler = MinMaxScaler()
    numeric_cols = df_metafeatures.select_dtypes(include='number').columns
    df_metafeatures[numeric_cols] = scaler.fit_transform(df_metafeatures[numeric_cols])

    return df_metafeatures

## MetaFeatures - Internal Trajectories

In [None]:
# Folder that holds the input CSV files (machine-specific absolute Windows path).
# NOTE(review): the name `dir` shadows Python's built-in dir(); renaming it
# means updating the read_csv cell that concatenates it as well.
dir='C:\\Users\\beatr\\Desktop\\Estágio\\data'

In [None]:
# Load pp_reviews.csv from the data folder.
hummus_reviews=pd.read_csv(dir+'\\pp_reviews.csv')

In [None]:
# Keep only the three columns that evaluation() reads: user id, recipe id and rating.
reviews=hummus_reviews[['new_member_id','new_recipe_id','rating']]

In [None]:
# Split the reviews into two disjoint parts: Ds is a reproducible random 50%
# sample (random_state=42) and Dt holds every remaining row.
# NOTE(review): with an odd number of reviews the two parts differ in size by
# one row; swap_rows addresses Dt by position for every row of Ds, so confirm
# Ds is never the longer one.
Ds = reviews.sample(frac=0.5, random_state=42)  
Dt = reviews.drop(Ds.index)

In [None]:
# Build the meta-dataset (meta-features plus the 'Alg' label) from the Ds -> Dt trajectories.
hummus_metafeatures=df_MetaFeatures(Ds,Dt)

## Model

In [None]:
# The meta-features are the inputs; the best-algorithm label is the target.
X = hummus_metafeatures.drop(columns=['Alg'])
y = hummus_metafeatures['Alg']

# Hold out 20% of the meta-examples for testing (fixed split for repeatability).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model = RandomForestClassifier()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
# The stray text that followed this call made the cell a SyntaxError; removed.
print("Accuracy do Modelo no Conjunto de Teste:", accuracy)