### **Bibliotecas**

In [1]:
from os import mkdir

import pandas as pd
import os
import numpy as np
from numpy.linalg import norm
from numpy.linalg import inv as inverse
import scipy.sparse as sparse
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn import svm
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score, precision_score, recall_score
from sklearn.metrics import precision_recall_fscore_support as score

import warnings

warnings.filterwarnings('ignore')

In [6]:
# Root of the data tree, relative to the notebook's working directory.
IO_ROOT = '../data'

# Input folder with the raw check-in CSV files. It is only reported when
# missing, never created, because the data has to be provided by the user.
IO_CHECKINS = os.path.join(IO_ROOT, 'checkins')
if not os.path.exists(IO_CHECKINS):
    print('checkins folder does not exist')

# Output folder. makedirs also creates a missing IO_ROOT (plain mkdir raises
# FileNotFoundError in that case) and is a no-op when the folder already exists.
OUTPUT_ROOT = os.path.join(IO_ROOT, 'output')
os.makedirs(OUTPUT_ROOT, exist_ok=True)

### **Optimizer**

In [7]:
class Optimizer:
    """Alternating closed-form solver for the HMRM matrix factorisation.

    ``start`` builds four observation matrices from a check-ins frame and then
    repeatedly re-solves every latent factor while the others are held fixed.

    Learned factors (K latent activities, M embedding dimensions):
        user_activity               (users, K)       theta
        activity_location           (locations, K)   Al
        activity_time               (48, K)          At
        activity_embedding          (K, M)           Ea
        target_location_embedding   (locations, M)   El
        context_location_embedding  (locations, M)   Ec
        time_slot_embedding         (48, M)          Et

    The 48 time slots are the hours 0-23 of Monday-Friday followed by the
    hours 0-23 of Saturday/Sunday (stored with an offset of 24).

    ``userid`` and ``placeid`` are used directly as row/column positions, so
    they must be contiguous integer codes starting at 0.
    """

    def __init__(self):
        # Observation matrices, filled by the _create_* methods.
        self._user_location_frequency = np.array([])
        self._user_time_frequency = np.array([])
        self._location_co_ocurrency = np.array([])
        self._location_time = np.array([])
        # Coefficient of the norm penalty applied to every factor.
        self._weight = 0.001
        # Latent factors, filled by _initialize_parameters and refined by
        # _optimize_parameters.
        self.activity_location = np.array([])
        self.activity_time = np.array([])
        self.user_activity = np.array([])
        self.activity_embedding = np.array([])
        self.target_location_embedding = np.array([])
        self.context_location_embedding = np.array([])
        self.time_slot_embedding = np.array([])

    def _create_user_location_frequency_matrix(self, users_checkins):
        """Count how many times each user checked in at each location.

        Stores a sparse (users, locations) matrix in
        ``self._user_location_frequency``.
        """
        # Plain lists give positional access; indexing the Series with
        # ``series[i]`` is label based and fails on a non-default index.
        placeids = users_checkins["placeid"].tolist()
        userids = users_checkins["userid"].tolist()
        total_users = len(users_checkins["userid"].unique())
        total_places = len(users_checkins["placeid"].unique())
        print(f'total_places: {total_places}, total_users: {total_users}\n')

        self._user_location_frequency = sparse.lil_matrix((total_users, total_places))

        for user, place in zip(userids, placeids):
            self._user_location_frequency[user, place] += 1

    def _create_user_time_frequency_matrix(self, users_checkins: pd.DataFrame):
        """Count how many check-ins each user made in each of the 48 time slots.

        Stores a dense (users, 48) array in ``self._user_time_frequency``.
        """
        # Both columns are read from the same, unsorted frame so that every
        # user id stays paired with its own timestamp. Counting does not
        # depend on row order, so no sort is needed.
        users_ids = users_checkins["userid"].tolist()
        datetimes = pd.to_datetime(users_checkins["datetime"])
        total_users = len(users_checkins["userid"].unique())

        self._user_time_frequency = np.zeros((total_users, 48))

        for user, moment in zip(users_ids, datetimes):
            # Saturday/Sunday (weekday() >= 5) use slots 24-47.
            slot = moment.hour + 24 if moment.weekday() >= 5 else moment.hour
            self._user_time_frequency[user][slot] += 1

    def _create_location_coocurrency_matrix(self, users_checkins):
        """Build the positive-PMI location co-occurrence matrix.

        Check-ins are ordered by ``datetime`` and every location is paired
        with the locations up to five positions before and after it. The raw
        counts are then replaced, row by row, with max(log2(PMI), 0). The
        result is stored in ``self._location_co_ocurrency``.
        """
        # NOTE(review): the window slides over one global time-ordered
        # sequence, so consecutive check-ins of different users are counted
        # as co-occurring - confirm this is intended.
        users_checkins_sorted = users_checkins.sort_values(by=["datetime"])
        locations = users_checkins_sorted["placeid"].tolist()
        number_of_locations = len(users_checkins["placeid"].unique())

        # Kept sparse to save memory.
        self._location_co_ocurrency = sparse.lil_matrix(
            (number_of_locations, number_of_locations)
        )

        window = 5
        last_position = len(locations) - 1
        for i in range(len(locations)):
            for j in range(1, window + 1):
                if (i - j) < 0:
                    break
                self._location_co_ocurrency[locations[i], locations[i - j]] += 1
            for j in range(1, window + 1):
                if (i + j) > last_position:
                    break
                self._location_co_ocurrency[locations[i], locations[i + j]] += 1

        # Totals are taken from the raw counts before any row is overwritten.
        sum_of_dl = np.sum(self._location_co_ocurrency)
        l_occurrency = np.sum(self._location_co_ocurrency, axis=1).reshape(-1, 1)
        c_occurrency = np.sum(self._location_co_ocurrency, axis=0).reshape(1, -1)

        for i in range(number_of_locations):
            # PMI is computed one row at a time so that only a single row is
            # ever densified.
            line = self._location_co_ocurrency[i].toarray()
            # The numerator is clamped to 1 so that log2 never receives 0;
            # negative log values are then clipped to 0.
            self._location_co_ocurrency[i] = np.maximum(
                np.log2(
                    np.maximum(line * sum_of_dl, 1)
                    / (l_occurrency[i] * c_occurrency)
                ),
                0,
            )

    def _create_location_time_matrix(self, users_checinks):
        """Build the positive-PMI (locations, 48) location/time-slot matrix.

        The ``datetime`` column must already hold timestamp objects, because
        ``weekday()`` and ``hour`` are read from every value. The result is
        stored in ``self._location_time``.
        """
        locations = users_checinks["placeid"].tolist()
        datetimes = users_checinks["datetime"].tolist()
        total_locations = len(users_checinks["placeid"].unique())
        Dt = np.zeros((total_locations, 48))

        for location, moment in zip(locations, datetimes):
            slot = moment.hour + 24 if moment.weekday() >= 5 else moment.hour
            Dt[location][slot] += 1

        sum_of_dt = np.sum(Dt)
        l_occurrency = np.sum(Dt, axis=1).reshape(-1, 1)
        c_occurrency = np.sum(Dt, axis=0).reshape(1, -1)

        # A zero denominator is replaced by -1: the ratio becomes negative,
        # is reset to 0 below, and finally maps to 0 after log2 and clipping.
        mult = l_occurrency * c_occurrency
        mult[mult == 0] = -1

        tmp = np.maximum(Dt * sum_of_dt, 1) / mult
        tmp[tmp < 0] = 0
        self._location_time = np.maximum(np.log2(tmp), 0)

    def _objective_function(self, l2_weight):
        """Return the current value of the HMRM objective.

        ``l2_weight`` balances the location terms (weight ``l2_weight``)
        against the time terms (weight ``1 - l2_weight``). A norm penalty
        scaled by ``self._weight`` is added for every factor.
        """

        def residual(observed, left, right):
            # Norm of the reconstruction error observed - left @ right.T
            return norm(observed - np.dot(left, right.T))

        activity_modeling_component = l2_weight * residual(
            self._user_location_frequency, self.user_activity, self.activity_location
        ) + (1 - l2_weight) * residual(
            self._user_time_frequency, self.user_activity, self.activity_time
        )

        trajectory_embedding_component = l2_weight * residual(
            self._location_co_ocurrency,
            self.target_location_embedding,
            self.context_location_embedding,
        ) + (1 - l2_weight) * residual(
            self._location_time, self.target_location_embedding, self.time_slot_embedding
        )

        collaborative_learning_component = l2_weight * residual(
            self.activity_location, self.context_location_embedding, self.activity_embedding
        ) + (1 - l2_weight) * residual(
            self.activity_time, self.time_slot_embedding, self.activity_embedding
        )

        objective_function = (
            activity_modeling_component
            + trajectory_embedding_component
            + collaborative_learning_component
        )
        for factor in (
            self.user_activity,
            self.activity_time,
            self.activity_embedding,
            self.activity_location,
            self.context_location_embedding,
            self.target_location_embedding,
            self.time_slot_embedding,
        ):
            objective_function += self._weight * norm(factor)

        return objective_function

    def _initialize_parameters(self, checkins, K, M):
        """Draw every latent factor from a standard normal distribution.

        K is the number of latent activities and M the embedding size.
        """
        total_locations = len(checkins["placeid"].unique())
        total_users = len(checkins["userid"].unique())
        time_slot = 48

        self.activity_location = np.random.normal(size=(total_locations, K))
        self.activity_time = np.random.normal(size=(time_slot, K))
        self.user_activity = np.random.normal(size=(total_users, K))
        self.activity_embedding = np.random.normal(size=(K, M))
        self.target_location_embedding = np.random.normal(size=(total_locations, M))
        self.context_location_embedding = np.random.normal(size=(total_locations, M))
        self.time_slot_embedding = np.random.normal(size=(time_slot, M))

    def _context_time_gram(self, M, l2_weight):
        """(M, M) matrix shared by the Ea and El updates."""
        return (
            (
                l2_weight
                * np.dot(self.context_location_embedding.T, self.context_location_embedding)
            )
            + ((1 - l2_weight) * np.dot(self.time_slot_embedding.T, self.time_slot_embedding))
            + (self._weight * np.identity(M))
        )

    def user_activity_embedding_function(self, K, l2_weight):
        """Return the closed-form update of ``user_activity`` (theta)."""
        # ``*`` between the sparse frequency matrix and a dense array is a
        # matrix product, not an element-wise one.
        first_equation = (
            l2_weight * (self._user_location_frequency * self.activity_location)
        ) + ((1 - l2_weight) * np.dot(self._user_time_frequency, self.activity_time))
        # NOTE(review): the identity term is scaled by ``l2_weight`` here,
        # whereas the Ea/El/Ec/Et updates scale it by ``self._weight`` -
        # confirm against the reference derivation.
        second_equation = (
            l2_weight * np.dot(self.activity_location.T, self.activity_location)
        ) + (
            (1 - l2_weight) * np.dot(self.activity_time.T, self.activity_time)
            + (l2_weight * np.identity(K))
        )
        return np.dot(first_equation, inverse(second_equation))

    def acticity_location_embedding_function(self, K, l2_weight):
        """Return the closed-form update of ``activity_location`` (Al)."""
        first_equation = l2_weight * (
            (self._user_location_frequency.T * self.user_activity)
            + np.dot(self.context_location_embedding, self.activity_embedding.T)
        )
        second_equation = (
            l2_weight * np.dot(self.user_activity.T, self.user_activity)
        ) + ((self._weight + l2_weight) * np.identity(K))
        return np.dot(first_equation, inverse(second_equation))

    def activity_time_embedding_function(self, K, l2_weight):
        """Return the closed-form update of ``activity_time`` (At)."""
        first_equation = (1 - l2_weight) * (
            np.dot(self._user_time_frequency.T, self.user_activity)
            + np.dot(self.time_slot_embedding, self.activity_embedding.T)
        )
        # NOTE(review): the identity coefficient is
        # ``1 - self._weight + l2_weight`` and sits inside the
        # ``(1 - l2_weight)`` factor, which does not mirror the Al update
        # (``self._weight + l2_weight``, outside the factor) - confirm
        # against the reference derivation.
        second_equation = (1 - l2_weight) * (
            np.dot(self.user_activity.T, self.user_activity)
            + (1 - self._weight + l2_weight) * np.identity(K)
        )
        return np.dot(first_equation, inverse(second_equation))

    def activity_embedding_function(self, M, l2_weight):
        """Return the closed-form update of ``activity_embedding`` (Ea)."""
        first_equation = (
            l2_weight * np.dot(self.activity_location.T, self.context_location_embedding)
        ) + ((1 - l2_weight) * np.dot(self.activity_time.T, self.time_slot_embedding))
        return np.dot(first_equation, inverse(self._context_time_gram(M, l2_weight)))

    def target_location_embedding_function(self, M, l2_weight):
        """Return the closed-form update of ``target_location_embedding`` (El)."""
        # scalar * sparse stays sparse; sparse * dense is a matrix product.
        first_equation = (
            l2_weight * self._location_co_ocurrency * self.context_location_embedding
        ) + ((1 - l2_weight) * np.dot(self._location_time, self.time_slot_embedding))
        return np.dot(first_equation, inverse(self._context_time_gram(M, l2_weight)))

    def context_location_embedding_function(self, M, l2_weight):
        """Return the closed-form update of ``context_location_embedding`` (Ec)."""
        first_equation = l2_weight * (
            self._location_co_ocurrency.T * self.target_location_embedding
            + np.dot(self.activity_location, self.activity_embedding)
        )
        second_equation = (
            l2_weight
            * (
                np.dot(self.target_location_embedding.T, self.target_location_embedding)
                + np.dot(self.activity_embedding.T, self.activity_embedding)
            )
        ) + (self._weight * np.identity(M))
        return np.dot(first_equation, inverse(second_equation))

    def time_slot_embedding_function(self, M, l2_weight):
        """Return the closed-form update of ``time_slot_embedding`` (Et)."""
        first_equation = (1 - l2_weight) * (
            np.dot(self._location_time.T, self.target_location_embedding)
            + np.dot(self.activity_time, self.activity_embedding)
        )
        second_equation = (
            (1 - l2_weight)
            * (
                np.dot(self.target_location_embedding.T, self.target_location_embedding)
                + np.dot(self.activity_embedding.T, self.activity_embedding)
            )
        ) + (self._weight * np.identity(M))
        return np.dot(first_equation, inverse(second_equation))

    def _optimize_parameters(self, K, M, l2_weight):
        """Run one sweep of updates over all seven factors.

        The order matters: every update reads the factors already refreshed
        earlier in the same sweep. theta, Al and At are clipped to be
        non-negative; the four embedding matrices are left unconstrained.
        """
        self.user_activity = self.user_activity_embedding_function(K, l2_weight)
        self.user_activity[self.user_activity < 0] = 0

        self.activity_location = self.acticity_location_embedding_function(K, l2_weight)
        self.activity_location[self.activity_location < 0] = 0

        self.activity_time = self.activity_time_embedding_function(K, l2_weight)
        self.activity_time[self.activity_time < 0] = 0

        self.activity_embedding = self.activity_embedding_function(M, l2_weight)
        self.target_location_embedding = self.target_location_embedding_function(
            M, l2_weight
        )
        self.context_location_embedding = self.context_location_embedding_function(
            M, l2_weight
        )
        self.time_slot_embedding = self.time_slot_embedding_function(M, l2_weight)

    def start(self, checkins, l2_weight=0.1, K=10, M=100, max_iterations=10, tolerance=0.1):
        """Fit all factors to ``checkins``.

        Args:
            checkins: frame with ``userid``, ``placeid`` (both 0-based integer
                codes) and ``datetime`` columns. It is not modified.
            l2_weight: balance between location terms and time terms.
            K: number of latent activities.
            M: embedding size.
            max_iterations: upper bound on the number of update sweeps.
            tolerance: the loop stops as soon as the objective improves by no
                more than this amount (or gets worse) between two sweeps.
        """
        print(f'\nInicando o HMRM...')
        # assign() returns a new frame, so the caller's datetime column keeps
        # its original dtype.
        checkins = checkins.assign(datetime=pd.to_datetime(checkins["datetime"]))

        self._create_user_location_frequency_matrix(checkins)
        self._create_location_coocurrency_matrix(checkins)
        self._create_user_time_frequency_matrix(checkins)
        self._create_location_time_matrix(checkins)

        print(f'\nMatrizes criadas...')

        self._initialize_parameters(checkins, K, M)

        # Start from +inf: a finite sentinel would end the loop after a
        # single sweep whenever the first objective value exceeds it.
        previous_objective = np.inf

        print("\nOtimizando os parâmetros")
        for i in range(max_iterations):
            print(i)
            self._optimize_parameters(K, M, l2_weight)
            objective_func = self._objective_function(l2_weight)

            if (previous_objective - objective_func) <= tolerance:
                break
            previous_objective = objective_func

### **HMRM Baseline**

In [8]:
class HmrmBaseline:
    """Run the HMRM optimizer on a check-ins CSV and return location embeddings."""

    def __init__(self, file=None, weight=0.5, K=7, embedding_size=50):
        """Store the input path and the hyper-parameters handed to the optimizer.

        Args:
            file: path of the check-ins CSV read by ``start``.
            weight: passed to ``Optimizer.start`` as ``l2_weight``.
            K: number of latent activities.
            embedding_size: passed to ``Optimizer.start`` as ``M``.
        """
        self.optimizer = Optimizer()
        self.input_file = file
        self.weight = weight
        self.K = K
        self.embedding_size = embedding_size

    def start(self):
        """Fit the optimizer and return one row of embeddings per location.

        Returns:
            DataFrame whose first ``2 * embedding_size`` columns are the
            context embedding followed by the target embedding. When the
            annotation step succeeds it also carries ``category`` (the first
            category seen for the place) and ``placeid`` (the original id).
        """
        # Columns containing any missing value are dropped entirely.
        users_checkin = pd.read_csv(self.input_file, index_col=False).dropna(axis=1)

        # unique() and factorize() both follow order of first appearance, so
        # original_placeids[code] is the id that factorize mapped to `code`.
        original_placeids = users_checkin['placeid'].unique()

        users_checkin['userid'] = pd.factorize(users_checkin['userid'])[0].astype(int)
        users_checkin['placeid'] = pd.factorize(users_checkin['placeid'])[0].astype(int)

        self.optimizer.start(users_checkin, self.weight, self.K, self.embedding_size)

        df = pd.DataFrame(
            data=np.concatenate(
                (
                    self.optimizer.context_location_embedding,
                    self.optimizer.target_location_embedding,
                ),
                axis=1,
            )
        )

        # Best effort: on failure the embeddings are still returned, just
        # without the annotation columns.
        try:
            # One pass over the frame instead of one full scan per place:
            # drop_duplicates keeps the first row of every place code.
            first_category = users_checkin.drop_duplicates(subset="placeid").set_index(
                "placeid"
            )["category"]

            df["category"] = first_category.reindex(range(df.shape[0])).tolist()
            df['placeid'] = [original_placeids[code] for code in range(df.shape[0])]

        except Exception as e:
            print('vim pro except')
            print(f'erro: {e}')

        return df

### **Gerando os embeddings gerais com hmrm**

In [17]:
def etl_checkins(df: pd.DataFrame, min_checkins: int = 40):
    """Keep only the check-ins of users with at least ``min_checkins`` rows.

    A ``local_datetime`` column, when present, is exposed as ``datetime`` in
    the result. The input frame itself is never modified.

    Args:
        df: check-ins with at least a ``userid`` column.
        min_checkins: minimum number of rows a user needs to be kept.

    Returns:
        The filtered frame (row labels of the kept rows are preserved).
    """
    print(f'Original checkins: {df.shape}')

    # Standardize the datetime column name on a renamed copy, so the caller's
    # frame keeps its original columns.
    if 'local_datetime' in df.columns:
        df = df.rename(columns={'local_datetime': 'datetime'})
        print(f"Renamed 'local_datetime' to 'datetime'")

    # value_counts() is indexed by userid, so the index of the filtered
    # counts is exactly the list of qualifying users.
    checkins_per_user = df['userid'].value_counts()
    users_ids = checkins_per_user[checkins_per_user >= min_checkins].index.tolist()

    print(f'Number of qualified users: {len(users_ids)}')

    filtered_checkins = df[df['userid'].isin(users_ids)]
    print(f'Filtered checkins shape: {filtered_checkins.shape}')

    return filtered_checkins


def create_embeddings(input_file, weight=0.1, K=7, embedding_size=50):
    """Run the HMRM baseline on ``input_file`` and return its embeddings frame.

    ``weight``, ``K`` and ``embedding_size`` are forwarded unchanged to
    ``HmrmBaseline``.
    """
    print(f'Creating embeddings with weight={weight}, K={K}, embedding_size={embedding_size}')
    baseline = HmrmBaseline(
        file=input_file,
        weight=weight,
        K=K,
        embedding_size=embedding_size,
    )
    embeddings = baseline.start()
    return embeddings


def embeddings_job(state_name, path, **kwargs):
    """Filter one state's check-ins, fit HMRM on them and persist both results.

    Two files are written under ``OUTPUT_ROOT/<state_name>``: the filtered
    check-ins (``<state_name>-filtrado.csv``) and the embeddings
    (``<state_name>-embeddings.csv``).

    Args:
        state_name: used for the output folder, the file names and the logs.
        path: CSV file with the raw check-ins.
        **kwargs: optional ``weight``, ``K`` and ``embedding_size``; any other
            key is ignored.

    Returns:
        The embeddings frame.

    Raises:
        Any exception raised along the way, after it has been printed.
    """
    print(f'\nProcessing {state_name.capitalize()} check-ins...')

    # Defaults for the HMRM hyper-parameters, overridden by matching kwargs.
    settings = {'weight': 0.1, 'K': 7, 'embedding_size': 50}
    settings.update({key: kwargs[key] for key in settings if key in kwargs})

    try:
        # Make sure the per-state output folder exists.
        state_dir = f'{OUTPUT_ROOT}/{state_name}'
        if not os.path.exists(state_dir):
            os.makedirs(state_dir)
            print(f'Created directory: {state_dir}')

        # Load and filter the raw check-ins.
        filtered = etl_checkins(pd.read_csv(path, index_col=False))

        # The filtered file doubles as the input of the embedding step.
        etl_path = f'{state_dir}/{state_name}-filtrado.csv'
        filtered.to_csv(etl_path, index=False)
        print(f'Filtered check-ins saved to {etl_path}')

        embeddings = create_embeddings(
            etl_path,
            settings['weight'],
            settings['K'],
            settings['embedding_size'],
        )

        embb_path = f'{state_dir}/{state_name}-embeddings.csv'
        embeddings.to_csv(embb_path, index=False)

        print(f'Shape: {embeddings.shape}')
        print(f'Embeddings for {state_name.capitalize()} generated successfully')

        return embeddings

    except Exception as error:
        # Report which state failed, then let the error propagate.
        print(f'Error processing {state_name}: {error!s}')
        raise

In [26]:
# Raw check-in files, one per region, all located in IO_CHECKINS.
# The names on the left pair up, in order, with the file suffixes below.
# path_chicago reads the Illinois file.
# NOTE(review): 'florida' is the only lower-case suffix - confirm it matches
# the file name on disk.
(
    path_alabama,
    path_arizona,
    path_virginia,
    path_chicago,
    path_florida,
    path_georgia,
    path_nova_york,
    path_texas,
) = (
    os.path.join(IO_CHECKINS, f'checkins_{region}.csv')
    for region in (
        'Alabama',
        'Arizona',
        'Virginia',
        'Illinois',
        'florida',
        'Georgia',
        'New York',
        'Texas',
    )
)

#### alabama

In [18]:
# Alabama: filter the check-ins and write the HMRM embeddings under
# OUTPUT_ROOT/alabama. The returned frame is discarded.
_ = embeddings_job('alabama', path_alabama, weight=0.1, K=7, embedding_size=50)


Processing Alabama check-ins...
Original checkins: (93402, 8)
Renamed 'local_datetime' to 'datetime'
Number of qualified users: 418
Filtered checkins shape: (76041, 8)
Filtered check-ins saved to ../data/output/alabama/alabama-filtrado.csv
Creating embeddings with weight=0.1, K=7, embedding_size=50

Inicando o HMRM...
total_places: 9090, total_users: 418


Matrizes criadas...

Otimizando os parâmetros
0
1
2
3
4
5
6
7
8
9
Shape: (9090, 102)
Embeddings for Alabama generated successfully


#### arizona

In [19]:
# Arizona: filter the check-ins and write the HMRM embeddings under
# OUTPUT_ROOT/arizona. The returned frame is discarded.
_ = embeddings_job('arizona', path_arizona, weight=0.1, K=7, embedding_size=50)


Processing Arizona check-ins...
Original checkins: (188860, 8)
Renamed 'local_datetime' to 'datetime'
Number of qualified users: 756
Filtered checkins shape: (152210, 8)
Filtered check-ins saved to ../data/output/arizona/arizona-filtrado.csv
Creating embeddings with weight=0.1, K=7, embedding_size=50

Inicando o HMRM...
total_places: 16357, total_users: 756


Matrizes criadas...

Otimizando os parâmetros
0
1
2
3
4
5
6
7
8
9
Shape: (16357, 102)
Embeddings for Arizona generated successfully


#### virginia

In [20]:
# Virginia: filter the check-ins and write the HMRM embeddings under
# OUTPUT_ROOT/virginia. The returned frame is discarded.
_ = embeddings_job('virginia', path_virginia, weight=0.1, K=7, embedding_size=50)


Processing Virginia check-ins...
Created directory: ../data/output/virginia
Original checkins: (247600, 8)
Renamed 'local_datetime' to 'datetime'
Number of qualified users: 1059
Filtered checkins shape: (195378, 8)
Filtered check-ins saved to ../data/output/virginia/virginia-filtrado.csv
Creating embeddings with weight=0.1, K=7, embedding_size=50

Inicando o HMRM...
total_places: 20947, total_users: 1059


Matrizes criadas...

Otimizando os parâmetros
0
1
2
3
4
5
6
7
8
9
Shape: (20947, 102)
Embeddings for Virginia generated successfully


#### chicago

In [23]:
# Chicago: path_chicago points at the Illinois check-ins file. Outputs are
# written under OUTPUT_ROOT/chicago. The returned frame is discarded.
_ = embeddings_job('chicago', path_chicago, weight=0.1, K=7, embedding_size=50)


Processing Chicago check-ins...
Original checkins: (449674, 8)
Renamed 'local_datetime' to 'datetime'
Number of qualified users: 1563
Filtered checkins shape: (374135, 8)
Filtered check-ins saved to ../data/output/chicago/chicago-filtrado.csv
Creating embeddings with weight=0.1, K=7, embedding_size=50

Inicando o HMRM...
total_places: 31676, total_users: 1563


Matrizes criadas...

Otimizando os parâmetros
0
1
2
3
4
Shape: (31676, 102)
Embeddings for Chicago generated successfully


#### georgia

In [24]:
# Georgia: filter the check-ins and write the HMRM embeddings under
# OUTPUT_ROOT/georgia. The returned frame is discarded.
_ = embeddings_job('georgia', path_georgia, weight=0.1, K=7, embedding_size=50)


Processing Georgia check-ins...
Created directory: ../data/output/georgia
Original checkins: (332198, 8)
Renamed 'local_datetime' to 'datetime'
Number of qualified users: 1159
Filtered checkins shape: (276308, 8)
Filtered check-ins saved to ../data/output/georgia/georgia-filtrado.csv
Creating embeddings with weight=0.1, K=7, embedding_size=50

Inicando o HMRM...
total_places: 23452, total_users: 1159


Matrizes criadas...

Otimizando os parâmetros
0
1
2
3
4
5
6
7
8
9
Shape: (23452, 102)
Embeddings for Georgia generated successfully


In [None]:
# Texas: filter the check-ins and write the HMRM embeddings under
# OUTPUT_ROOT/texas. The returned frame is discarded.
_ = embeddings_job('texas', path_texas, weight=0.1, K=7, embedding_size=50)


Processing Texas check-ins...
Created directory: ../data/output/texas
Original checkins: (3355419, 8)
Renamed 'local_datetime' to 'datetime'
Number of qualified users: 11326
Filtered checkins shape: (3092354, 8)
Filtered check-ins saved to ../data/output/texas/texas-filtrado.csv
Creating embeddings with weight=0.1, K=7, embedding_size=50

Inicando o HMRM...
total_places: 132237, total_users: 11326


Matrizes criadas...

Otimizando os parâmetros
0


### **SVM**

In [None]:
# k-fold cross-validation of a linear SVM on the location embeddings.
#
# NOTE(review): `features_alabama` is not defined in any visible cell, so this
# cell fails on a fresh kernel. It presumably derives from the file written by
# embeddings_job ('alabama-embeddings.csv') - confirm and add the loading step.

# Position of the class label; every column before it is an embedding feature
# (2 x embedding_size = 100 columns for embedding_size=50).
LABEL_COLUMN = 100

features = features_alabama.iloc[:, :LABEL_COLUMN]
targets = features_alabama.iloc[:, LABEL_COLUMN]

# One fixed label set for all folds, so the per-class arrays of every fold
# have the same length and order even when a fold lacks a class.
class_labels = sorted(set(targets))

# A fixed seed makes the shuffled folds reproducible.
kf = KFold(n_splits=5, shuffle=True, random_state=42)
split = kf.split(features, targets)

# Per-class scores, one array per fold.
fscores, precisions, recalls = [], [], []

# Aggregated scores, one value per fold.
acc = []
w_avg_f, m_avg_f = [], []
w_avg_p, m_avg_p = [], []
w_avg_r, m_avg_r = [], []

for train_index, test_index in split:
    # KFold yields positions, hence iloc rather than the label-based loc.
    X_train, Y_train = features.iloc[train_index], targets.iloc[train_index]
    x_test, y_test = features.iloc[test_index], targets.iloc[test_index]

    model = svm.SVC(
        kernel="linear", decision_function_shape='ovo', class_weight="balanced")
    model.fit(X_train, Y_train)

    y_predicted = model.predict(x_test)

    precision, recall, fscore, support = score(
        y_test, y_predicted, labels=class_labels)
    acc.append(accuracy_score(y_test, y_predicted))

    fscores.append(fscore)
    precisions.append(precision)
    recalls.append(recall)

    w_avg_f.append(f1_score(y_test, y_predicted, average='weighted'))
    m_avg_f.append(f1_score(y_test, y_predicted, average='macro'))

    w_avg_p.append(precision_score(
        y_test, y_predicted, average='weighted'))
    m_avg_p.append(precision_score(y_test, y_predicted, average='macro'))

    w_avg_r.append(recall_score(y_test, y_predicted, average='weighted'))
    m_avg_r.append(recall_score(y_test, y_predicted, average='macro'))

In [None]:
# One row per fold, one column per class, followed by the aggregated scores.
name_columns = list(class_labels)

metrics_f = pd.DataFrame(fscores, columns=name_columns).assign(
    **{"accuracy": acc, "macro avg": m_avg_f, "weighted avg": w_avg_f}
)
metrics_p = pd.DataFrame(precisions, columns=name_columns).assign(
    **{"weighted avg": w_avg_p, "macro avg": m_avg_p}
)
metrics_r = pd.DataFrame(recalls, columns=name_columns).assign(
    **{"weighted avg": w_avg_r, "macro avg": m_avg_r}
)

# Show the three tables in the order precision, recall, f-score.
for heading, table in (
    ("\nMétricas precision:", metrics_p),
    ("\n\nMétricas recall:", metrics_r),
    ("\n\nMétricas fscore:", metrics_f),
):
    print(heading)
    display(table)

In [None]:
# One box per class, summarising that class's F1-score over the folds.
# The per-class columns are taken from name_columns instead of a hard-coded
# list, so the cell keeps working when the label set changes.
melted_metrics_f = metrics_f[name_columns].melt()
palette = sns.color_palette("husl", n_colors=melted_metrics_f["variable"].nunique())

sns.boxplot(x="variable", y="value", hue="variable", data=melted_metrics_f, palette=palette)
# The x axis and the hue both encode the class; every value is an F1-score.
plt.xlabel("Class")
plt.ylabel("F1-score")
plt.title("Per-class F1-score across folds")
plt.legend(title="Class", loc="upper right")
plt.show()

Analisando as métricas, podemos concluir que o desempenho do modelo na classificação dos POIs com base no embedding gerado pelo HMRM não é muito alto. Isso sugere que o embedding pode não capturar todas as características importantes dos dados de check-in do Alabama, levando a um desempenho relativamente baixo na classificação dos POIs.

Porém, isso também pode ser consequência da escolha dos hiperparâmetros do próprio HMRM; talvez valha a pena estudar melhor, por exemplo, o número de componentes latentes (K), o peso e o tamanho do embedding. **=>** ***Se for esse o caso, tenho algumas dúvidas:***

***1. Faz sentido testar diferentes valores, como no exemplo comentado na main, até achar um que dê resultados melhores?***

***2. ou esses resultados são satisfatórios já que o MTL "aprenderia e melhoraria" as informações?***