In [1]:
import pandas as pd
import numpy as np
import os
os.chdir(os.path.dirname(os.getcwd()))
print(os.getcwd())

%load_ext autoreload
%autoreload 2
%reload_ext autoreload


from resources.constants import *

pictures_df = pd.read_csv(PICTURE_TRIPLETS_CSV_PATH, sep=CSV_SEPARATOR)
outfits_df = pd.read_csv(OUTFITS_CSV_PATH, sep=CSV_SEPARATOR)
user_triplets_df = pd.read_csv(USER_ACTIVITY_TRIPLETS_CSV_PATH, sep=CSV_SEPARATOR)
# Ensure tags are lists
outfits_df["tag_categories"] = outfits_df["tag_categories"].apply(eval)
outfits_df["outfit_tags"] = outfits_df["outfit_tags"].apply(eval)


c:\Users\kaborg15\Python_projects\Vibrent_Dataset_Collection


In [2]:
import src.load_baseline_resources
import pickle
from resources.constants import EMBEDDING_MODEL_DICT_PICKLE_PATH
# loaded_embeddings_dict = src.load_baseline_resources.load_embeddings_form_folder()
# pickle.dump(loaded_embeddings_dict, open(EMBEDDING_MODEL_DICT_PICKLE_PATH, "wb"))

# Loading embeddings is expensive, so we save them to a pickle file
loaded_embeddings_dict = pickle.load(open(EMBEDDING_MODEL_DICT_PICKLE_PATH, "rb"))


pictures_df["embeddings"] = pictures_df["picture.id"].map(loaded_embeddings_dict)
outfit_pictures_df = pictures_df.groupby("outfit.id").agg({"picture.id": list, "embeddings": list}).reset_index()
outfits_df["embeddings"] = outfits_df["id"].map(outfit_pictures_df.set_index("outfit.id")["embeddings"])
na_embedding_outfit_ids = outfits_df[outfits_df["embeddings"].isna()]["id"]
outfits_df = outfits_df.dropna(subset=["embeddings"])

In [3]:
# Introduce group to rental triplets
id_group_dict = outfits_df[["id", "group"]].to_dict(orient="records")
id_group_dict = {x["id"]: x["group"] for x in id_group_dict}
user_triplets_df["group"] = user_triplets_df["outfit.id"].map(id_group_dict)
# Remove triplets with no embeddings
user_triplets_df = user_triplets_df[~user_triplets_df["outfit.id"].isin(na_embedding_outfit_ids)]

In [4]:
from src.prepare_train_test_splits import convert_user_orders_to_train_test_splits
user_orders_df = user_triplets_df.groupby("customer.id").agg({"outfit.id": list, "group":list, "meta.validFrom":list, "derived.bookingTime":list}).reset_index()
user_orders_df["num_orders"] = user_orders_df["outfit.id"].apply(lambda x: len(x))
user_orders_df = user_orders_df[user_orders_df["num_orders"] > 1]

user_splits_df, user_splits_unique_df = convert_user_orders_to_train_test_splits(user_orders_df)

No unique outfit found with groups ['group.8abe6af9eccc8b578c2ef59628f8b454'
 'group.96f4cce22d4a236e0652c67fc9b18d12'
 'group.8abe6af9eccc8b578c2ef59628f8b454'
 'group.96f4cce22d4a236e0652c67fc9b18d12']


In [5]:
from tqdm.notebook import tqdm

def build_tag_dict(tags, tag_categories):
    tag_dict = {}
    for tag, tag_category in zip(tags, tag_categories):
        if tag_category not in tag_dict:
            tag_dict[tag_category] = []
        tag_dict[tag_category].append(tag)
    return tag_dict

tqdm.pandas()

outfits_df["tag_dict"] = outfits_df.progress_apply(lambda x: build_tag_dict(x["outfit_tags"], x["tag_categories"]), axis=1)

  0%|          | 0/15193 [00:00<?, ?it/s]

In [6]:
from sklearn.preprocessing import MultiLabelBinarizer

all_tags = outfits_df["outfit_tags"].values.tolist()
mlb = MultiLabelBinarizer()
one_hot_encoded = mlb.fit_transform(all_tags)
outfits_df["one_hot_encoded"] = [np.array(oh_list) for oh_list in one_hot_encoded.tolist()]

In [7]:
outfits_df

Unnamed: 0,id,name,description,group,owner,timeCreated,retailPrice,outfit_tags,tag_categories,embeddings,tag_dict,one_hot_encoded
0,outfit.fffdaa715c3646f8b1c0f04d549ff07e,Out of stock - Asymmetric Frilled Dress,"This fun, short dress features and asymmetric ...",group.50a586c78eb7626e294ba3bd07d12c79,464,2017-12-30 11:28:01.000,4000.0,"[Metallic, Synthetic, Cotton, Sandro, Dresses,...","[Details, Material, Material, Brand, Category,...","[[1.738, -0.0944, -0.0934, 0.1979, 0.2365, -0....","{'Details': ['Metallic'], 'Material': ['Synthe...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
1,outfit.fffa1b9a3db6415d806f3c48f8ab58d9,Yellow Shell Mellomholmene Blouse,This beautiful blouse features an adjustable n...,group.61ad2fcabb3e9197e3836376e6b67f2c,112,2021-06-07 12:07:22.921,1300.0,"[Yellow, Cotton, Blouses, Everyday, M, Summer,...","[Color, Material, Category, Occasion, Size, Se...","[[-0.0843, -0.0567, -0.05966, -0.077, 1.166, -...","{'Color': ['Yellow'], 'Material': ['Cotton'], ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
2,outfit.fff175b13ceb453f9928625491412ede,Kaula Dress Black,Kaula from Rodebjer is a fitted dress made in ...,group.37c2b59d63d3a9c2d58e07f532f71f7f,635,2023-06-05 09:17:59.004,3100.0,"[Synthetic, Multi Season, Rodebjer, Everyday, ...","[Material, Seasons, Brand, Occasion, Size, Cat...","[[1.27, -0.0494, -0.02313, -0.1021, 0.2625, 0....","{'Material': ['Synthetic'], 'Seasons': ['Multi...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
3,outfit.ffef9d7c292a48b69076d2df2e32352f,For sale - Jarvis Blouse,This wrap blouse has mid length sleeves and a ...,group.dfcaa57546b0b7a5e9eb204449b6cc1c,745,2021-05-18 14:02:28.690,1500.0,"[Cotton, Multi Season, Floral, Wrap, XS, Style...","[Material, Seasons, Details, Fit, Size, Brand,...","[[-0.04453, -0.08777, -0.0676, -0.07196, 0.086...","{'Material': ['Cotton'], 'Seasons': ['Multi Se...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
4,outfit.ffeef842238f4dbdabc6c730a75aa2bd,Black Amber Pants,"Feel slack and nice dressed with this pant, ma...",group.ee297c977905eb21a123a4aea5fbb6d2,504,2021-07-16 14:02:30.643,1200.0,"[Winter, Cotton, L, Knitwear, Everyday, Fall, ...","[Seasons, Material, Size, Category, Occasion, ...","[[0.02425, -0.1558, -0.1343, -0.07513, -0.0170...","{'Seasons': ['Winter', 'Fall'], 'Material': ['...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
...,...,...,...,...,...,...,...,...,...,...,...,...
15824,outfit.001bf665330140cf854dcfb1cbff6b5f,Out of stock - Harley Vintage White Midi Dress,This gorgeous dress is cut in the most flatter...,group.d91a2a6728833c8082dadf27b95488a9,140,2019-06-25 10:13:55.000,3800.0,"[Viscose, L, Midi, Dresses, White, Formal, Pia...","[Material, Size, Length, Category, Color, Occa...","[[-0.10986, -0.05212, -0.04785, -0.1338, 0.035...","{'Material': ['Viscose'], 'Size': ['L'], 'Leng...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
15825,outfit.0018701ce6b049ebadc314d16623caa8,Vintage Burberry Trench Coat,You really can't go wrong with this Classic Tr...,group.6be510229d0f9faf5d19d52e7e2b2a95,58,2023-02-07 07:54:06.214,22000.0,"[Winter, Cotton, Midi, Everyday, Fall, Burberr...","[Seasons, Material, Length, Occasion, Seasons,...","[[0.9565, 0.6475, -0.0587, 0.704, 0.2399, 0.04...","{'Seasons': ['Winter', 'Fall'], 'Material': ['...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
15826,outfit.0014a5c89b244077a3d7cffd4549718e,Mira Skirt Brown,The Mira Skirt in Brown from Stine Goya is an ...,group.668be5db7976aa2cb9213dd4c7f9b7fe,4,2023-10-09 09:12:14.631,1500.0,"[Viscose, Midi, Skirts, Summer, Stine Goya, Ev...","[Material, Length, Category, Seasons, Brand, B...","[[-0.1237, -0.03632, -0.08435, -0.1036, 1.478,...","{'Material': ['Viscose'], 'Length': ['Midi'], ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
15827,outfit.0013691ff35b440e9dcfe1748ec184c7,Oldina Parka Cotta,The Oldina Parka from Kari Traa is a women's p...,group.c82046bcba672c8ec9b21be4f844b402,552,2023-02-23 12:20:27.042,3500.0,"[Winter, Synthetic, Midi, Everyday, XS, Coats,...","[Seasons, Material, Length, Occasion, Size, Ca...","[[0.4219, 0.09644, -0.0454, 1.402, -0.08295, -...","{'Seasons': ['Winter'], 'Material': ['Syntheti...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."


In [49]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import numpy as np

# Load your dataframe (example)
# outfits_df = pd.read_csv("path_to_your_dataframe.csv")

# Assuming your dataframe has the following columns:
# "one_hot_encoded" and "mean_embeddings"
# Convert them to numpy arrays
def get_mean_embedding(embeddings):
    embeddings = np.array(embeddings)
    mean_embedding = np.mean(embeddings, axis=0)
    return mean_embedding

def concatenate_embeddings(oh_embeddings, image_embeddings, oh_weighting):
    oh_embeddings = np.array(oh_embeddings) * oh_weighting
    return np.concatenate((oh_embeddings, image_embeddings))

outfits_df["mean_embeddings"] = outfits_df["embeddings"].apply(lambda x: get_mean_embedding(x))
#one_hot_encoded = np.array(outfits_df["one_hot_encoded"].tolist())
#mean_embeddings = np.array(outfits_df["mean_embeddings"].tolist())
outfits_df["concatenated_embeddings"] = outfits_df.apply(lambda x: concatenate_embeddings(x["one_hot_encoded"], x["mean_embeddings"], oh_weighting=1000), axis=1)

# Concatenate the embeddings to form the input for the autoencoder
input_embeddings = outfits_df["concatenated_embeddings"]#np.concatenate((one_hot_encoded, mean_embeddings), axis=1)


# Convert the input embeddings to a PyTorch tensor
input_embeddings = torch.tensor(input_embeddings.tolist(), dtype=torch.float32)



In [52]:
for val in outfits_df["concatenated_embeddings"].iloc[0]:
    print(val)
    print(type(val))
    print(val.shape)
    break

[ 0.          0.          0.         ...  1.21191406 -0.06329346
  0.02694702]


In [45]:
# Define the autoencoder
class Autoencoder(nn.Module):
    def __init__(self, input_dim, hidden_dim, latent_dim):
        super(Autoencoder, self).__init__()
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, latent_dim),
            nn.ReLU()
        )
        self.decoder = nn.Sequential(
            nn.Linear(latent_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, input_dim),
            nn.Sigmoid()  # Assuming the input is normalized between 0 and 1
        )

    def forward(self, x):
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return encoded, decoded

# Define the dimensions
input_dim = input_embeddings.shape[1]
hidden_dim = 2048  # You can adjust this as needed
latent_dim = 512   # You can adjust this as needed

# Instantiate the model, define the loss function and the optimizer
model = Autoencoder(input_dim, hidden_dim, latent_dim)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 8
batch_size = 32

for epoch in tqdm(range(num_epochs)):
    permutation = torch.randperm(input_embeddings.size()[0])
    
    for i in range(0, input_embeddings.size()[0], batch_size):
        indices = permutation[i:i+batch_size]
        batch_inputs = input_embeddings[indices]

        # Forward pass
        encoded, decoded = model(batch_inputs)

        # Compute the loss
        loss = criterion(decoded, batch_inputs)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

# Save the model
#torch.save(model.state_dict(), 'autoencoder_model.pth')


  0%|          | 0/8 [00:00<?, ?it/s]

Epoch [1/8], Loss: 0.1206


KeyboardInterrupt: 

In [31]:
def get_outfit_embeddings(outfits_df, model):
    one_hot_encoded = np.array(outfits_df["one_hot_encoded"].tolist())
    mean_embeddings = np.array(outfits_df["mean_embeddings"].tolist())
    input_embeddings = np.concatenate((one_hot_encoded, mean_embeddings), axis=1)
    input_embeddings = torch.tensor(input_embeddings, dtype=torch.float32)
    with torch.no_grad():
        encoded, decoded = model(input_embeddings)
    return encoded

outfit_embeddings = get_outfit_embeddings(outfits_df, model)
outfits_df["outfit_embeddings"] = [x.numpy() for x in outfit_embeddings]
print(np.stack(outfits_df["outfit_embeddings"].values).shape)

(15193, 512)


In [46]:
from sklearn.neighbors import NearestNeighbors
import numpy as np
from tqdm.notebook import tqdm

NUM_ITEMS = 100

def find_rental_history_embeddings(outfit_ids, outfit_to_embedding_dict):
    return [outfit_to_embedding_dict[outfit_id] for outfit_id in outfit_ids]

def get_mean_embedding(embeddings):
    embeddings = np.array(embeddings)
    mean_embedding = np.mean(embeddings, axis=0)
    return mean_embedding

def get_nearest_neighbors_batch(embeddings, nn, num_items, index_to_id):
    distances, indices = nn.kneighbors(embeddings, n_neighbors=num_items+1)
    ids = [[index_to_id[i] for i in idx[1:]] for idx in indices]
    distances = [dist[1:] for dist in distances]
    return ids, distances


def predict_nearest_neighbors(df, outfits_df, embeddings_column="embeddings", subset_length=-1):
    outfit_to_embedding_dict = outfits_df.set_index("id")[embeddings_column].to_dict()
    index_to_outfit_dict = {i: outfit_id for i, outfit_id in enumerate(outfits_df["id"].values)}
    group_to_embedding_dict = outfits_df.set_index("group")[embeddings_column].to_dict()
    index_to_group_dict = {i: group for i, group in enumerate(outfits_df["group"].values)}

    df["train_id_embeddings"] = df["train_outfit_ids"].apply(lambda x: find_rental_history_embeddings(x, outfit_to_embedding_dict))
    df["train_group_embeddings"] = df["train_group"].apply(lambda x: find_rental_history_embeddings(x, group_to_embedding_dict))

    df["rental_history_id_embedding"] = df["train_id_embeddings"].apply(lambda x: get_mean_embedding(x))
    df["rental_history_group_embedding"] = df["train_group_embeddings"].apply(lambda x: get_mean_embedding(x))

    nearest_neighbors = NearestNeighbors(n_neighbors=NUM_ITEMS+1, metric="cosine")
    embeddings = np.stack(outfits_df[embeddings_column].values)
    nearest_neighbors.fit(embeddings)

    id_embeddings = np.stack(df["rental_history_id_embedding"].values)
    group_embeddings = np.stack(df["rental_history_group_embedding"].values)

    id_predictions, id_distances = get_nearest_neighbors_batch(id_embeddings, nearest_neighbors, NUM_ITEMS, index_to_outfit_dict)
    group_predictions, group_distances = get_nearest_neighbors_batch(group_embeddings, nearest_neighbors, NUM_ITEMS, index_to_group_dict)

    df["id_prediction"], df["id_prediction_distances"] = id_predictions, id_distances
    df["group_prediction"], df["group_prediction_distances"] = group_predictions, group_distances
    
    return df

def predict_nearest_neighbors_images(df, outfits_df, embeddings_column="embeddings", subset_length=-1):
    outfits_df["mean_embeddings"] = outfits_df[embeddings_column].apply(lambda x: get_mean_embedding(x))

    return predict_nearest_neighbors(df, outfits_df, embeddings_column="mean_embeddings", subset_length=subset_length)

# Apply to dataframes
tqdm.pandas()

# Tag based predictions
# user_splits_df = predict_nearest_neighbors(user_splits_df, outfits_df, embeddings_column="one_hot_encoded", subset_length=-1)
# user_splits_unique_df = predict_nearest_neighbors(user_splits_unique_df, outfits_df, embeddings_column="one_hot_encoded", subset_length=-1)

# Image based predictions
# user_splits_df = predict_nearest_neighbors_images(user_splits_df, outfits_df, embeddings_column="embeddings", subset_length=-1)
# user_splits_unique_df = predict_nearest_neighbors_images(user_splits_unique_df, outfits_df, embeddings_column="embeddings", subset_length=-1)

# Combined predictions
#user_splits_df = predict_nearest_neighbors(user_splits_df, outfits_df, embeddings_column="outfit_embeddings", subset_length=-1)
#user_splits_unique_df = predict_nearest_neighbors(user_splits_unique_df, outfits_df, embeddings_column="outfit_embeddings", subset_length=-1)

# Concat predictions
user_splits_df = predict_nearest_neighbors(user_splits_df, outfits_df, embeddings_column="concatenated_embeddings", subset_length=-1)
user_splits_unique_df = predict_nearest_neighbors(user_splits_unique_df, outfits_df, embeddings_column="concatenated_embeddings", subset_length=-1)

In [48]:
from IPython.display import display

def evaluate_hit_rate_at_n(test_id, predicted_ids, n=10):
    if predicted_ids is np.nan:
        print(f"None prediction for {test_id}!")
        return 0
    predicted_ids = predicted_ids[:n]
    if test_id in predicted_ids:
        #print(f"Hit at {n} for {test_id} in {predicted_ids}")
        return 1
    return 0

HIT_RATE_COLUMNS = ["id_hit_rate_at_100", "id_hit_rate_at_10", "group_hit_rate_at_100", "group_hit_rate_at_10"]
def evaluate_df_hit_rate_at_n(df, n=10):
    df["id_hit_rate_at_100"] = df.apply(lambda x: evaluate_hit_rate_at_n(x["test_outfit_id"], x["id_prediction"], n=100), axis=1)
    df["id_hit_rate_at_10"] = df.apply(lambda x: evaluate_hit_rate_at_n(x["test_outfit_id"], x["id_prediction"], n=10), axis=1)
    df["group_hit_rate_at_100"] = df.apply(lambda x: evaluate_hit_rate_at_n(x["test_group"], x["group_prediction"], n=100), axis=1)
    df["group_hit_rate_at_10"] = df.apply(lambda x: evaluate_hit_rate_at_n(x["test_group"], x["group_prediction"], n=10), axis=1)
    display(df[HIT_RATE_COLUMNS].mean())
    return df


user_splits_df = evaluate_df_hit_rate_at_n(user_splits_df, n=10)
user_splits_unique_df = evaluate_df_hit_rate_at_n(user_splits_unique_df, n=10)

id_hit_rate_at_100       0.044224
id_hit_rate_at_10        0.010379
group_hit_rate_at_100    0.048736
group_hit_rate_at_10     0.013087
dtype: float64

id_hit_rate_at_100       0.047404
id_hit_rate_at_10        0.009932
group_hit_rate_at_100    0.049661
group_hit_rate_at_10     0.012641
dtype: float64

In [10]:
outfits_df

Unnamed: 0,id,name,description,group,owner,timeCreated,retailPrice,outfit_tags,tag_categories,embeddings,tag_dict,one_hot_encoded
0,outfit.fffdaa715c3646f8b1c0f04d549ff07e,Out of stock - Asymmetric Frilled Dress,"This fun, short dress features and asymmetric ...",group.50a586c78eb7626e294ba3bd07d12c79,464,2017-12-30 11:28:01.000,4000.0,"[Metallic, Synthetic, Cotton, Sandro, Dresses,...","[Details, Material, Material, Brand, Category,...","[[1.738, -0.0944, -0.0934, 0.1979, 0.2365, -0....","{'Details': ['Metallic'], 'Material': ['Synthe...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
1,outfit.fffa1b9a3db6415d806f3c48f8ab58d9,Yellow Shell Mellomholmene Blouse,This beautiful blouse features an adjustable n...,group.61ad2fcabb3e9197e3836376e6b67f2c,112,2021-06-07 12:07:22.921,1300.0,"[Yellow, Cotton, Blouses, Everyday, M, Summer,...","[Color, Material, Category, Occasion, Size, Se...","[[-0.0843, -0.0567, -0.05966, -0.077, 1.166, -...","{'Color': ['Yellow'], 'Material': ['Cotton'], ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
2,outfit.fff175b13ceb453f9928625491412ede,Kaula Dress Black,Kaula from Rodebjer is a fitted dress made in ...,group.37c2b59d63d3a9c2d58e07f532f71f7f,635,2023-06-05 09:17:59.004,3100.0,"[Synthetic, Multi Season, Rodebjer, Everyday, ...","[Material, Seasons, Brand, Occasion, Size, Cat...","[[1.27, -0.0494, -0.02313, -0.1021, 0.2625, 0....","{'Material': ['Synthetic'], 'Seasons': ['Multi...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
3,outfit.ffef9d7c292a48b69076d2df2e32352f,For sale - Jarvis Blouse,This wrap blouse has mid length sleeves and a ...,group.dfcaa57546b0b7a5e9eb204449b6cc1c,745,2021-05-18 14:02:28.690,1500.0,"[Cotton, Multi Season, Floral, Wrap, XS, Style...","[Material, Seasons, Details, Fit, Size, Brand,...","[[-0.04453, -0.08777, -0.0676, -0.07196, 0.086...","{'Material': ['Cotton'], 'Seasons': ['Multi Se...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
4,outfit.ffeef842238f4dbdabc6c730a75aa2bd,Black Amber Pants,"Feel slack and nice dressed with this pant, ma...",group.ee297c977905eb21a123a4aea5fbb6d2,504,2021-07-16 14:02:30.643,1200.0,"[Winter, Cotton, L, Knitwear, Everyday, Fall, ...","[Seasons, Material, Size, Category, Occasion, ...","[[0.02425, -0.1558, -0.1343, -0.07513, -0.0170...","{'Seasons': ['Winter', 'Fall'], 'Material': ['...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
...,...,...,...,...,...,...,...,...,...,...,...,...
15824,outfit.001bf665330140cf854dcfb1cbff6b5f,Out of stock - Harley Vintage White Midi Dress,This gorgeous dress is cut in the most flatter...,group.d91a2a6728833c8082dadf27b95488a9,140,2019-06-25 10:13:55.000,3800.0,"[Viscose, L, Midi, Dresses, White, Formal, Pia...","[Material, Size, Length, Category, Color, Occa...","[[-0.10986, -0.05212, -0.04785, -0.1338, 0.035...","{'Material': ['Viscose'], 'Size': ['L'], 'Leng...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
15825,outfit.0018701ce6b049ebadc314d16623caa8,Vintage Burberry Trench Coat,You really can't go wrong with this Classic Tr...,group.6be510229d0f9faf5d19d52e7e2b2a95,58,2023-02-07 07:54:06.214,22000.0,"[Winter, Cotton, Midi, Everyday, Fall, Burberr...","[Seasons, Material, Length, Occasion, Seasons,...","[[0.9565, 0.6475, -0.0587, 0.704, 0.2399, 0.04...","{'Seasons': ['Winter', 'Fall'], 'Material': ['...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
15826,outfit.0014a5c89b244077a3d7cffd4549718e,Mira Skirt Brown,The Mira Skirt in Brown from Stine Goya is an ...,group.668be5db7976aa2cb9213dd4c7f9b7fe,4,2023-10-09 09:12:14.631,1500.0,"[Viscose, Midi, Skirts, Summer, Stine Goya, Ev...","[Material, Length, Category, Seasons, Brand, B...","[[-0.1237, -0.03632, -0.08435, -0.1036, 1.478,...","{'Material': ['Viscose'], 'Length': ['Midi'], ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
15827,outfit.0013691ff35b440e9dcfe1748ec184c7,Oldina Parka Cotta,The Oldina Parka from Kari Traa is a women's p...,group.c82046bcba672c8ec9b21be4f844b402,552,2023-02-23 12:20:27.042,3500.0,"[Winter, Synthetic, Midi, Everyday, XS, Coats,...","[Seasons, Material, Length, Occasion, Size, Ca...","[[0.4219, 0.09644, -0.0454, 1.402, -0.08295, -...","{'Seasons': ['Winter'], 'Material': ['Syntheti...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."


In [42]:
user_splits_df

Unnamed: 0,train_outfit_ids,test_outfit_id,train_group,test_group,train_booking_times,test_booking_time,train_id_embeddings,train_group_embeddings,rental_history_id_embedding,rental_history_group_embedding,id_prediction,id_prediction_distances,group_prediction,group_prediction_distances,id_hit_rate_at_100,id_hit_rate_at_10,group_hit_rate_at_100,group_hit_rate_at_10
0,"[outfit.a0358734a2b24bf6b531d86101378b7d, outf...",outfit.e2ac14f5ba454e0eaf3e7c0f926c65ee,"[group.e7926cffbe8336c0f24ed413753ed407, group...",group.7888add59a20f7a82527345c46a93da6,"[2020-11-22 21:27:04.340, 2020-11-22 21:27:04....",2020-12-03 16:33:14.331,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[outfit.677624ad320f4f6996aca0def1d0aadd, outf...","[0.22405971020101478, 0.2240597102010149, 0.22...","[group.ffd964ee97b5df79aa712b4421c4dcce, group...","[0.22405971020101478, 0.2240597102010149, 0.22...",0,0,0,0
1,"[outfit.cc2a4ea6b82044d2804ee26e593fbc00, outf...",outfit.cc59868d04754b33a44f9f04e0df6b9a,"[group.89c749588d0ce926c1a32fdabf649397, group...",group.6e34f9306a4a8a3d637c2eef83a0a894,"[2024-01-15 10:38:41.449, 2024-01-15 10:38:41....",2024-01-29 20:18:51.948,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[outfit.ffb003f0cb704409b5db080bdd2482d4, outf...","[0.2215931913431679, 0.2337557977284308, 0.233...","[group.5da155d97fa0eb985818b4554ba03cbf, group...","[0.21858193519119296, 0.2307915924538304, 0.24...",0,0,0,0
2,"[outfit.0c22c8c8db0e4b469cb5433572cc6487, outf...",outfit.491310236c1447b58e87a3631f969b02,"[group.3ab741dfab8f4ec1c2385b1df3638792, group...",group.6c1d583e10ad920080b5605353209796,"[2022-06-01 23:16:59.078, 2022-06-01 23:16:59....",2022-06-01 23:16:59.078,"[[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0,...","[[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0,...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, ...","[outfit.ce8e8e7c943b44809161514e1007b9d0, outf...","[0.17595807580063239, 0.18111874947398243, 0.1...","[group.329b3257b997acea076e028366dd1e03, group...","[0.17595807580063239, 0.18111874947398243, 0.1...",0,0,0,0
3,[outfit.4eb647c9c034498da674fca7196fbccb],outfit.2392f0645935482bb171c7fd35384587,[group.60de1b488ecc65924d6714636e6f8ed5],group.95b244c3abe5d6b5552ef50448356d1a,[2022-07-31 20:31:01.840],2022-07-31 20:31:01.840,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[outfit.60c153e88cd5406eb40af52a1948312c, outf...","[0.0, 0.0, 0.09090909090909094, 0.090909090909...","[group.60de1b488ecc65924d6714636e6f8ed5, group...","[0.0, 0.09090909090909094, 0.09090909090909094...",0,0,0,0
4,"[outfit.e377aedd50ec4073af1f18cb9eab432b, outf...",outfit.1f534d95ccf6415a8af3b5799dd959e7,"[group.cbd88cf0356ef18209b64fb155b1879d, group...",group.e40cb44f292333d973f538d13f21cde6,"[2022-07-31 20:04:08.529, 2022-07-31 20:04:08....",2022-10-18 19:48:36.704,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[outfit.155567585a3c417fa6043924c2867d0e, outf...","[0.29949214661509493, 0.3039897341541765, 0.30...","[group.209da05a58272eae07318be4e316d3bb, group...","[0.28320126630381803, 0.2851786458880732, 0.28...",0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2211,"[outfit.a8e66ebfb9004fa6ac6d7b8c46e381de, outf...",outfit.6215b11566104bd09188bb1b69db5665,"[group.3de77ccdf58eb30b09258b457729e0f2, group...",group.4363d0a2b93e2a065e19ff499015fd32,"[2024-01-12 17:44:38.463, 2024-01-12 17:44:38....",2024-01-15 13:09:21.753,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[outfit.13db1499701e46f5b6e98d5e6d34267a, outf...","[0.23453445538025708, 0.27831216351296795, 0.2...","[group.bbb51bacdb27b2d2546f8a31be06169c, group...","[0.2141747220142588, 0.23019964108049895, 0.26...",0,0,0,0
2212,"[outfit.da552355bfd04649a987a3881b99d09c, outf...",outfit.b7ebd99d748b28d0,"[group.8068ba000114d2e1f247e469bab9e242, group...",group.fbb0f68dd4dd0de51d59284ea53c3425,"[2022-08-21 07:58:42.868, 2022-08-21 07:58:42....",2022-08-22 15:08:14.158,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[outfit.133b1fb9d7434277aaf0f788c5c9d606, outf...","[0.22150105583847712, 0.23683275592813702, 0.2...","[group.2c180ce17fa60d5264a271245c5b6e0d, group...","[0.2499999999999999, 0.26213521262737816, 0.26...",0,0,0,0
2213,"[outfit.bb709dadbe604c4bb947559236ff8e07, outf...",outfit.944a547c771246cd992088566b4f641d,"[group.8041d98fc65a186f3c94701cc1d365de, group...",group.d727b17229241e5524e82a56a9d1c08e,"[2023-08-31 19:16:11.997, 2023-08-31 19:16:11....",2024-01-08 22:51:31.777,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[outfit.1096c0a926c244b39b44ff523a6ad9e0, outf...","[0.24284823167830805, 0.24688784758813676, 0.2...","[group.424391a1c514a2be56a00dd30007f1ec, group...","[0.26616141246832337, 0.26616141246832337, 0.2...",1,0,1,0
2214,"[outfit.e80978de4a2a410c9ab14d3050fd9cf4, outf...",outfit.6b8aaed196c044848f417bc2da4314d0,"[group.51c6e2c0cae271a723afdfbeab59d2f1, group...",group.10e2052f38b5e8ee18aa6306bb7a6a9b,"[2021-12-06 18:32:21.698, 2021-12-06 18:51:54....",2022-02-08 21:05:20.159,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[outfit.50093dbaaf2841e3871dff882345d2c9, outf...","[0.2222222222222221, 0.22604725660874547, 0.24...","[group.47f26fbcc31dfd4f9b98703ea6fe384c, group...","[0.2222222222222221, 0.22604725660874547, 0.24...",0,0,0,0


In [41]:
import pyperclip

def format_dicts_into_latex(all_dict, ind_dict, precision=4, run_name="Random"):
    first_row = f"{run_name} Ind: & {all_dict['id_hit_rate_at_10']:.{precision}f} & {all_dict['id_hit_rate_at_100']:.{precision}f} & {ind_dict['id_hit_rate_at_10']:.{precision}f} & {ind_dict['id_hit_rate_at_100']:.{precision}f} \\\\"
    second_row = f"{run_name} Groups: & {all_dict['group_hit_rate_at_10']:.{precision}f} & {all_dict['group_hit_rate_at_100']:.{precision}f} & {ind_dict['group_hit_rate_at_10']:.{precision}f} & {ind_dict['group_hit_rate_at_100']:.{precision}f} \\\\\\hline"
    full_string = first_row + "\n" + second_row + "\n"
    print(full_string)
    pyperclip.copy(full_string)

all_dict = {column: user_splits_df[column].mean() for column in HIT_RATE_COLUMNS}
ind_dict = {column: user_splits_unique_df[column].mean() for column in HIT_RATE_COLUMNS}

format_dicts_into_latex(all_dict, ind_dict, precision=4, run_name="Tag Embed")

Tag Embed Ind: & 0.0194 & 0.0848 & 0.0163 & 0.0844 \\
Tag Embed Groups: & 0.0244 & 0.0921 & 0.0190 & 0.0889 \\\hline



In [None]:
import numpy as np

def get_outfit_category(tag_categories, tags, category):
    tag_categories, tags = np.array(tag_categories), np.array(tags)
    category_indexes = np.where(tag_categories == category)[0]
    if len(category_indexes) == 0:
        return ""
    cat_tags = tags[category_indexes]
    output = str(cat_tags[0])
    return output

outfits_df["size"] = outfits_df.apply(lambda x: get_outfit_category(x["tag_categories"], x["outfit_tags"], "Size"), axis=1)


In [None]:
outfits_df