In [None]:
import logging
from logging import getLogger
from recbole.config import Config
from recbole.data import create_dataset, data_preparation
from recbole.model.sequential_recommender import GRU4Rec, FPMC, GRU4RecF, BERT4Rec, FOSSIL
from recbole.model.general_recommender import BPR, FISM, DMF, FISM, ItemKNN, MultiVAE, NeuMF, SpectralCF
from recbole.model.context_aware_recommender import DSSM
from recbole.trainer import Trainer
from recbole.utils import init_seed, init_logger
import pandas as pd
import os

In [None]:
# Directory that holds the preprocessed files read by the cells below.
DATA_PATH = './data/mind/preprocessed/'

# Full paths of the two files inside DATA_PATH, built in one pass.
INTER_DATA_PATH, ITEM_DATA_PATH = (
    os.path.join(DATA_PATH, file_name)
    for file_name in ("mind_large_train.inter", "mind_large_train.item")
)

In [None]:
# Read the tab-separated file at INTER_DATA_PATH into a DataFrame.
inter_df = pd.read_csv(INTER_DATA_PATH, sep='\t')

In [None]:
# Peek at 5 randomly chosen rows of the loaded table.
inter_df.sample(5)

In [None]:
# Read the tab-separated file at ITEM_DATA_PATH into a DataFrame.
item_df = pd.read_csv(ITEM_DATA_PATH, sep='\t')

In [None]:
from recbole.quick_start import run_recbole
from utils.utils import load_config
# Load the BPR settings from the YAML file via the project helper
# (presumably returns a dict, since it is passed as `config_dict` -- confirm).
cfg_path = "./configs/general/bpr.yaml"
config_dict = load_config(cfg_path)

# Hand the model name, dataset name and loaded settings to RecBole's runner.
run_recbole(model='BPR', dataset='mind_small', config_dict=config_dict)

In [7]:
import gdown
import os

In [8]:
# Show the kernel's working directory; the relative paths used in this
# notebook ("README.md", "log/...", "data/...") resolve against it.
os.getcwd()

'c:\\Users\\Aleksey Ryabykin\\Documents\\GitHub\\feed-ranking'

In [2]:
from utils.utils import load_config, load_data

In [3]:
# Fetch the "mind_small" files with the project helper. Per the recorded
# output of this cell, they are downloaded into data/preprocessed/mind_small.
load_data("mind_small")

Creating dir data
Loading data


Retrieving folder list


Processing file 1M-48Y8oTk0p77jJ6Q_JjbteqBwo6d249 mind_small.dev.inter
Processing file 1Ti49GHmiJJmON_VjxZgSf5zsjNMwMR5S mind_small.dev.item
Processing file 1k-2vSdwjPvwINsOW8tHS3xAVzJ_Fewwo mind_small.train.inter
Processing file 1z_MPGmg65M6fTcNcq1UI34cBTW6XTLFz mind_small.train.item
Building directory structure completed


Retrieving folder list completed
Building directory structure
Downloading...
From: https://drive.google.com/uc?id=1M-48Y8oTk0p77jJ6Q_JjbteqBwo6d249
To: c:\Users\Aleksey Ryabykin\Documents\GitHub\feed-ranking\data\preprocessed\mind_small\mind_small.dev.inter
100%|██████████| 70.4M/70.4M [00:06<00:00, 11.4MB/s]
Downloading...
From: https://drive.google.com/uc?id=1Ti49GHmiJJmON_VjxZgSf5zsjNMwMR5S
To: c:\Users\Aleksey Ryabykin\Documents\GitHub\feed-ranking\data\preprocessed\mind_small\mind_small.dev.item
100%|██████████| 33.5M/33.5M [00:02<00:00, 11.3MB/s]
Downloading...
From (uriginal): https://drive.google.com/uc?id=1k-2vSdwjPvwINsOW8tHS3xAVzJ_Fewwo
From (redirected): https://drive.google.com/uc?id=1k-2vSdwjPvwINsOW8tHS3xAVzJ_Fewwo&confirm=t&uuid=c16eb59d-7e3c-46b9-912f-de5cee8e34a3
To: c:\Users\Aleksey Ryabykin\Documents\GitHub\feed-ranking\data\preprocessed\mind_small\mind_small.train.inter
100%|██████████| 150M/150M [00:13<00:00, 11.2MB/s] 
Downloading...
From: https://drive.google.co

Check c:\Users\Aleksey Ryabykin\Documents\GitHub\feed-ranking\data/preprocessed/mind_small



Download completed


In [100]:
import re
import glob
import os

def extract_metrics_from_log(log_file):
    """Collect every logged ``<metric>@<k> : <value>`` reading from a log file.

    The metric names come from the first ``metrics = [...]`` line in the log.
    Each name is lower-cased and then searched for throughout the whole file.

    Args:
        log_file: Path of the log file to parse.

    Returns:
        dict: ``{metric: {k: [value, ...]}}`` where ``metric`` is the
        lower-cased metric name, ``k`` is the cutoff as a string, and the list
        holds every matching value as a float, in order of appearance. A
        metric that never appears in the log maps to an empty dict.

    Raises:
        IndexError: If the log contains no ``metrics = [...]`` line. The type
            is kept from the previous implementation, which raised it
            implicitly; it now carries a descriptive message.
    """
    with open(log_file, 'r') as file:
        log_content = file.read()

    declared = re.search(r"metrics = \[([^]]+)\]", log_content)
    if declared is None:
        raise IndexError(f"no 'metrics = [...]' line found in {log_file!r}")

    # Split on bare commas and strip whitespace/quotes per entry, so both
    # "['Recall', 'NDCG']" and "['Recall','NDCG']" (or unquoted names) parse.
    metrics = [
        entry.strip().strip("'\"").lower()
        for entry in declared.group(1).split(',')
    ]
    # An empty name would turn the search below into "@<k> : <value>" and
    # match every metric, so drop leftovers from e.g. a trailing comma.
    metrics = [metric for metric in metrics if metric]

    results_dict = {}
    for metric in metrics:
        metric_dict = {}
        # re.escape keeps a metric name from being read as regex syntax; the
        # value pattern accepts only a well-formed number, so a sentence-ending
        # period after the value cannot break float().
        pattern = r'{}@(\d+) : (\d+(?:\.\d+)?)'.format(re.escape(metric))
        for key, value in re.findall(pattern, log_content):
            metric_dict.setdefault(key, []).append(float(value))
        results_dict[metric] = metric_dict
    return results_dict


def get_best_metrics(log_file):
    """Return the highest logged value for every ``metric@k`` pair in a log.

    Args:
        log_file: Path of the log file, passed to ``extract_metrics_from_log``.

    Returns:
        dict: ``{"<metric>@<k>": best_value}`` where ``best_value`` is the
        maximum of all values recorded for that metric and cutoff.
    """
    per_metric = extract_metrics_from_log(log_file)
    return {
        f"{metric_name}@{cutoff}": max(readings)
        for metric_name, by_cutoff in per_metric.items()
        for cutoff, readings in by_cutoff.items()
    }

def get_last_log(model_type, dataset):
    """Return the most recent log file for a model/dataset pair.

    Candidates are the paths matching
    ``log/<model_type>/<model_type>-<dataset>-*`` relative to the current
    working directory. The one with the largest ``os.path.getctime`` wins.
    Note that ``getctime`` is the creation time on Windows but the last
    metadata-change time on Unix.

    Args:
        model_type: Model name; used as both the sub-directory and the file
            name prefix.
        dataset: Dataset name; second component of the file name.

    Returns:
        str: Path of the selected log file, as returned by ``glob.glob``.

    Raises:
        ValueError: If no file matches the pattern. The type is the same one
            ``max()`` raised before, now with a message naming the pattern.
    """
    pattern = f'log/{model_type}/{model_type}-{dataset}-*'
    list_of_files = glob.glob(pattern)
    if not list_of_files:
        raise ValueError(
            f"no log files match {pattern!r} (searched from {os.getcwd()!r})"
        )
    return max(list_of_files, key=os.path.getctime)


def update_markdown(model_type, dataset, name, log_file: str=None):
    """Append one row of best metric values to the results table in README.md.

    The best values are read from ``log_file`` or, when that is not given,
    from the newest log returned by ``get_last_log(model_type, dataset)``.
    ``README.md`` is read from the current working directory. The first table
    whose header row starts with ``| Model `` and which is followed by a blank
    line is rewritten in place with the new row appended. If no such table
    exists, a message is printed and the file is left untouched.

    Args:
        model_type: Model name, used only to locate the newest log.
        dataset: Dataset name, used only to locate the newest log.
        name: Label written into the first column of the new row.
        log_file: Optional explicit log path; overrides the newest-log lookup.

    Raises:
        KeyError: If the log lacks one of the twelve expected ``metric@k``
            entries. This is raised before README.md is written.
        FileNotFoundError: If ``README.md`` does not exist.
    """
    source_log = log_file if log_file else get_last_log(model_type, dataset)
    data = {name: get_best_metrics(source_log)}
    metrics = ["precision@1", "precision@5", "precision@10",
               "recall@1", "recall@5", "recall@10",
               "map@1", "map@5", "map@10",
               "ndcg@1", "ndcg@5", "ndcg@10"]

    with open("README.md", "r") as file:
        markdown_content = file.read()

    # Group 1 captures everything between the header row and the first blank
    # line, i.e. the separator row plus all existing data rows.
    table_regex = r"\| Model .*? \|.*?\|\s*\n([\s\S]*?)\n\n"
    table_match = re.search(table_regex, markdown_content, re.MULTILINE)

    if table_match:
        existing_table = table_match.group(1)

        # Strip each cell: the rows are re-joined with " | " below, so keeping
        # the original padding would add one more space per side on each call.
        table_data = [
            [cell.strip() for cell in line.split("|")[1:-1]]
            for line in existing_table.split("\n")
            if line.strip()
        ]
        new_row = [name]
        new_row.extend(str(data[name][metric]) for metric in metrics)
        table_data.append(new_row)
        updated_table_content = "\n".join("| " + " | ".join(row) + " |" for row in table_data)

        header = "| Model | " + " | ".join(metrics) + " |\n"
        # Splice by match position instead of re.sub: only the table that was
        # parsed is replaced, and backslashes in the table text are written
        # literally rather than interpreted as replacement escapes.
        start, end = table_match.span()
        updated_markdown_content = (
            markdown_content[:start]
            + header + updated_table_content + "\n\n"
            + markdown_content[end:]
        )

        with open("README.md", "w") as file:
            file.write(updated_markdown_content)
    else:
        print("No table found in the Markdown file.")

In [104]:
# Append a "GRU baseline" row to the README table from one specific run log.
update_markdown(
    model_type='GRU4Rec',
    dataset='mind_small',
    name="GRU baseline",
    log_file='log/GRU4Rec/GRU4Rec-mind_small-Jun-05-2023_20-30-17-03ad2b.log',
)

In [52]:
# Best metric values of the newest matching log, keyed by the experiment label.
# The dict key was missing (a syntax error); `name` is the key the later cells
# index with (`data[name][metric]`). `name`, `model_type` and `dataset` must
# already be defined in the kernel.
data = {name: get_best_metrics(get_last_log(model_type, dataset))}

In [5]:
# Parse a sample log into {metric: {k: [values]}} to inspect the parser output.
x = extract_metrics_from_log('log_example.log')

In [53]:


# Metric columns of the results table, in display order. Defined here so the
# cell does not depend on a `metrics` list left over in the kernel (no other
# top-level cell defines one).
metrics = ["precision@1", "precision@5", "precision@10",
           "recall@1", "recall@5", "recall@10",
           "map@1", "map@5", "map@10",
           "ndcg@1", "ndcg@5", "ndcg@10"]

# Header row, then the Markdown separator row with one `---` per column.
table = "| Model | " + " | ".join(metrics) + " |\n"
table += "| --- | " + " | ".join(["---"] * len(metrics)) + " |\n"

# Single data row. `name` and `data` must already exist in the kernel.
table += f"| {name}"
for metric in metrics:
    table += f" | {data[name][metric]}"

table += " |"

In [89]:
import re

# Inline version of the README update that update_markdown() performs. It is
# kept as top-level statements so that intermediate values such as
# `table_match` stay available for inspection in later cells.
model_type = 'GRU4Rec'
dataset = "mind_small"
name = "Exp 3"

# Metric columns of the results table, in display order. Defined here so the
# cell does not depend on a `metrics` list left over in the kernel.
metrics = ["precision@1", "precision@5", "precision@10",
           "recall@1", "recall@5", "recall@10",
           "map@1", "map@5", "map@10",
           "ndcg@1", "ndcg@5", "ndcg@10"]

# Best values from the newest log of this model/dataset pair.
data = {name: get_best_metrics(get_last_log(model_type, dataset))}


with open("README.md", "r") as file:
    markdown_content = file.read()

# Group 1 captures everything between the header row and the first blank
# line, i.e. the separator row plus all existing data rows.
table_regex = r"\| Model .*? \|.*?\|\s*\n([\s\S]*?)\n\n"
table_match = re.search(table_regex, markdown_content, re.MULTILINE)

if table_match:
    existing_table = table_match.group(1)

    # Strip each cell: the rows are re-joined with " | " below, so keeping the
    # original padding would add one more space per side on every run.
    table_data = [
        [cell.strip() for cell in line.split("|")[1:-1]]
        for line in existing_table.split("\n")
        if line.strip()
    ]
    new_row = [name]
    new_row.extend([str(data[name][metric]) for metric in metrics])

    table_data.append(new_row)
    updated_table_content = "\n".join("| " + " | ".join(row) + " |" for row in table_data)

    header = "| Model | " + " | ".join(metrics) + " |\n"
    # Splice by match position instead of re.sub: only the table that was
    # parsed is replaced, and backslashes in the table text are written
    # literally rather than interpreted as replacement escapes.
    updated_markdown_content = (
        markdown_content[:table_match.start()]
        + header + updated_table_content + "\n\n"
        + markdown_content[table_match.end():]
    )

    with open("README.md", "w") as file:
        file.write(updated_markdown_content)
else:
    print("No table found in the Markdown file.")


In [84]:
# Inspect the result of the README table search (None when nothing matched).
table_match

<re.Match object; span=(970, 1309), match='| Model | precision@1 | precision@5 | precision@1>

In [1]:
import pandas as pd

In [4]:
# Read the tab-separated mind_small training .inter file into a DataFrame.
df = pd.read_csv(
    'data/preprocessed/mind_small/mind_small.train.inter',
    sep='\t',
)