diff --git a/all_requirements.txt b/all_requirements.txt new file mode 100644 index 0000000..9da0e49 --- /dev/null +++ b/all_requirements.txt @@ -0,0 +1,129 @@ +absl-py==1.4.0 +alembic==1.4.1 +asttokens==2.2.1 +astunparse==1.6.3 +backcall==0.2.0 +beautifulsoup4==4.10.0 +blessed==1.17.6 +cachetools==5.2.1 +certifi==2021.5.30 +charset-normalizer==2.0.6 +click==8.0.1 +cloudpickle==2.0.0 +comm==0.1.2 +contourpy==1.0.6 +cycler==0.10.0 +databricks-cli==0.15.0 +debugpy==1.6.5 +decorator==5.1.1 +docker==5.0.2 +entrypoints==0.3 +executing==1.2.0 +filelock==3.9.0 +Flask==2.0.2 +flatbuffers==23.3.3 +fonttools==4.38.0 +gast==0.4.0 +gdown==4.6.0 +gitdb==4.0.7 +GitPython==3.1.24 +google-auth==2.16.3 +google-auth-oauthlib==0.4.6 +google-pasta==0.2.0 +googleDriveFileDownloader==1.2 +greenlet==1.1.2 +grpcio==1.51.3 +gunicorn==20.1.0 +h5py==3.8.0 +idna==3.2 +importlib-metadata==4.8.1 +importlib-resources==5.10.2 +inquirer==2.7.0 +ipykernel==6.20.2 +ipython==8.8.0 +-e git+https://github.com/irec-org/irec.git@364f8663af1bd4670e2b047d80be09c79ab3c55a#egg=irec +itsdangerous==2.0.1 +jax==0.4.6 +jedi==0.18.2 +Jinja2==3.0.2 +joblib==1.0.1 +jupyter_client==7.4.9 +jupyter_core==5.1.3 +keras==2.11.0 +kiwisolver==1.3.2 +libclang==16.0.0 +llvmlite==0.39.1 +Mako==1.1.5 +Markdown==3.4.1 +MarkupSafe==2.0.1 +matplotlib==3.4.3 +matplotlib-inline==0.1.6 +mlflow==1.20.2 +nest-asyncio==1.5.6 +numba==0.56.4 +numpy==1.22.4 +oauthlib==3.2.2 +opt-einsum==3.3.0 +packaging==21.0 +pandas==1.3.3 +parso==0.8.3 +pexpect==4.8.0 +pickleshare==0.7.5 +Pillow==8.3.2 +pkg_resources==0.0.0 +platformdirs==2.6.2 +prometheus-client==0.11.0 +prometheus-flask-exporter==0.18.3 +prompt-toolkit==3.0.36 +protobuf==3.19.6 +psutil==5.9.4 +ptyprocess==0.7.0 +pure-eval==0.2.2 +pyarrow==10.0.1 +pyasn1==0.4.8 +pyasn1-modules==0.2.8 +Pygments==2.14.0 +PyJWT==2.6.0 +pyparsing==2.4.7 +PySocks==1.7.1 +python-dateutil==2.8.2 +python-editor==1.0.4 +pytz==2021.3 +PyYAML==5.4.1 +pyzmq==25.0.0 +querystring-parser==1.2.4 +readchar==2.0.1 +requests==2.26.0 +requests-oauthlib==1.3.1 +rsa==4.9 +scikit-learn==1.0 +scipy==1.7.1 +seaborn==0.11.2 +shap==0.41.0 +six==1.16.0 +sklearn==0.0 +slicer==0.0.7 +smmap==4.0.0 +soupsieve==2.2.1 +SQLAlchemy==1.4.25 +sqlparse==0.4.2 +stack-data==0.6.2 +tabulate==0.8.9 +tensorboard==2.11.2 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.1 +tensorflow==2.11.0 +tensorflow-estimator==2.11.0 +tensorflow-io-gcs-filesystem==0.31.0 +termcolor==1.1.0 +threadpoolctl==3.0.0 +tornado==6.2 +tqdm==4.64.1 +traitlets==5.8.1 +typing-extensions==3.10.0.2 +urllib3==1.26.7 +wcwidth==0.2.5 +websocket-client==1.2.1 +Werkzeug==2.0.1 +wrapt==1.14.1 +zipp==3.6.0 diff --git a/irec/connector/utils.py b/irec/connector/utils.py index e34304a..ab4010c 100755 --- a/irec/connector/utils.py +++ b/irec/connector/utils.py @@ -1,44 +1,35 @@ -from os.path import sep import os - -from sqlalchemy import true -from app.scripts.others import errors -import pickle import yaml +import copy +import json +import scipy +import pickle +import mlflow +import os.path import secrets +import scipy.stats +import numpy as np +import collections +import pandas as pd import mlflow.tracking +import collections.abc import mlflow.entities -import mlflow -from mlflow.tracking import MlflowClient -import json -from collections import defaultdict +from os.path import sep from pathlib import Path -from irec.environment.dataset import Dataset -import collections -from app.scripts.others import constants -import matplotlib.ticker as mtick -import numpy as np import matplotlib.pyplot as plt -from os.path import sep -from irec.offline_experiments.evaluation_policies.base import EvaluationPolicy +from irec.utils import constants +import matplotlib.ticker as mtick +from collections import defaultdict +from mlflow.tracking import MlflowClient from irec.utils.Factory import AgentFactory -import scipy -from irec.offline_experiments.metric_evaluators.interaction import Interaction +from irec.environment.dataset import Dataset +from irec.offline_experiments.metrics.registry import MetricRegistry from irec.offline_experiments.metric_evaluators.cumulative import Cumulative -from irec.offline_experiments.metric_evaluators.user_cumulative_interaction import ( - UserCumulativeInteraction, -) - +from irec.offline_experiments.metric_evaluators.interaction import Interaction +from irec.offline_experiments.evaluation_policies.base import EvaluationPolicy from irec.offline_experiments.evaluation_policies.registry import EvalPolicyRegistry -from irec.offline_experiments.metrics.registry import MetricRegistry from irec.offline_experiments.metric_evaluators.registry import MetricEvaluatorRegistry - -import copy -import os.path -import collections.abc -import pandas as pd -import scipy.stats - +from irec.offline_experiments.metric_evaluators.user_cumulative_interaction import UserCumulativeInteraction LATEX_TABLE_FOOTER = r""" \end{tabular} @@ -541,10 +532,14 @@ def evaluate_itr(dataset, settings, forced_run): artifact_path = client.download_artifacts(run.info.run_id, "interactions.pickle") # print(artifact_path) interactions = pickle.load(open(artifact_path, "rb")) - metric_values = metric_evaluator.evaluate( - metric_class, - interactions, - ) + try: + metric_values = metric_evaluator.evaluate( + metric_class, + interactions, + ) + except Exception as e: + print("Error", e) + return with mlflow.start_run(run_id=run.info.run_id) as run: print(metric_evaluator, UserCumulativeInteraction) if isinstance(metric_evaluator, UserCumulativeInteraction): @@ -578,7 +573,7 @@ def load_evaluation_experiment(settings): run = get_evaluation_run(settings) if run is None: - raise errors.EvaluationRunNotFoundError("Could not find evaluation run") + raise Exception("Could not find evaluation run") client = MlflowClient() artifact_path = client.download_artifacts(run.info.run_id, "evaluation.pickle") # print(artifact_path) @@ -626,19 +621,6 @@ def get_evaluation_run_parameters(settings): return parameters_evaluation_run -# def get_agent_run(settings): -# parameters_evaluation_run = get_parameters_agent_run(settings) - -# # parameters_evaluation_run |= parameters_normalize( -# # constants.METRIC_PARAMETERS_PREFIX, settings["defaults"]["metric"], {} -# # ) -# run = already_ran( -# parameters_evaluation_run, -# mlflow.get_experiment_by_name( -# settings["defaults"]["evaluation_experiment"] -# ).experiment_id, -# ) -# return run def get_agent_run(settings): agent_run_parameters = get_agent_run_parameters(settings) run = already_ran( @@ -775,7 +757,7 @@ def run_agent_with_dataset_parameters( current_settings["agents"][agent_name] = dataset_agents_parameters[ dataset_loader_name ][agent_name] - if tasks>1: + if tasks > 1: f = executor.submit( run_agent, train_dataset, @@ -787,7 +769,7 @@ def run_agent_with_dataset_parameters( if len(futures) >= tasks: completed, futures = wait(futures, return_when=FIRST_COMPLETED) else: - run_agent(train_dataset, test_dataset,copy.deepcopy(current_settings),forced_run) + run_agent(train_dataset, test_dataset, copy.deepcopy(current_settings), forced_run) for f in futures: f.result() @@ -809,18 +791,8 @@ def print_results_latex_table( plt.rcParams["axes.prop_cycle"] = cycler(color="krbgmyc") plt.rcParams["lines.linewidth"] = 2 plt.rcParams["font.size"] = 15 - # metrics_classes = [metrics.Hits, metrics.Recall] metrics_classes = [MetricRegistry.get(i) for i in metrics] - # metrics_classes = [ - # metrics.Hits, - # metrics.Recall, - # # metrics.EPC, - # # metrics.Entropy, - # # metrics.UsersCoverage, - # # metrics.ILD, - # # metrics.GiniCoefficientInv, - # ] metrics_classes_names = list(map(lambda x: x.__name__, metrics_classes)) metrics_names = metrics_classes_names datasets_names = dataset_loaders @@ -831,26 +803,8 @@ def print_results_latex_table( metric_evaluator = MetricEvaluatorRegistry.get(metric_evaluator_name)(None, **metric_evaluator_parameters) evaluation_policy_name = settings["defaults"]["evaluation_policy"] - - # metrics_names = [ - # 'Cumulative Precision', - # 'Cumulative Recall', - # # 'Cumulative EPC', - # # 'Cumulative Entropy', - # # 'Cumulative Users Coverage', - # # 'Cumulative ILD', - # # '1-(Gini-Index)' - # ] - # metrics_weights = {'Entropy': 0.5,'EPC':0.5} - # metrics_weights = {'Hits': 0.3,'Recall':0.3,'EPC':0.1,'UsersCoverage':0.1,'ILD':0.1,'GiniCoefficientInv':0.1} - # metrics_weights = {'Hits': 0.3,'Recall':0.3,'EPC':0.16666,'UsersCoverage':0.16666,'ILD':0.16666} - # metrics_weights = {'Hits': 0.25,'Recall':0.25,'EPC':0.125,'UsersCoverage':0.125,'ILD':0.125,'GiniCoefficientInv':0.125} metrics_weights = {i: 1 / len(metrics_classes_names) for i in metrics_classes_names} - # interactors_classes_names_to_names = { - # k: v["name"] for k, v in settings["agents_general_settings"].items() - # } - print("metric_evaluator_name", metric_evaluator_name) if metric_evaluator_name == "StageIterations": nums_interactions_to_show = ["1-5", "6-10", "11-15", "16-20", "21-50", "51-100"] @@ -923,11 +877,6 @@ def generate_table_spec(): ] settings["agents"][agent_name] = agent_parameters settings["defaults"]["metric"] = metric_class_name - # agent = AgentFactory().create(agent_name, agent_parameters) - # agent_id = get_agent_id(agent_name, agent_parameters) - # dataset_parameters = settings["dataset_loaders"][dataset_loader_name] - # metrics_evaluator_name = metric_evaluator.__class__.__name__ - # parameters_agent_run = get_agent_run_parameters(settings) parameters_evaluation_run = get_evaluation_run_parameters(settings) mlflow.set_experiment(settings["defaults"]["evaluation_experiment"]) @@ -945,7 +894,6 @@ def generate_table_spec(): run.info.run_id, "evaluation.pickle" ) metric_values = pickle.load(open(artifact_path, "rb")) - # users_items_recommended = metric_values datasets_metrics_values[dataset_loader_name][metric_class_name][ agent_name @@ -1034,15 +982,11 @@ def generate_table_spec(): ].append(np.array([maut] * 100)) if dump: - # with open('datasets_metrics_values.pickle','wb') as f: - # pickle.dump(datasets_metrics_values,f) with open("datasets_metrics_values.pickle", "wb") as f: pickle.dump(json.loads(json.dumps(datasets_metrics_values)), f) - # f.write(str(methods_users_hits)) - # print(datasets_metrics_values['Yahoo Music']['MAUT']) - metrics_classes_names.append("MAUT") - metrics_names.append("MAUT") + # metrics_classes_names.append("MAUT") + # metrics_names.append("MAUT") datasets_metrics_gain = defaultdict( lambda: defaultdict( @@ -1355,11 +1299,11 @@ def print_agent_search( metric_values = load_evaluation_experiment(settings) if metric_values is None: continue - except errors.EvaluationRunNotFoundError as e: - print(e) - continue except EOFError as e: - print(e) + print(["ERROR"], e) + continue + except Exception as e: + print(["ERROR"], e) continue datasets_metrics_values[settings["defaults"]["dataset_loader"]][ diff --git a/irec/recommendation/agents/value_functions/knn_bandit.py b/irec/recommendation/agents/value_functions/knn_bandit.py index 51d533e..dbc29c2 100644 --- a/irec/recommendation/agents/value_functions/knn_bandit.py +++ b/irec/recommendation/agents/value_functions/knn_bandit.py @@ -18,7 +18,7 @@ class kNNBandit(ValueFunction): bandit for interactive recommendation." Proceedings of the 13th ACM Conference on Recommender Systems. 2019. """ - def __init__(self, alpha_0, beta_0, k, *args, **kwargs): + def __init__(self, alpha_0, beta_0, k, threshold: int = 4, *args, **kwargs): """__init__. Args: @@ -29,6 +29,7 @@ def __init__(self, alpha_0, beta_0, k, *args, **kwargs): self.alpha_0 = alpha_0 self.beta_0 = beta_0 self.k = k + self.threshold = threshold def reset(self, observation): """reset. @@ -65,7 +66,7 @@ def reset(self, observation): uid = int(self.train_dataset.data[i, 0]) item = int(self.train_dataset.data[i, 1]) reward = self.train_dataset.data[i, 2] - reward = reward >= 4 + reward = reward >= self.threshold self.users_rating_sum[uid] += reward if len(self.items_consumed_users[item]) > 0: item_consumed_uids = np.array( @@ -129,7 +130,7 @@ def update(self, observation, action, reward, info): uid = action[0] item = action[1] additional_data = info - reward = reward >= 4 + reward = reward >= self.threshold if len(self.items_consumed_users[item]) > 0: item_consumed_uids = np.array([i for i in self.items_consumed_users[item]]) diff --git a/irec/recommendation/agents/value_functions/nicf.py b/irec/recommendation/agents/value_functions/nicf.py index 4b9958f..bc8e337 100644 --- a/irec/recommendation/agents/value_functions/nicf.py +++ b/irec/recommendation/agents/value_functions/nicf.py @@ -730,7 +730,7 @@ def reset(self, observation): self.train_dataset.data[:, 0] += 1 self.train_dataset.data[:, 1] += 1 self.train_dataset.data = self.train_dataset.data.astype(int) - self.train_dataset.update_from_data() + self.train_dataset.set_parameters() self.tau = 0 args = Namespace( diff --git a/irec/utils/constants.py b/irec/utils/constants.py index 47934c9..cf5c064 100644 --- a/irec/utils/constants.py +++ b/irec/utils/constants.py @@ -1 +1,6 @@ DATA_PATH = '../../../data' +DATASET_PARAMETERS_PREFIX = "dataset" +AGENT_PARAMETERS_PREFIX = "agent" +EVALUATION_POLICY_PARAMETERS_PREFIX = "evaluation_policy" +METRIC_EVALUATOR_PARAMETERS_PREFIX = "metric_evaluator" +METRIC_PARAMETERS_PREFIX = "metric" \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 28cef7a..29c535d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,4 +10,4 @@ sklearn threadpoolctl>=3.0.0 tqdm>=4.62.3 mlflow -cachetools +cachetools \ No newline at end of file