Skip to content

Commit

Permalink
Merge pull request #86 from irec-org/export-interactions
Browse files Browse the repository at this point in the history
Fixed Bugs
  • Loading branch information
thiagodks committed Mar 24, 2023
2 parents 3afeda2 + 16e000b commit c21f9f7
Show file tree
Hide file tree
Showing 6 changed files with 178 additions and 99 deletions.
129 changes: 129 additions & 0 deletions all_requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
absl-py==1.4.0
alembic==1.4.1
asttokens==2.2.1
astunparse==1.6.3
backcall==0.2.0
beautifulsoup4==4.10.0
blessed==1.17.6
cachetools==5.2.1
certifi==2021.5.30
charset-normalizer==2.0.6
click==8.0.1
cloudpickle==2.0.0
comm==0.1.2
contourpy==1.0.6
cycler==0.10.0
databricks-cli==0.15.0
debugpy==1.6.5
decorator==5.1.1
docker==5.0.2
entrypoints==0.3
executing==1.2.0
filelock==3.9.0
Flask==2.0.2
flatbuffers==23.3.3
fonttools==4.38.0
gast==0.4.0
gdown==4.6.0
gitdb==4.0.7
GitPython==3.1.24
google-auth==2.16.3
google-auth-oauthlib==0.4.6
google-pasta==0.2.0
googleDriveFileDownloader==1.2
greenlet==1.1.2
grpcio==1.51.3
gunicorn==20.1.0
h5py==3.8.0
idna==3.2
importlib-metadata==4.8.1
importlib-resources==5.10.2
inquirer==2.7.0
ipykernel==6.20.2
ipython==8.8.0
-e git+https://github.com/irec-org/irec.git@364f8663af1bd4670e2b047d80be09c79ab3c55a#egg=irec
itsdangerous==2.0.1
jax==0.4.6
jedi==0.18.2
Jinja2==3.0.2
joblib==1.0.1
jupyter_client==7.4.9
jupyter_core==5.1.3
keras==2.11.0
kiwisolver==1.3.2
libclang==16.0.0
llvmlite==0.39.1
Mako==1.1.5
Markdown==3.4.1
MarkupSafe==2.0.1
matplotlib==3.4.3
matplotlib-inline==0.1.6
mlflow==1.20.2
nest-asyncio==1.5.6
numba==0.56.4
numpy==1.22.4
oauthlib==3.2.2
opt-einsum==3.3.0
packaging==21.0
pandas==1.3.3
parso==0.8.3
pexpect==4.8.0
pickleshare==0.7.5
Pillow==8.3.2
pkg_resources==0.0.0
platformdirs==2.6.2
prometheus-client==0.11.0
prometheus-flask-exporter==0.18.3
prompt-toolkit==3.0.36
protobuf==3.19.6
psutil==5.9.4
ptyprocess==0.7.0
pure-eval==0.2.2
pyarrow==10.0.1
pyasn1==0.4.8
pyasn1-modules==0.2.8
Pygments==2.14.0
PyJWT==2.6.0
pyparsing==2.4.7
PySocks==1.7.1
python-dateutil==2.8.2
python-editor==1.0.4
pytz==2021.3
PyYAML==5.4.1
pyzmq==25.0.0
querystring-parser==1.2.4
readchar==2.0.1
requests==2.26.0
requests-oauthlib==1.3.1
rsa==4.9
scikit-learn==1.0
scipy==1.7.1
seaborn==0.11.2
shap==0.41.0
six==1.16.0
sklearn==0.0
slicer==0.0.7
smmap==4.0.0
soupsieve==2.2.1
SQLAlchemy==1.4.25
sqlparse==0.4.2
stack-data==0.6.2
tabulate==0.8.9
tensorboard==2.11.2
tensorboard-data-server==0.6.1
tensorboard-plugin-wit==1.8.1
tensorflow==2.11.0
tensorflow-estimator==2.11.0
tensorflow-io-gcs-filesystem==0.31.0
termcolor==1.1.0
threadpoolctl==3.0.0
tornado==6.2
tqdm==4.64.1
traitlets==5.8.1
typing-extensions==3.10.0.2
urllib3==1.26.7
wcwidth==0.2.5
websocket-client==1.2.1
Werkzeug==2.0.1
wrapt==1.14.1
zipp==3.6.0
132 changes: 38 additions & 94 deletions irec/connector/utils.py
Original file line number Diff line number Diff line change
@@ -1,44 +1,35 @@
from os.path import sep
import os

from sqlalchemy import true
from app.scripts.others import errors
import pickle
import yaml
import copy
import json
import scipy
import pickle
import mlflow
import os.path
import secrets
import scipy.stats
import numpy as np
import collections
import pandas as pd
import mlflow.tracking
import collections.abc
import mlflow.entities
import mlflow
from mlflow.tracking import MlflowClient
import json
from collections import defaultdict
from os.path import sep
from pathlib import Path
from irec.environment.dataset import Dataset
import collections
from app.scripts.others import constants
import matplotlib.ticker as mtick
import numpy as np
import matplotlib.pyplot as plt
from os.path import sep
from irec.offline_experiments.evaluation_policies.base import EvaluationPolicy
from irec.utils import constants
import matplotlib.ticker as mtick
from collections import defaultdict
from mlflow.tracking import MlflowClient
from irec.utils.Factory import AgentFactory
import scipy
from irec.offline_experiments.metric_evaluators.interaction import Interaction
from irec.environment.dataset import Dataset
from irec.offline_experiments.metrics.registry import MetricRegistry
from irec.offline_experiments.metric_evaluators.cumulative import Cumulative
from irec.offline_experiments.metric_evaluators.user_cumulative_interaction import (
UserCumulativeInteraction,
)

from irec.offline_experiments.metric_evaluators.interaction import Interaction
from irec.offline_experiments.evaluation_policies.base import EvaluationPolicy
from irec.offline_experiments.evaluation_policies.registry import EvalPolicyRegistry
from irec.offline_experiments.metrics.registry import MetricRegistry
from irec.offline_experiments.metric_evaluators.registry import MetricEvaluatorRegistry

import copy
import os.path
import collections.abc
import pandas as pd
import scipy.stats

from irec.offline_experiments.metric_evaluators.user_cumulative_interaction import UserCumulativeInteraction

LATEX_TABLE_FOOTER = r"""
\end{tabular}
Expand Down Expand Up @@ -541,10 +532,14 @@ def evaluate_itr(dataset, settings, forced_run):
artifact_path = client.download_artifacts(run.info.run_id, "interactions.pickle")
# print(artifact_path)
interactions = pickle.load(open(artifact_path, "rb"))
metric_values = metric_evaluator.evaluate(
metric_class,
interactions,
)
try:
metric_values = metric_evaluator.evaluate(
metric_class,
interactions,
)
except Exception as e:
print("Error", e)
return
with mlflow.start_run(run_id=run.info.run_id) as run:
print(metric_evaluator, UserCumulativeInteraction)
if isinstance(metric_evaluator, UserCumulativeInteraction):
Expand Down Expand Up @@ -578,7 +573,7 @@ def load_evaluation_experiment(settings):
run = get_evaluation_run(settings)

if run is None:
raise errors.EvaluationRunNotFoundError("Could not find evaluation run")
raise Exception("Could not find evaluation run")
client = MlflowClient()
artifact_path = client.download_artifacts(run.info.run_id, "evaluation.pickle")
# print(artifact_path)
Expand Down Expand Up @@ -626,19 +621,6 @@ def get_evaluation_run_parameters(settings):
return parameters_evaluation_run


# def get_agent_run(settings):
# parameters_evaluation_run = get_parameters_agent_run(settings)

# # parameters_evaluation_run |= parameters_normalize(
# # constants.METRIC_PARAMETERS_PREFIX, settings["defaults"]["metric"], {}
# # )
# run = already_ran(
# parameters_evaluation_run,
# mlflow.get_experiment_by_name(
# settings["defaults"]["evaluation_experiment"]
# ).experiment_id,
# )
# return run
def get_agent_run(settings):
agent_run_parameters = get_agent_run_parameters(settings)
run = already_ran(
Expand Down Expand Up @@ -775,7 +757,7 @@ def run_agent_with_dataset_parameters(
current_settings["agents"][agent_name] = dataset_agents_parameters[
dataset_loader_name
][agent_name]
if tasks>1:
if tasks > 1:
f = executor.submit(
run_agent,
train_dataset,
Expand All @@ -787,7 +769,7 @@ def run_agent_with_dataset_parameters(
if len(futures) >= tasks:
completed, futures = wait(futures, return_when=FIRST_COMPLETED)
else:
run_agent(train_dataset, test_dataset,copy.deepcopy(current_settings),forced_run)
run_agent(train_dataset, test_dataset, copy.deepcopy(current_settings), forced_run)

for f in futures:
f.result()
Expand All @@ -809,18 +791,8 @@ def print_results_latex_table(
plt.rcParams["axes.prop_cycle"] = cycler(color="krbgmyc")
plt.rcParams["lines.linewidth"] = 2
plt.rcParams["font.size"] = 15
# metrics_classes = [metrics.Hits, metrics.Recall]
metrics_classes = [MetricRegistry.get(i) for i in metrics]

# metrics_classes = [
# metrics.Hits,
# metrics.Recall,
# # metrics.EPC,
# # metrics.Entropy,
# # metrics.UsersCoverage,
# # metrics.ILD,
# # metrics.GiniCoefficientInv,
# ]
metrics_classes_names = list(map(lambda x: x.__name__, metrics_classes))
metrics_names = metrics_classes_names
datasets_names = dataset_loaders
Expand All @@ -831,26 +803,8 @@ def print_results_latex_table(
metric_evaluator = MetricEvaluatorRegistry.get(metric_evaluator_name)(None, **metric_evaluator_parameters)

evaluation_policy_name = settings["defaults"]["evaluation_policy"]

# metrics_names = [
# 'Cumulative Precision',
# 'Cumulative Recall',
# # 'Cumulative EPC',
# # 'Cumulative Entropy',
# # 'Cumulative Users Coverage',
# # 'Cumulative ILD',
# # '1-(Gini-Index)'
# ]
# metrics_weights = {'Entropy': 0.5,'EPC':0.5}
# metrics_weights = {'Hits': 0.3,'Recall':0.3,'EPC':0.1,'UsersCoverage':0.1,'ILD':0.1,'GiniCoefficientInv':0.1}
# metrics_weights = {'Hits': 0.3,'Recall':0.3,'EPC':0.16666,'UsersCoverage':0.16666,'ILD':0.16666}
# metrics_weights = {'Hits': 0.25,'Recall':0.25,'EPC':0.125,'UsersCoverage':0.125,'ILD':0.125,'GiniCoefficientInv':0.125}
metrics_weights = {i: 1 / len(metrics_classes_names) for i in metrics_classes_names}

# interactors_classes_names_to_names = {
# k: v["name"] for k, v in settings["agents_general_settings"].items()
# }

print("metric_evaluator_name", metric_evaluator_name)
if metric_evaluator_name == "StageIterations":
nums_interactions_to_show = ["1-5", "6-10", "11-15", "16-20", "21-50", "51-100"]
Expand Down Expand Up @@ -923,11 +877,6 @@ def generate_table_spec():
]
settings["agents"][agent_name] = agent_parameters
settings["defaults"]["metric"] = metric_class_name
# agent = AgentFactory().create(agent_name, agent_parameters)
# agent_id = get_agent_id(agent_name, agent_parameters)
# dataset_parameters = settings["dataset_loaders"][dataset_loader_name]
# metrics_evaluator_name = metric_evaluator.__class__.__name__
# parameters_agent_run = get_agent_run_parameters(settings)
parameters_evaluation_run = get_evaluation_run_parameters(settings)

mlflow.set_experiment(settings["defaults"]["evaluation_experiment"])
Expand All @@ -945,7 +894,6 @@ def generate_table_spec():
run.info.run_id, "evaluation.pickle"
)
metric_values = pickle.load(open(artifact_path, "rb"))
# users_items_recommended = metric_values

datasets_metrics_values[dataset_loader_name][metric_class_name][
agent_name
Expand Down Expand Up @@ -1034,15 +982,11 @@ def generate_table_spec():
].append(np.array([maut] * 100))

if dump:
# with open('datasets_metrics_values.pickle','wb') as f:
# pickle.dump(datasets_metrics_values,f)
with open("datasets_metrics_values.pickle", "wb") as f:
pickle.dump(json.loads(json.dumps(datasets_metrics_values)), f)
# f.write(str(methods_users_hits))
# print(datasets_metrics_values['Yahoo Music']['MAUT'])

metrics_classes_names.append("MAUT")
metrics_names.append("MAUT")
# metrics_classes_names.append("MAUT")
# metrics_names.append("MAUT")

datasets_metrics_gain = defaultdict(
lambda: defaultdict(
Expand Down Expand Up @@ -1355,11 +1299,11 @@ def print_agent_search(
metric_values = load_evaluation_experiment(settings)
if metric_values is None: continue

except errors.EvaluationRunNotFoundError as e:
print(e)
continue
except EOFError as e:
print(e)
print(["ERROR"], e)
continue
except Exception as e:
print(["ERROR"], e)
continue

datasets_metrics_values[settings["defaults"]["dataset_loader"]][
Expand Down
7 changes: 4 additions & 3 deletions irec/recommendation/agents/value_functions/knn_bandit.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ class kNNBandit(ValueFunction):
bandit for interactive recommendation." Proceedings of the 13th ACM Conference on Recommender Systems. 2019.
"""

def __init__(self, alpha_0, beta_0, k, *args, **kwargs):
def __init__(self, alpha_0, beta_0, k, threshold: int = 4, *args, **kwargs):
"""__init__.
Args:
Expand All @@ -29,6 +29,7 @@ def __init__(self, alpha_0, beta_0, k, *args, **kwargs):
self.alpha_0 = alpha_0
self.beta_0 = beta_0
self.k = k
self.threshold = threshold

def reset(self, observation):
"""reset.
Expand Down Expand Up @@ -65,7 +66,7 @@ def reset(self, observation):
uid = int(self.train_dataset.data[i, 0])
item = int(self.train_dataset.data[i, 1])
reward = self.train_dataset.data[i, 2]
reward = reward >= 4
reward = reward >= self.threshold
self.users_rating_sum[uid] += reward
if len(self.items_consumed_users[item]) > 0:
item_consumed_uids = np.array(
Expand Down Expand Up @@ -129,7 +130,7 @@ def update(self, observation, action, reward, info):
uid = action[0]
item = action[1]
additional_data = info
reward = reward >= 4
reward = reward >= self.threshold

if len(self.items_consumed_users[item]) > 0:
item_consumed_uids = np.array([i for i in self.items_consumed_users[item]])
Expand Down
Loading

0 comments on commit c21f9f7

Please sign in to comment.