diff --git a/tutorials/datasets/MovieLens 100k/dic_info.json b/tutorials/datasets/MovieLens 100k/dic_info.json new file mode 100644 index 0000000..7a802d7 --- /dev/null +++ b/tutorials/datasets/MovieLens 100k/dic_info.json @@ -0,0 +1,37 @@ +{ + "name": "ML-100K", + "sparsity": 0.937056884900853, + "user_information": { + "std_consumption": 100.93613152085523, + "min_consumption": 1.0, + "median_consumption": 64.5, + "mean_consumption": 105.9332627118644, + "max_consumption": 737.0, + "num_user": 944.0, + ">=75%": 148.0, + ">=50%": 64.5, + ">=25%": 33.0 + }, + "item_information": { + "std_ratings": 80.37257740532644, + "min_ratings": 1.0, + "median_ratings": 27.0, + "mean_ratings": 59.41830065359477, + "max_ratings": 583.0, + "num_item": 1683.0, + ">=75%": 80.0, + ">=50%": 27.0, + ">=25%": 6.0 + }, + "ratings_information": { + "std": 1.1257233133724367, + "min": 0.0, + "median": 4.0, + "mean": 3.5298247017529825, + "max": 5.0, + "num_ratings": 100001.0, + ">=75%": 4.0, + ">=50%": 4.0, + ">=25%": 3.0 + } +} \ No newline at end of file diff --git a/tutorials/datasets/MovieLens 100k/informations.pdf b/tutorials/datasets/MovieLens 100k/informations.pdf new file mode 100644 index 0000000..60ad41c Binary files /dev/null and b/tutorials/datasets/MovieLens 100k/informations.pdf differ diff --git a/tutorials/datasets/MovieLens 100k/informations.tex b/tutorials/datasets/MovieLens 100k/informations.tex new file mode 100644 index 0000000..5aa9978 --- /dev/null +++ b/tutorials/datasets/MovieLens 100k/informations.tex @@ -0,0 +1,90 @@ + +\documentclass{article} + +\usepackage{multirow} +\usepackage{color, colortbl} +\usepackage{xcolor, soul} +\usepackage[utf8]{inputenc} +\usepackage[T1]{fontenc} +\usepackage[brazil]{babel} +\usepackage{graphicx} +\usepackage{subcaption} + +\definecolor{Gray}{gray}{0.9} +\definecolor{StrongGray}{gray}{0.7} + +\title{Dataset Information} +\begin{document} +\maketitle + + \begin{table}[h!] 
+ \centering + \begin{tabular}{ |c||c|c|c| } \hline + + \rowcolor{StrongGray} + \multicolumn{4}{|c|}{ML-100K Dataset Information - Sparsity: 0.9370} \\ \hline \hline + + \rowcolor{Gray} + Information & User Consumption Info & Item Rating Info & Rating Info\\ \hline \hline + + STD & 100.9 & 80.3 & 1.1\\ \hline + MIN & 1.0 & 1.0 & 0.0\\ \hline + MEDIAN & 64.5 & 27.0 & 4.0\\ \hline + MEAN & 105.9 & 59.4 & 3.5\\ \hline + MAX & 737.0 & 583.0 & 5.0\\ \hline + NUM & 944.0 & 1683.0 & 100001.0\\ \hline + $\geq75\%$ & 148.0 & 80.0 & 4.0\\ \hline + $\geq50\%$ & 64.5 & 27.0 & 4.0\\ \hline + $\geq25\%$ & 33.0 & 6.0 & 3.0\\ \hline + + \end{tabular} + \caption{Information about users, items and ratings.} + \label{table:1} + \end{table} + + + \begin{figure}[!ht] + \centering + \begin{minipage}{0.5\textwidth} + \centering + \includegraphics[width=0.9\textwidth]{./Analysis/ML-100K/Graphics/corr_pop_ent.png} + \caption{corr pop ent} + \label{fig:corr_pop_ent} + \end{minipage}\hfill + \begin{minipage}{0.5\textwidth} + \centering + \includegraphics[width=0.9\textwidth]{./Analysis/ML-100K/Graphics/corr_meanratings_pop.png} + \caption{corr meanratings pop} + \label{fig:corr_meanratings_pop} + \end{minipage}\hfill + \begin{minipage}{0.5\textwidth} + \centering + \includegraphics[width=0.9\textwidth]{./Analysis/ML-100K/Graphics/users_consuption.png} + \caption{users consumption} + \label{fig:users_consumption} + \end{minipage}\hfill + \begin{minipage}{0.5\textwidth} + \centering + \includegraphics[width=0.9\textwidth]{./Analysis/ML-100K/Graphics/corr_itemsfeatures_pop.png} + \caption{corr itemsfeatures pop} + \label{fig:corr_itemsfeatures_pop} + \end{minipage}\hfill + \begin{minipage}{0.5\textwidth} + \centering + \includegraphics[width=0.9\textwidth]{./Analysis/ML-100K/Graphics/items_rated.png} + \caption{items rated} + \label{fig:items_rated} + \end{minipage}\hfill + \begin{minipage}{0.5\textwidth} + \centering + \includegraphics[width=0.9\textwidth]{./Analysis/ML-100K/Graphics/corr_meanratings_ent.png} + 
\caption{corr meanratings ent} + \label{fig:figura1minipg} + \end{minipage}\hfill + + % \caption{Graphics} + \label{fig:figurasminipg} + \end{figure} + +\end{document} + \ No newline at end of file diff --git a/tutorials/datasets/MovieLens 100k/top-100_items_entropy.txt b/tutorials/datasets/MovieLens 100k/top-100_items_entropy.txt new file mode 100644 index 0000000..5e1175f --- /dev/null +++ b/tutorials/datasets/MovieLens 100k/top-100_items_entropy.txt @@ -0,0 +1,101 @@ +item_id score +815 1.597234203958847 +294 1.592192307331314 +61 1.587882168209801 +1405 1.5792331346113389 +497 1.576695992330607 +527 1.5748427300244527 +374 1.5724887547715412 +928 1.571061161892474 +798 1.5678962872108388 +1229 1.565845452554416 +744 1.5648538428609964 +265 1.5630695429447656 +360 1.5610048541234605 +223 1.5609206710923602 +897 1.5607104090414063 +1293 1.5607104090414063 +691 1.5590931042048033 +171 1.5571130980576458 +419 1.5571130980576458 +848 1.5571130980576458 +1233 1.5571130980576455 +870 1.556910568114504 +666 1.5558794724488303 +466 1.5556646696181333 +1343 1.5544328269016834 +1058 1.553019134703412 +1225 1.5530014019808 +233 1.5525140696556048 +1146 1.5508457063832817 +940 1.5498260458782016 +906 1.5494170420810358 +964 1.5465986860485126 +976 1.5465986860485126 +1117 1.5465986860485126 +1317 1.5465986860485126 +1226 1.5465986860485124 +477 1.5456055131100814 +451 1.5454229031935938 +1421 1.5437891850588954 +995 1.5437672979880863 +930 1.5419584371829487 +331 1.54065205313336 +8 1.5389266301898288 +123 1.5354206959078445 +563 1.5351908491313153 +598 1.5351908491313153 +468 1.5347676808102217 +517 1.533893392285119 +1115 1.533263066909204 +341 1.5305397243418035 +316 1.530467652890292 +1223 1.53022074156986 +602 1.5300089139055837 +1192 1.5292482930376914 +585 1.5288460918887299 +796 1.528652087172375 +883 1.5270355158941487 +1243 1.5260394155804642 +1116 1.5257607683920602 +885 1.5256715197633526 +410 1.5247073930301436 +1142 1.5247073930301436 +1301 1.5247073930301436 
+890 1.5242564032679284 +391 1.5235967393304184 +413 1.5233999822609356 +572 1.522961660632849 +614 1.5227235330986488 +955 1.5227136689800211 +269 1.5226965458840906 +886 1.5214620898340783 +352 1.5203298028074628 +925 1.5196918474879466 +252 1.5184174505936268 +1059 1.518267124767796 +262 1.5179142731543076 +700 1.5178210748928995 +1083 1.5176745294625686 +402 1.5171063970610277 +972 1.5171063970610277 +1246 1.5171063970610275 +1315 1.515707952085713 +1453 1.5157079520857129 +954 1.5154125178887332 +594 1.514340872488177 +59 1.5138149024461531 +680 1.5134058386196787 +742 1.5123786573528997 +520 1.5104288998570303 +181 1.5100527352298625 +814 1.509971678751745 +1042 1.5095825913076644 +797 1.509544687783763 +348 1.5089424503045463 +849 1.5082642804351818 +175 1.5081132446402752 +381 1.5080715571604042 +1070 1.5077294282174092 +526 1.507519552871314 +1031 1.5068409398295437 \ No newline at end of file diff --git a/tutorials/datasets/MovieLens 100k/top-100_items_popularity.txt b/tutorials/datasets/MovieLens 100k/top-100_items_popularity.txt new file mode 100644 index 0000000..0572cf3 --- /dev/null +++ b/tutorials/datasets/MovieLens 100k/top-100_items_popularity.txt @@ -0,0 +1,101 @@ +item_id score +357 583 +157 509 +49 508 +52 507 +95 485 +289 481 +60 478 +24 452 +652 431 +403 429 +101 420 +189 413 +216 394 +209 392 +31 390 +77 384 +140 378 +161 367 +12 365 +247 350 +719 350 +136 344 +156 336 +347 331 +103 326 +321 324 +191 321 +175 316 +367 316 +491 315 +10 303 +112 301 +57 300 +502 299 +200 298 +695 298 +1 297 +408 297 +98 295 +102 295 +23 293 +68 293 +86 293 +256 291 +166 290 +311 284 +239 283 +29 280 +179 280 +254 276 +471 276 +217 275 +83 272 +89 268 +297 267 +364 267 +240 264 +231 261 +118 259 +360 259 +320 256 +329 256 +139 255 +25 254 +53 251 +280 251 +307 251 +423 251 +355 250 +221 247 +100 246 +182 244 +552 244 +22 243 +174 243 +34 241 +117 240 +144 240 +273 239 +361 239 +229 236 +647 232 +99 231 +201 230 +404 230 +6 227 +431 227 +356 226 +141 223 +56 222 
+120 221 +492 221 +571 221 +309 220 +456 220 +26 219 +66 219 +274 219 +389 219 +51 218 \ No newline at end of file diff --git a/tutorials/example_ensemble_agents.ipynb b/tutorials/example_ensemble_agents.ipynb index 36c9b43..666b0c5 100644 --- a/tutorials/example_ensemble_agents.ipynb +++ b/tutorials/example_ensemble_agents.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 20, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -57,7 +57,7 @@ "source": [ "# Dataset\n", "dataset = {\n", - " 'path': \"../datasets/MovieLens 100k/ratings.csv\",\n", + " 'path': \"datasets/MovieLens 100k/ratings.csv\",\n", " 'random_seed': 0,\n", " 'file_delimiter': \",\",\n", " 'skip_head': True\n", @@ -80,7 +80,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -100,7 +100,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -123,18 +123,23 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "asp_sea = ASPGenericGreedy()\n", "vf_sea = GenericThompsonSampling(**params[\"GenericThompsonSampling\"])\n", - "ensemble_agent = SimpleEnsembleAgent(agents=[agent1, agent2], action_selection_policy=asp_sea, name=\"EnsebleAgent\", value_function=vf_sea)" + "ensemble_agent = SimpleEnsembleAgent(\n", + " agents=[agent1, agent2],\n", + " action_selection_policy=asp_sea,\n", + " name=\"EnsembleAgent\",\n", + " value_function=vf_sea\n", + ")" ] }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -151,7 +156,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -160,7 +165,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -175,7 +180,7 @@ 
"name": "stderr", "output_type": "stream", "text": [ - "rmse=0.800: 100%|██████████| 20/20 [00:15<00:00, 1.29it/s]\n" + "rmse=0.801: 100%|██████████| 20/20 [00:19<00:00, 1.05it/s]\n" ] }, { @@ -189,7 +194,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "LinearUCB: 100%|██████████| 18900/18900 [00:17<00:00, 1083.03it/s]\n" + "LinearUCB: 100%|██████████| 18900/18900 [00:15<00:00, 1228.89it/s]\n" ] }, { @@ -205,36 +210,36 @@ "name": "stderr", "output_type": "stream", "text": [ - "MostPopular: 100%|██████████| 18900/18900 [00:02<00:00, 6731.79it/s]\n" + "MostPopular: 100%|██████████| 18900/18900 [00:01<00:00, 10166.16it/s]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "EnsebleAgent\n", - "Starting EnsebleAgent Training\n" + "EnsembleAgent\n", + "Starting EnsembleAgent Training\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "rmse=0.801: 100%|██████████| 20/20 [00:18<00:00, 1.08it/s]\n" + "rmse=0.800: 100%|██████████| 20/20 [00:17<00:00, 1.15it/s]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Ended EnsebleAgent Training\n" + "Ended EnsembleAgent Training\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "EnsebleAgent: 100%|██████████| 18900/18900 [00:22<00:00, 844.46it/s] \n" + "EnsembleAgent: 100%|██████████| 18900/18900 [00:21<00:00, 872.49it/s] \n" ] } ], @@ -255,7 +260,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ @@ -271,7 +276,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 11, "metadata": {}, "outputs": [ { @@ -286,7 +291,7 @@ "Computing interaction 20 with UserCumulativeInteraction\n", "Computing interaction 50 with UserCumulativeInteraction\n", "Computing interaction 100 with UserCumulativeInteraction\n", - "UserCumulativeInteraction spent 0.86 seconds executing Hits metric\n", + "UserCumulativeInteraction spent 0.62 seconds executing Hits metric\n", "\n", "Evaluating 
MostPopular\n", "\n", @@ -295,16 +300,16 @@ "Computing interaction 20 with UserCumulativeInteraction\n", "Computing interaction 50 with UserCumulativeInteraction\n", "Computing interaction 100 with UserCumulativeInteraction\n", - "UserCumulativeInteraction spent 0.38 seconds executing Hits metric\n", + "UserCumulativeInteraction spent 0.35 seconds executing Hits metric\n", "\n", - "Evaluating EnsebleAgent\n", + "Evaluating EnsembleAgent\n", "\n", "Computing interaction 5 with UserCumulativeInteraction\n", "Computing interaction 10 with UserCumulativeInteraction\n", "Computing interaction 20 with UserCumulativeInteraction\n", "Computing interaction 50 with UserCumulativeInteraction\n", "Computing interaction 100 with UserCumulativeInteraction\n", - "UserCumulativeInteraction spent 0.38 seconds executing Hits metric\n" + "UserCumulativeInteraction spent 0.35 seconds executing Hits metric\n" ] } ], @@ -319,7 +324,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -361,11 +366,11 @@ " \n", " \n", " LinearUCB\n", - " 1.936508\n", - " 3.47619\n", - " 6.333333\n", - " 14.084656\n", - " 22.915344\n", + " 1.920635\n", + " 3.608466\n", + " 6.42328\n", + " 14.058201\n", + " 22.645503\n", " \n", " \n", " MostPopular\n", @@ -376,26 +381,26 @@ " 16.703704\n", " \n", " \n", - " EnsebleAgent\n", - " 1.846561\n", - " 3.470899\n", - " 6.492063\n", - " 14.132275\n", - " 23.026455\n", + " EnsembleAgent\n", + " 1.820106\n", + " 3.433862\n", + " 6.37037\n", + " 13.989418\n", + " 22.89418\n", " \n", " \n", "\n", "" ], "text/plain": [ - " 5 10 20 50 100\n", - "Model \n", - "LinearUCB 1.936508 3.47619 6.333333 14.084656 22.915344\n", - "MostPopular 1.666667 2.904762 5.126984 10.063492 16.703704\n", - "EnsebleAgent 1.846561 3.470899 6.492063 14.132275 23.026455" + " 5 10 20 50 100\n", + "Model \n", + "LinearUCB 1.920635 3.608466 6.42328 14.058201 22.645503\n", + "MostPopular 1.666667 2.904762 5.126984 10.063492 16.703704\n", + 
"EnsembleAgent 1.820106 3.433862 6.37037 13.989418 22.89418" ] }, - "execution_count": 19, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" }