Skip to content

Commit

Permalink
I hate writing commit messages...
Browse files Browse the repository at this point in the history
  • Loading branch information
awarebayes committed Aug 7, 2020
1 parent af0f05c commit 742fc34
Show file tree
Hide file tree
Showing 7 changed files with 110 additions and 99 deletions.
110 changes: 20 additions & 90 deletions examples/[Library Basics]/2. Different Pandas Backends.ipynb
Expand Up @@ -45,83 +45,23 @@
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"execution_count": 3,
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "dfdff54fdef24df59d3027095d099ec7",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(FloatProgress(value=0.0, max=20000263.0), HTML(value='')))"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "00aaa1f880114a7ea1b5f00a3ce8a46c",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(FloatProgress(value=0.0, max=20000263.0), HTML(value='')))"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "04fadc9450294367aed2aba5e5257bd5",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(FloatProgress(value=0.0, max=138493.0), HTML(value='')))"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"CPU times: user 57.4 s, sys: 2.15 s, total: 59.5 s\n",
"Wall time: 59.6 s\n"
]
"name": "stderr",
"text": "100%|██████████| 20000263/20000263 [00:13<00:00, 1469488.15it/s]\n100%|██████████| 20000263/20000263 [00:15<00:00, 1265183.17it/s]\n100%|██████████| 138493/138493 [00:06<00:00, 19935.53it/s]\nCPU times: user 41.6 s, sys: 1.89 s, total: 43.5 s\nWall time: 43.5 s\n"
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"<recnn.data.env.FrameEnv at 0x7fa50c5a1490>"
]
"text/plain": "<recnn.data.env.FrameEnv at 0x7f28bd9fe7c0>"
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
"execution_count": 3
}
],
"source": [
Expand Down Expand Up @@ -160,19 +100,15 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 3,
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"2020-08-07 21:24:00,759\tINFO resource_spec.py:204 -- Starting Ray with 7.62 GiB memory available for workers and up to 3.81 GiB for objects. You can adjust these settings with ray.init(memory=<bytes>, object_store_memory=<bytes>).\n",
"2020-08-07 21:24:01,002\tWARNING services.py:923 -- Redis failed to start, retrying now.\n",
"2020-08-07 21:24:01,223\tINFO services.py:1163 -- View the Ray dashboard at \u001b[1m\u001b[32mlocalhost:8265\u001b[39m\u001b[22m\n"
]
"name": "stderr",
"text": "2020-08-07 23:31:23,301\tINFO resource_spec.py:204 -- Starting Ray with 6.88 GiB memory available for workers and up to 3.46 GiB for objects. You can adjust these settings with ray.init(memory=<bytes>, object_store_memory=<bytes>).\n2020-08-07 23:31:23,522\tWARNING services.py:923 -- Redis failed to start, retrying now.\n2020-08-07 23:31:23,742\tINFO services.py:1163 -- View the Ray dashboard at \u001b[1m\u001b[32mlocalhost:8265\u001b[39m\u001b[22m\n"
}
],
"source": [
Expand All @@ -188,29 +124,23 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 4,
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 138493/138493 [00:07<00:00, 18951.36it/s]\n",
"CPU times: user 12.1 s, sys: 1.4 s, total: 13.5 s\n",
"Wall time: 20.8 s\n"
]
"name": "stderr",
"text": "100%|██████████| 138493/138493 [00:07<00:00, 19253.50it/s]\nCPU times: user 12.1 s, sys: 1.37 s, total: 13.5 s\nWall time: 21.5 s\n"
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"<recnn.data.env.FrameEnv at 0x7fafa57f8cd0>"
]
"text/plain": "<recnn.data.env.FrameEnv at 0x7f221581cbe0>"
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
"execution_count": 4
}
],
"source": [
Expand Down Expand Up @@ -306,9 +236,9 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
"version": "3.8.5-final"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
}
72 changes: 72 additions & 0 deletions examples/[Library Basics]/temp.ipynb
@@ -0,0 +1,72 @@
{
"metadata": {
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5-final"
},
"orig_nbformat": 2,
"kernelspec": {
"name": "python38564bitfba12b29602d49fd94d253df959599f4",
"display_name": "Python 3.8.5 64-bit"
}
},
"nbformat": 4,
"nbformat_minor": 2,
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"# == recnn ==\n",
"import sys\n",
"sys.path.append(\"../../\")\n",
"import recnn"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# but you can also set it directly:\n",
"recnn.pd.set(\"pandas\")\n",
"frame_size = 10\n",
"batch_size = 25"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": "'/tmp'"
},
"metadata": {},
"execution_count": 5
}
],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
]
}
1 change: 1 addition & 0 deletions recnn/__init__.py
@@ -1 +1,2 @@
from recnn import data, utils, nn, rep
from recnn.data import pd
1 change: 1 addition & 0 deletions recnn/data/__init__.py
Expand Up @@ -2,3 +2,4 @@
from .utils import *
from .env import *
from .dataset_functions import *
from .pandas_backend import pd
16 changes: 12 additions & 4 deletions recnn/data/dataset_functions.py
@@ -1,5 +1,5 @@
from recnn.data.utils import make_items_tensor

from .pandas_backend import pd
"""
What?
+++++
Expand Down Expand Up @@ -41,6 +41,11 @@ def prepare_dataset(**kwargs):
build_data_pipeline function, and it is passed down the function chain. If needed, it will be used. Otherwise, ignored
"""

def try_progress_apply(dataframe, function):
    """Apply *function* over *dataframe*, using tqdm's ``progress_apply``
    when it has been registered (via ``tqdm.pandas()``), and falling back
    to the plain ``apply`` otherwise (e.g. on modin frames or when tqdm
    was never initialised).

    :param dataframe: a pandas/modin Series, DataFrame, or GroupBy object.
    :param function: callable passed straight through to ``(progress_)apply``.
    :return: whatever the underlying ``apply`` call returns.
    """
    # Resolve the method first instead of wrapping the whole call in
    # try/except AttributeError: the old form also swallowed AttributeErrors
    # raised *inside* `function`, silently re-running it (and any side
    # effects) a second time through the plain `apply` path.
    apply_method = getattr(dataframe, "progress_apply", None)
    if apply_method is None:
        apply_method = dataframe.apply
    return apply_method(function)

def prepare_dataset(df, key_to_id, frame_size, env, sort_users=False, **kwargs):

Expand All @@ -49,14 +54,17 @@ def prepare_dataset(df, key_to_id, frame_size, env, sort_users=False, **kwargs):
[1, 34, 123, 2000], recnn makes it look like [0,1,2,3] for you.
"""

df['rating'] = df['rating'].progress_apply(lambda i: 2 * (i - 2.5))
df['movieId'] = df['movieId'].progress_apply(lambda i: key_to_id.get(i))
df['rating'] = try_progress_apply(df['rating'], lambda i: 2 * (i - 2.5))
df['movieId'] = try_progress_apply(df['movieId'], lambda i: key_to_id.get(i))

users = df[['userId', 'movieId']].groupby(['userId']).size()
users = users[users > frame_size]
if sort_users:
users = users.sort_values(ascending=False)
users = users.index

if pd.get_type() == "modin":
df = df._to_pandas()
ratings = df.sort_values(by='timestamp').set_index('userId').drop('timestamp', axis=1).groupby('userId')

# Groupby user
Expand All @@ -68,7 +76,7 @@ def app(x):
user_dict[int(userid)]['items'] = x['movieId'].values
user_dict[int(userid)]['ratings'] = x['rating'].values

ratings.progress_apply(app)
try_progress_apply(ratings, app)

env.user_dict = user_dict
env.users = users
Expand Down
4 changes: 2 additions & 2 deletions recnn/data/env.py
@@ -1,6 +1,6 @@
from . import utils, dataset_functions as dset_F
from .pandas_backend import pd
import pickle
import pandas as pd
from torch.utils.data import Dataset, DataLoader
import torch
import numpy as np
Expand Down Expand Up @@ -101,7 +101,7 @@ def __init__(self, embeddings, ratings, test_size=0.05, min_seq_size=10,
self.embeddings = movies_embeddings_tensor
self.key_to_id = key_to_id
self.id_to_key = id_to_key
self.ratings = pd.read_csv(ratings)
self.ratings = pd.get().read_csv(ratings)

self.user_dict = None
self.users = None # filtered keys of user_dict
Expand Down
5 changes: 2 additions & 3 deletions recnn/data/utils.py
@@ -1,7 +1,7 @@

import numpy as np
import torch
import pandas as pd
from .pandas_backend import pd

# helper function similar to pandas.Series.rolling
def rolling_window(a, window):
Expand Down Expand Up @@ -112,8 +112,7 @@ def padder(x):


def sort_users_itemwise(user_dict, users):
    """Return *users* reordered by how many items each user has, most first.

    :param user_dict: mapping user id -> per-user dict holding an ``'items'``
        array (only ``.shape[0]`` is read here).
    :param users: iterable of user ids (keys into *user_dict*).
    :return: an Index of user ids sorted by descending item count.
    """
    # Dict comprehension instead of dict([...]) over a list of pairs.
    item_counts = {uid: user_dict[uid]['items'].shape[0] for uid in users}
    # pd.get() resolves the active backend (pandas or modin) at call time.
    return pd.get().Series(item_counts).sort_values(ascending=False).index

def prepare_batch_dynamic_size(batch, item_embeddings_tensor, embed_batch=None):
item_idx, ratings_t, sizes_t, users_t = get_irsu(batch)
Expand Down

0 comments on commit 742fc34

Please sign in to comment.