### RethinkDB - Connection - http://rethinkdb.com/api

In [1]:
import rethinkdb as r

# Address of the RethinkDB server; every other connection option is left at
# the driver's default because none is passed explicitly.
RDB_HOST = '127.0.0.1'

# Connection handle shared by all the cells below.
rdb = r.connect(host=RDB_HOST)

### RethinkDB - Databases & Tables

In [2]:
# Create the 'yanc' database only when the server does not list it yet,
# so the cell can be re-run safely.
rdb_bases = r.db_list().run(rdb)
if 'yanc' not in rdb_bases :
    r.db_create('yanc').run(rdb)

# Same approach for the tables: work out which ones are missing, then create them.
yanc = r.db('yanc')
rdb_tables = yanc.table_list().run(rdb)
missing_tables = [name
                  for name in ('users', 'feeds', 'articles', 'articles_users')
                  if name not in rdb_tables]
for name in missing_tables :
    yanc.table_create(name).run(rdb)

# Query the server again so the printed lists include anything just created.
print('Bases = %s' % (r.db_list().run(rdb),))
print('Tables in yanc = %s' % (yanc.table_list().run(rdb),))

# Make 'yanc' the default database for later r.table(...) calls on this connection.
rdb.use('yanc')

Bases = ['rethinkdb', 'test', 'yanc']
Tables in yanc = ['articles', 'articles_users', 'feeds', 'users']


### Feeds : creation & list - http://pandas.pydata.org/pandas-docs/version/0.18.1/index.html

In [9]:
# Seed the 'feeds' table with a default set of RSS feeds, but only when the
# table is empty, so re-running the cell never duplicates them.
# The former delete() was dropped: it only ran on an already-empty table and
# therefore removed nothing. The three documents are sent in a single insert()
# call (one round-trip) instead of three.
if r.table('feeds').count().run(rdb) < 1 :
    r.table('feeds').insert([
        {'name':'courrier international' , 'url':'http://www.courrierinternational.com/feed/all/rss.xml'},
        {'name':'BFM les experts' , 'url':'http://podcast.bfmbusiness.com/channel78/BFMchannel78.xml'},
        {'name':'Les crises' , 'url':'http://feeds.feedburner.com/les-crises-fr'},
    ]).run(rdb)

In [51]:
import pandas as pd
import numpy as np

# Pull every document of the 'feeds' table into memory, then show the
# first rows as a DataFrame.
feeds_docs = list(r.table('feeds').run(rdb))
df_feeds = pd.DataFrame(feeds_docs)
df_feeds.head()

Unnamed: 0,description,id,image_url_online,name,url
0,Journal Marianne,3d41ba6c696849b2beaad891b82c0d325f0a45c1,http://www.marianne.net/sites/default/files/ne...,Marianne,http://www.marianne.net/rss.xml
1,,1bc726b5bf885f3e163c12abe0a7284d22b661f1,http://www.les-crises.fr/wp-content/themes/les...,Les-Crises.fr,http://feeds.feedburner.com/les-crises-fr
2,,6db05e350059a0108af555692c6100cd3036ecd6,http://a.amz.mshcdn.com/assets/header_logo.v2....,Mashable,http://feeds.mashable.com/mashable
3,,4a0c918f9b381c7026e24a0238269f94f65c7750,http://www.courrierinternational.com/sites/ci_...,Courrier International,http://www.courrierinternational.com/feed/all/...


### Users : creation and feeds association

In [25]:
# Collect the ids of all feeds. list() drains the query result into a plain
# Python list so the insert below receives ordinary data rather than relying
# on the driver to iterate a cursor embedded in the document.
liste_feeds_ids = list(r.table('feeds')['id'].run(rdb))

# Associate every feed with the user 'aure'. conflict='replace' overwrites an
# existing 'aure' document, so the cell can be re-run.
r.table('users').insert({'id':'aure', 'feeds':liste_feeds_ids}, conflict='replace').run(rdb)

# Read the table once and reuse the documents for both the raw print and the
# DataFrame (previously the same query was issued twice).
users_docs = list(r.table('users').run(rdb))
df_users = pd.DataFrame(users_docs)
print(users_docs)
df_users

[{'feeds': ['f1af536f-9985-4209-bef3-fee198a3a46a', 'abb1cde6-d1ce-41d9-aac8-b0a74335a0c6', '149c16ba-b395-48b0-9af8-db4159d3145f'], 'id': 'aure'}]


Unnamed: 0,feeds,id
0,"[f1af536f-9985-4209-bef3-fee198a3a46a, abb1cde...",aure


### Articles per user

In [30]:
# Pull every document of the 'articles_users' table into memory, then show
# the first rows as a DataFrame.
artusr_docs = list(r.table('articles_users').run(rdb))
df_artusr = pd.DataFrame(artusr_docs)
df_artusr.head()

Unnamed: 0,aure,id,ts_published
0,"{'score': 0, 'sorting': '10001610252359', 'tag...",05bec3d42a8b56edd678f920225ac6054e550a62,2016-10-25 23:59:04+02:00
1,"{'score': 0, 'sorting': '10001610281241', 'tag...",0d874c327b7445b16d6b0cb0f5017b1e0b62d6f9,2016-10-28 12:41:17+02:00
2,"{'score': 0, 'sorting': '10001610261526', 'tag...",01ccd9c27becac25ae8f4ebbab7cbbae0f10e2db,2016-10-26 15:26:45+02:00
3,"{'score': 0, 'sorting': '10001610281539', 'tag...",09b1c9b6919f30f39ea2a301253ea67c0792fa72,2016-10-28 15:39:02+02:00
4,"{'score': 0, 'sorting': '10001610272340', 'tag...",1f6a5e84d167e8c39269fe633fd6d85482437726,2016-10-27 23:40:23+02:00


In [48]:
import json


def _with_iso_timestamp(doc):
    """ReQL mapping: return `doc` with 'ts_published' replaced by its ISO 8601 string."""
    return doc.merge({'ts_published' : doc['ts_published'].to_iso8601()})


# The timestamp is converted to a string on the server side; presumably this
# is so json.dumps can serialize each document (the driver otherwise returns
# a native time object for that field).
sample_query = r.table('articles_users').limit(10).map(_with_iso_timestamp)

# Print the first 10 documents, one JSON object per line.
for obj in sample_query.run(rdb) :
    print(json.dumps(obj))

{"aure": {"score": 0, "sorting": "10001610252359", "tags": ["tag1", "tag2"], "status": 0}, "id": "05bec3d42a8b56edd678f920225ac6054e550a62", "ts_published": "2016-10-25T23:59:04+02:00"}
{"aure": {"score": 0, "sorting": "10001610281241", "tags": ["tag1", "tag2"], "status": -666}, "id": "0d874c327b7445b16d6b0cb0f5017b1e0b62d6f9", "ts_published": "2016-10-28T12:41:17+02:00"}
{"aure": {"score": 0, "sorting": "10001610261526", "tags": ["tag1", "tag2"], "status": 0}, "id": "01ccd9c27becac25ae8f4ebbab7cbbae0f10e2db", "ts_published": "2016-10-26T15:26:45+02:00"}
{"aure": {"score": 0, "sorting": "10001610281539", "tags": ["tag1", "tag2"], "status": 0}, "id": "09b1c9b6919f30f39ea2a301253ea67c0792fa72", "ts_published": "2016-10-28T15:39:02+02:00"}
{"aure": {"score": 0, "sorting": "10001610272340", "tags": ["tag1", "tag2"], "status": 0}, "id": "1f6a5e84d167e8c39269fe633fd6d85482437726", "ts_published": "2016-10-27T23:40:23+02:00"}
{"aure": {"score": 0, "sorting": "10001610280920", "tags": ["tag1",