In [1]:
from datetime import datetime

### Set up paths

In [2]:
# Make the parent of the current working directory importable, so that
# project-local packages living there (e.g. `src`) can be imported.
import os
import sys

module_path = os.path.abspath(os.pardir)

# Guarded append: re-running this cell must not pile up duplicate entries
# on sys.path.
if module_path not in sys.path:
    sys.path.append(module_path)

In [3]:
import src.dataproc as dp

### Run scraper

In [4]:
# Scrape the votings of one day only; this is expected to take about a minute.
scraped_data = dp.batch_dump_parliament_votings(
    term=9,
    dates=['2019-11-12'],
)

2021-09-04 19:30:06.696720 --- 2019-11-12 --- start
2021-09-04 19:30:46.751737 --- 2019-11-12 --- scraped


### Set up MongoDB client

In [5]:
client = dp.MyMongoClient()
client.set_votes_collection()

# List the distinct `date` values currently stored; an empty list means
# the collection holds no records yet.
(
    client.db.coll
    .find({}, {'date': 1})
    .distinct('date')
)

[]

### Insert data into MongoDB

In [6]:
# Bulk-insert the scraped documents. With ordered=False the server attempts
# every insert instead of stopping at the first failing document.
client.db.coll.insert_many(
    documents=scraped_data,
    ordered=False,
)

<pymongo.results.InsertManyResult at 0x7fca8930fc80>

### Run scraper and insert data into MongoDB

In [7]:
print(datetime.now())

# Dates already present in the collection are handed over as `dates_to_ignore`.
stored_dates = client.db.coll.find({}, {'date': 1}).distinct('date')

# insert_to_db=True: presumably the helper writes the scraped documents to the
# database itself (the run log reports "scraped & inserted") - confirm in src.dataproc.
scraped_data = dp.batch_dump_parliament_votings(
    term=9,
    dates=['2019-11-13'],
    dates_to_ignore=stored_dates,
    insert_to_db=True,
)

print(datetime.now())

2021-09-04 19:30:46.898432
2021-09-04 19:30:47.554290 --- 2019-11-13 --- start
2021-09-04 19:31:40.240704 --- 2019-11-13 --- scraped & inserted
2021-09-04 19:31:40.241604


In [8]:
# Quick size check of the collection. Note that this is an estimate taken
# from collection metadata, not an exact count of matching documents.
votes_collection = client.db.coll
votes_collection.estimated_document_count()

5980

In [9]:
# Confirm which voting dates the collection now contains.
(
    client.db.coll
    .find({}, {'date': 1})
    .distinct('date')
)

['2019-11-12', '2019-11-13']