# 06. Feeding MongoDB

This notebook loads the cleaned PubMed articles table and the Genderize results table from CSV and inserts them into a local `MongoDB` instance. Each table is converted to a list of records and written to its own collection (`pubmed` and `genderize`) in the `publications` database, keyed by the article DOI as `_id`, so the full dataset is stored in one organized, scalable place for further analysis and retrieval.

In [2]:
# Import Libraries

import sys
import time

import pandas as pd
from pymongo import MongoClient
from pymongo.errors import DuplicateKeyError
from tqdm.notebook import tqdm

# from pymongo import MongoClient
from IPython.display import clear_output

### Import dataframes

In [3]:
# Cleaned PubMed article table; preview the first two rows as a sanity check.
articles_csv = '../data/pubmed_articles_clean.csv'
articles = pd.read_csv(articles_csv)
articles.head(n=2)

Unnamed: 0,DOI,title,authors,affiliations,journal,year,month,volume,first_page,last_page,PMID,PMCID,abstract,href,json_href,first_author,last_author
0,10.1177/17585732221102399,Surgical management of the spastic elbow,"['Weisang Luo 1 ', ' Matthew Nixon 1 2']","['Countess of Chester Hospital, Chester, UK.',...",Shoulder Elbow,2023.0,Oct,15(5),534,543,37811394.0,PMC10557929,['Background: We performed a retrospective rev...,https://doi.org/10.1177/17585732221102399,https://api.crossref.org/works/10.1177/1758573...,Weisang Luo 1,Matthew Nixon 1 2
1,10.1007/s11571-022-09871-6,Three-dimensional memristive Morris-Lecar mode...,"['Han Bao 1 ', ' Xihong Yu 1 ', ' Quan Xu 1...","[""School of Microelectronics and Control Engin...",Cogn Neurodyn,2023.0,Aug,17(4),1079,1092,37522038.0,PMC10374513,['To characterize the magnetic induction flow ...,https://doi.org/10.1007/s11571-022-09871-6,https://api.crossref.org/works/10.1007/s11571-...,Han Bao 1,Bocheng Bao 1


In [4]:
# Genderize results table; preview the first two rows as a sanity check.
genderize_csv = '../data/genderize_df.csv'
genderize = pd.read_csv(genderize_csv)
genderize.head(n=2)

Unnamed: 0,_id,fst_auth_name,fst_auth_gd,fst_auth_gd_prb,lst_auth_name,lst_auth_gd,lst_auth_gd_prb
0,10.1177/17585732221102399,Weisang,,0.0,Matthew,male,1.0
1,10.1007/s11571-022-09871-6,Han,male,0.73,Bocheng,male,0.83


### Transform to dict

In [23]:
# MongoDB uses `_id` as the primary key of a document, so the DOI column is
# relabelled to `_id` before the rows are converted to records.
articles = articles.rename({'DOI': '_id'}, axis='columns')

In [26]:
# One dict per DataFrame row (column name -> value), ready for insertion.
articles_dict = articles.to_dict('records')

In [5]:
# One dict per DataFrame row (column name -> value), ready for insertion.
genderize_dict = genderize.to_dict('records')

### Export to MongoDB

In [7]:
# Connection string for the MongoDB server on this machine (default port).
str_conn = 'mongodb://localhost:27017/'

# NOTE: despite its name, `cursor` is the MongoClient itself, not a query
# cursor; the name is kept because later cells refer to it.
cursor = MongoClient(host=str_conn)
cursor

MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True)

In [8]:
# Handle to the `publications` database on the connected client.
db = cursor.get_database('publications')

In [30]:
# Collection that receives the article records.
colec = db.get_collection('pubmed')
colec

Collection(Database(MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True), 'publications'), 'pubmed')

In [45]:
# Insert the article records one at a time so that a record whose `_id`
# already exists in the collection (for example when this cell is re-run, or
# when the same `_id` value occurs more than once) is skipped without
# aborting the remaining inserts.
#
# Only duplicate-key failures are tolerated. Any other error (server not
# reachable, document that cannot be encoded, keyboard interrupt, ...) is
# allowed to propagate instead of being silently swallowed, so the cell can no
# longer "succeed" while writing nothing.
skipped_article_ids = []
for record in articles_dict:
    try:
        colec.insert_one(record)
    except DuplicateKeyError:
        skipped_article_ids.append(record.get('_id'))

# Make the outcome visible: previously skipped records left no trace.
print(
    f'pubmed: inserted {len(articles_dict) - len(skipped_article_ids)} documents, '
    f'skipped {len(skipped_article_ids)} with an already-existing _id'
)

In [9]:
# Collection that receives the Genderize records.
colec_2 = db.get_collection('genderize')
colec_2

Collection(Database(MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True), 'publications'), 'genderize')

In [11]:
# Insert the Genderize records one at a time so that a record whose `_id`
# already exists in the collection (for example when this cell is re-run, or
# when the same `_id` value occurs more than once) is skipped without
# aborting the remaining inserts.
#
# Only duplicate-key failures are tolerated. Any other error (server not
# reachable, document that cannot be encoded, keyboard interrupt, ...) is
# allowed to propagate instead of being silently swallowed.
skipped_genderize_ids = []
for record in genderize_dict:
    try:
        colec_2.insert_one(record)
    except DuplicateKeyError:
        skipped_genderize_ids.append(record.get('_id'))

# Make the outcome visible: previously skipped records left no trace.
print(
    f'genderize: inserted {len(genderize_dict) - len(skipped_genderize_ids)} documents, '
    f'skipped {len(skipped_genderize_ids)} with an already-existing _id'
)

In [12]:
# Close the MongoDB client now that both collections have been written.
# (`cursor` is the MongoClient instance created earlier in this notebook.)
cursor.close()