In [1]:
import sqlite3
import json

In [2]:
# Location of the SQLite file, relative to the notebook's working directory.
DB_PATH = '../databases/CovidData.db'

# Open the database and keep one cursor; the cells below issue all their SQL
# through `cursor` and finish by committing/closing `conn`.
conn = sqlite3.connect(DB_PATH)
cursor = conn.cursor()

In [3]:
# Destination table for the combined journal records. Every column is declared
# TEXT and `id` is the primary key; IF NOT EXISTS makes the cell safe to re-run.
create_table_sql = '''
    CREATE TABLE IF NOT EXISTS covid_journals_data (
        id TEXT PRIMARY KEY,
        source TEXT,
        pub_date TEXT,
        pub_timestamp TEXT,
        title TEXT,
        link TEXT,
        authors TEXT,
        keywords TEXT
    )
'''
cursor.execute(create_table_sql)

<sqlite3.Cursor at 0x2433c31e3c0>

In [4]:
from datetime import datetime, timezone

# One statement for both sources. The eight positional placeholders are bound
# in the order the row tuples below are built:
# id, source, pub_date, pub_timestamp, title, link, authors, keywords.
INSERT_ROW_SQL = "INSERT OR IGNORE INTO covid_journals_data VALUES(?,?,?,?,?,?,?,?)"


def _utc_timestamp(naive_dt):
    """Return the POSIX timestamp of `naive_dt`, reading it as UTC.

    Calling ``.timestamp()`` directly on a naive datetime interprets it in the
    machine's local timezone, so the stored value would change depending on
    where the notebook runs. Pinning the value to UTC keeps it reproducible.
    """
    return naive_dt.replace(tzinfo=timezone.utc).timestamp()


def _pubmed_affiliations(author):
    """Collect one PubMed author's affiliations as a flat list.

    `AffiliationInfo` may be missing/empty, a single dict, or a list. Dict
    entries are reduced to their 'Affiliation' value in both shapes so the
    stored list has the same layout regardless of how many entries there were;
    non-dict entries are kept unchanged.
    """
    info = author.get('AffiliationInfo')
    if not info:
        return []
    if isinstance(info, dict):
        info = [info]
    if not isinstance(info, list):
        return []
    return [
        entry.get('Affiliation', {}) if isinstance(entry, dict) else entry
        for entry in info
    ]


def _pubmed_keywords(item):
    """Return the PubMed keyword texts for `item` as a list.

    Accepts a missing or null 'keywords' value, a single keyword that is not
    wrapped in a list, and keywords given as plain strings instead of dicts
    carrying a '#text' entry.
    """
    keywords = item.get('keywords') or []
    if isinstance(keywords, (dict, str)):
        keywords = [keywords]
    return [
        keyword['#text'] if isinstance(keyword, dict) else keyword
        for keyword in keywords
    ]


def _doaj_row(item):
    """Build the insert tuple for one record whose source is 'Doaj'."""
    # The trailing 'Z' in the source string marks the value as UTC.
    published = datetime.strptime(item['date'], '%Y-%m-%dT%H:%M:%SZ')

    authors_list = [
        {
            "name": author.get('name', ""),
            "affiliations": author.get('affiliation', ""),
        }
        for author in item['authors']
    ]

    # NOTE(review): list columns are stored as Python reprs via str(), not as
    # JSON; kept as-is so existing readers of the table keep working.
    return (
        item['id'],
        item['source'],
        str(published),
        str(_utc_timestamp(published)),
        item['title'],
        str([link['url'] for link in item['link']]),
        str(authors_list),
        str(item['keywords']),
    )


def _pubmed_row(item):
    """Build the insert tuple for one record whose source is 'PubMed'."""
    date_parts = item['date']
    published = datetime.strptime(
        f"{date_parts['Year']}-{date_parts['Month']}-{date_parts['Day']}",
        '%Y-%m-%d',
    )

    # A lone author may arrive as a dict rather than a one-element list;
    # wrap it so that author is not silently dropped.
    authors = item['authors']
    if isinstance(authors, dict):
        authors = [authors]

    authors_list = []
    if isinstance(authors, list):
        for author in authors:
            authors_list.append({
                "name": f"{author.get('ForeName', '')} {author.get('LastName', '')}",
                "affiliations": _pubmed_affiliations(author),
            })

    return (
        item['id'],
        item['source'],
        str(published),
        # The date carries no timezone; midnight UTC is used so the stored
        # value does not depend on the machine running the notebook.
        str(_utc_timestamp(published)),
        item['title'],
        item['link'],
        str(authors_list),
        str(_pubmed_keywords(item)),
    )


# Decode explicitly as UTF-8: without `encoding`, open() falls back to the
# locale's codec, which is not UTF-8 on every platform.
with open('../data/combined_sources.json', 'r', encoding='utf-8') as json_file:
    data = json.load(json_file)

# Records from any other source are skipped. INSERT OR IGNORE leaves rows whose
# id already exists untouched, so re-running this cell does not duplicate data.
for item in data:
    if item['source'] == 'Doaj':
        cursor.execute(INSERT_ROW_SQL, _doaj_row(item))
    elif item['source'] == 'PubMed':
        cursor.execute(INSERT_ROW_SQL, _pubmed_row(item))

In [5]:
# Persist the rows inserted through `cursor`, then release the database handle.
# Order matters: the commit has to happen before the connection is closed.
conn.commit()
conn.close()