In [1]:
import os
import json
import time
from tqdm import tqdm
from models import *
from sqlmodel import create_engine, Session

In [2]:
# Wall-clock timestamp taken before any work starts; the final cell subtracts
# it from the current time to report the notebook's total elapsed time.
start_time = time.time()

In [3]:
# Define the path to the directory that holds the input JSON files
directory_path = 'data/'

# List all entries in the directory. os.listdir() returns names in arbitrary
# order, so sort them to make the processing order (and therefore the insert
# order and any error messages) reproducible from run to run.
files = sorted(os.listdir(directory_path))

In [4]:
# len(files)

In [5]:
# Create the PostgreSQL engine.
# The connection URL embeds the database password, so allow it to be supplied
# through the DATABASE_URL environment variable instead of living in the
# notebook. The previous hardcoded URL is kept only as a fallback so an
# unconfigured local run behaves as before.
DATABASE_URL = os.environ.get(
    "DATABASE_URL",
    "postgresql://postgres:mysecretpassword@localhost/db1",
)
engine = create_engine(DATABASE_URL, echo=False)

# Create the tables in the database
SQLModel.metadata.create_all(engine)

In [6]:
def _store_item(session, item):
    """Stage all rows derived from one JSON record in ``session``.

    Reads ``item['controller_id']`` / ``item['controlled_id']`` (index 0 is
    used as ``kb_name``, index 1 as ``kb_id``), ``item['controller']``,
    ``item['controlled']``, ``item['polarity']`` and ``item['label']``.

    Rows are only flushed, never committed: the caller owns the transaction.
    Raises ``KeyError`` / ``IndexError`` if the record lacks an expected field.
    """
    controller_participant = Participant(kb_name=item['controller_id'][0], kb_id=item['controller_id'][1])
    controlled_participant = Participant(kb_name=item['controlled_id'][0], kb_id=item['controlled_id'][1])

    # NOTE(review): a fresh placeholder Journal and Article are inserted for
    # every record, and their name is the string 'None' rather than NULL --
    # confirm this is intended.
    journal = Journal(name='None', impact_factor=None)

    session.add(controller_participant)
    session.add(controlled_participant)
    session.add(journal)
    # flush() sends the pending INSERTs so the generated ids are populated on
    # the objects, without paying for a commit + refresh round trip each time.
    session.flush()

    controller_participant_description = ParticipantDescription(description=item['controller'], participant_id=controller_participant.id)
    controlled_participant_description = ParticipantDescription(description=item['controlled'], participant_id=controlled_participant.id)

    interaction = Interaction(
        controller=controller_participant.id,
        controlled=controlled_participant.id,
        polarity=item['polarity'],
        # Directed only when the last '_'-separated token of the label is exactly 'regulation'.
        directed=item['label'].split('_')[-1] == 'regulation',
    )

    article = Article(provenance=None, url=None, name='None', publish_date=None, journal_id=journal.id)

    session.add(controller_participant_description)
    session.add(controlled_participant_description)
    session.add(interaction)
    session.add(article)
    session.flush()  # need interaction.id and article.id below

    significance = Significance(type=None, value=None, secondary_value=None, article_id=article.id)
    evidence = Evidence(text=None, markup=None, article_id=article.id, interaction_id=interaction.id)
    session.add(significance)
    session.add(evidence)


for file_name in tqdm(files):
    file_path = os.path.join(directory_path, file_name)

    # Skip sub-directories and anything else that is not a regular file.
    if not os.path.isfile(file_path):
        continue

    # Read and close the file before touching the database. The encoding is
    # explicit so parsing does not depend on the platform default.
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except (json.JSONDecodeError, UnicodeDecodeError) as e:
        print(f'Error reading {file_name}: {e}')
        continue

    # Nothing to insert for empty files ([] / {} / null).
    if not data:
        continue

    # One transaction per file: either every record of the file is stored or,
    # if a record raises, the session is closed without committing and none are.
    with Session(engine) as session:
        for item in data:
            _store_item(session, item)
        session.commit()

100%|██████████| 914/914 [02:21<00:00,  6.44it/s]


In [7]:
# Report the total run time as HH:MM:SS, measured from start_time.
elapsed_hms = time.strftime('%H:%M:%S', time.gmtime(time.time() - start_time))
print(f"Elapsed time: {elapsed_hms}")

Elapsed time: 00:02:22
