In [1]:
import os
import shutil
import psycopg2
import numpy as np
import pandas as pd
import pandas.io.sql as sqlio
from zipfile import ZipFile, ZIP_DEFLATED
import warnings
warnings.filterwarnings('ignore')

# Database connection settings.
# Values are taken from the standard libpq environment variables so that no
# credentials are stored in the notebook itself. The non-secret settings keep
# their previous values as defaults.
US = os.environ.get('PGUSER', 'postgres')
# Deliberately no default for the password. When PGPASSWORD is unset, PA is
# None; psycopg2 is expected to drop None-valued parameters when building the
# DSN, which leaves libpq to use its own lookup (e.g. ~/.pgpass).
# NOTE(review): confirm this None handling against the installed psycopg2 version.
PA = os.environ.get('PGPASSWORD')
HO = os.environ.get('PGHOST', 'dev.tng.cs.wpi.edu')
PO = int(os.environ.get('PGPORT', 5432))

# Initial connection to the cas_core database; autocommit so that each
# executed script takes effect without an explicit commit.
conn = psycopg2.connect(user=US, password=PA, host=HO, port=PO, database='cas_core')
conn.set_session(autocommit=True)

In [2]:
# Distinct sequence ids found in the experiment results file; each one is
# processed separately by the export cell below.
sequence_ids = pd.read_csv('experiment_results.csv')['sequence_id']
all_eps = sequence_ids.unique()

In [3]:
# Build, export and archive the database tables for every sequence id in
# `all_eps` that does not yet have an archive under output/.
#
# For each pending id the data_builder.sql template is run with the
# placeholder 'epsvar' replaced by the id, the four resulting tables are
# dumped to CSV in a scratch directory, and (only if exp_slogs has rows) the
# CSVs are zipped into output/<id>.zip.

# Joined onto the 'priors' table via 'district_alias' before export.
ddets = pd.read_csv('raw_ddets.csv')

# pd.Timestamp.now() replaces pd.datetime.now(): the pd.datetime alias was
# deprecated in pandas 1.0 and removed in 2.0; the printed format is the same.
print(f'{pd.Timestamp.now()}: Setting up database.')

with open('sql_scripts/setup.sql', 'r') as setup_file:
    setup_sql = setup_file.read()
with conn.cursor() as cur:
    cur.execute(setup_sql)

batch_size = 1000000  # not used in this cell; kept in case other cells read it

# The template is the same for every sequence, so read it once instead of
# re-reading it (and round-tripping it through a tmp.sql file) per iteration.
with open('sql_scripts/data_builder.sql', 'rt') as template_file:
    builder_template = template_file.read()

os.makedirs('output', exist_ok=True)

try:
    # A plain for-loop: the former `while not done` wrapper ran the loop exactly
    # once, and never terminated when `all_eps` was empty.
    for eps in all_eps:
        eps = str(eps)  # str.replace and the file name both need a string
        archive_path = os.path.join('output', f'{eps}.zip')
        if os.path.exists(archive_path):
            continue  # already exported on a previous run

        print(eps)
        print(f'{pd.Timestamp.now()}: Creating database tables.')
        # A fresh session is opened per sequence, as before, but the previous
        # connection is now closed first instead of being leaked.
        conn.close()
        conn = psycopg2.connect(user=US, password=PA, host=HO, port=PO, database='cas_core')
        conn.set_session(autocommit=True)
        # NOTE(review): the id is substituted into the SQL text verbatim; this
        # is only safe while experiment_results.csv is a trusted input.
        with conn.cursor() as cur:
            cur.execute(builder_template.replace('epsvar', eps))

        print(f'{pd.Timestamp.now()}: Exporting database tables.')
        if os.path.exists('tmp'):
            shutil.rmtree('tmp')
        os.makedirs('tmp')
        try:
            slog_rows = 0
            for table in ['exp_slogs', 'exp_plogs', 'exp_alogs', 'priors']:
                data = sqlio.read_sql_query(f'select * from {table}', conn)
                if table == 'exp_slogs':
                    # Remember the row count here rather than re-parsing the
                    # CSV from disk afterwards.
                    slog_rows = len(data)
                if table == 'priors':
                    data = data.merge(ddets, how='left', on='district_alias')
                    data = data.drop('district_alias', axis=1)
                data.to_csv(os.path.join('tmp', f'{table}.csv'), mode='w', index=False, header=True)

            if slog_rows > 0:
                print(f'{pd.Timestamp.now()}: Compressing database tables.')
                # Write under a temporary name and rename when complete: the
                # presence of <id>.zip marks the sequence as done, so a
                # half-written archive must never carry the final name.
                partial_path = archive_path + '.part'
                with ZipFile(partial_path, 'w', ZIP_DEFLATED) as archive:
                    for file in os.listdir('tmp'):
                        archive.write(os.path.join('tmp', file), file)
                os.replace(partial_path, archive_path)
            else:
                print(f'{pd.Timestamp.now()}: No experimental data available.')
        finally:
            # Remove the scratch directory even when the export fails.
            shutil.rmtree('tmp', ignore_errors=True)
finally:
    conn.close()

2021-12-12 14:38:05.949997: Setting up database.
