# Make Datasets
This module reads the Postgres data and stores it in CSV files, converting the data types to their Python equivalents.

In [1]:
import os
import pandas as pd

from configs.config import get_config
from src.database.aact_db import AACTDao
from src.database.sql import Queryator

In [2]:
# -----------------------------------------------------------------------------#
#                               SETUP                                          #
# -----------------------------------------------------------------------------#
# Queryator generates basic SQL queries.
# AACTDao is the data access object for the AACT database. It exposes methods
# to connect to the database and run queries.
# The directories dictionary contains the directory structure for the data
# directories. The raw data will be stored in the "/data/raw/" directory.
qg = Queryator()
# NOTE(review): judging by this cell's output, creating the DAO appears to
# open the database connection immediately -- confirm against AACTDao.
aact = AACTDao()
# Load the "directories" section of the project configuration.
directories = get_config(section="directories")

Initializing AACT database.
Acquiring connection
Obtained connection to AACT database.


In [3]:
# -----------------------------------------------------------------------------#
#                                TABLE                                         #
# -----------------------------------------------------------------------------#
def get_table_metadata(table):
    '''Collect shape and schema metadata for a database table.

    Three queries are generated with the module-level ``qg`` query
    generator (row count, column count, column listing) and each is
    executed through the module-level ``aact`` data access object.

    Parameters
    ----------
    table : str
        Name of the table to be evaluated.

    Returns
    -------
    dict
        ``'shape'`` maps to a dict with keys ``'nrows'`` and ``'ncols'``;
        ``'schema'`` maps to the result of the column query. Each value is
        stored exactly as returned by ``aact.read_table``.
    '''
    # Build all three queries up front, before touching the database.
    row_count_sql = qg.nrows(table)
    col_count_sql = qg.ncols(table)
    # NOTE(review): the second argument presumably asks for data types
    # alongside the column names -- confirm against Queryator.columns.
    schema_sql = qg.columns(table, True)

    # Dict entries are evaluated top to bottom, so the reads run in the
    # order nrows, ncols, schema.
    return {
        'shape': {
            'nrows': aact.read_table(row_count_sql, coerce_float=True),
            'ncols': aact.read_table(col_count_sql, coerce_float=True),
        },
        'schema': aact.read_table(schema_sql),
    }

In [None]:
# -----------------------------------------------------------------------------#
#                                STUDIES                                       #
# -----------------------------------------------------------------------------#
def get_studies():
    '''Read the "studies" data and return it as a pandas DataFrame.

    Returns
    -------
    pandas.DataFrame
        Frame constructed from whatever ``aact.read("studies")`` returns.
    '''
    # NOTE(review): other code in this notebook uses aact.read_table with a
    # query; confirm that AACTDao.read accepts a bare table name.
    data = aact.read("studies")
    # Return the frame; without this the function yields None to its caller.
    return pd.DataFrame(data)



['character varying' 'date' 'text' 'integer' 'boolean'
 'timestamp without time zone']
