In [None]:
# SET GLOBALS

import os, csv , re
from os import path
import pandas as pd
from sqlalchemy import create_engine, text

# SQLAlchemy engine handed to every pandas SQL read in this notebook.
# NOTE(review): the connection URL is blank here — presumably scrubbed; fill it in before running.
DB_ENGINE = create_engine('')
# First path component of every exported *.dat file path (blank here as well).
OUTPUT_ROOT = ''

In [None]:
# APC files are separated into individual *.dat files broken down by system, data_type, date, and bus_id.
# This creates a listing of the individual files represented in the APC_RAW_DATAPOINTS table.

# One row per distinct (system_id, record_type, file_date, bus_id) combination
# found in APC_RAW_DATAPOINTS; the GROUP BY carries no aggregate, so it only
# de-duplicates the four key columns.
data = pd.read_sql(
    con=DB_ENGINE,
    sql='''
        SELECT 
            system_id,record_type,file_date,bus_id 
        FROM apc_raw_datapoints 
        GROUP BY system_id,record_type,bus_id,file_date''',
)

In [None]:
# This calculates the file path for each of the individual files represented in the APC_RAW_DATAPOINTS table.
# This also checks to see whether or not the file exists and filters the list down to files needing to be exported.

def filepath(row):
    """Build the target *.dat path for one row of the file listing.

    Layout under OUTPUT_ROOT:
        appl: <system_id>/data_appl/apc/<YYYYMMDD>/<bus_id>.dat
        nova: <system_id>/data_nova/<YYYYMMDD>/<bus_id>.dat

    Args:
        row: Record exposing ``record_type``, ``system_id``, ``file_date``
            (anything with ``strftime``) and ``bus_id``.

    Returns:
        The joined path as a string, or None when ``record_type`` is neither
        'appl' nor 'nova'.
    """
    kind = row.record_type
    if kind not in ('appl', 'nova'):
        return None

    parts = [OUTPUT_ROOT, row.system_id, 'data_{0}'.format(kind)]
    if kind == 'appl':
        # Only 'appl' files carry the extra 'apc' directory level.
        parts.append('apc')
    parts.append(row.file_date.strftime('%Y%m%d'))
    parts.append('{0}.dat'.format(row.bus_id))
    return path.join(*parts)
            
# Resolve each listed file to its target path, then flag which targets are
# already on disk. Note the column name: 'export' is True when the file
# EXISTS, so the rows still to be exported are the ones where it is False.
data['filepath'] = data.apply(filepath, axis=1)
data['export'] = data['filepath'].map(os.path.isfile)
exports = data[~data['export']]

In [None]:
# Renumber the pending exports 0..n-1 (the previous index is kept as an
# 'index' column) so that row.name can serve as a running position.
exports = exports.reset_index()

In [None]:
# Preview the leaf directory of the first pending export. path.dirname is
# used instead of a regex so the result does not depend on the OS path
# separator or on the number of digits in the bus id.
path.dirname(exports['filepath'].iloc[0])

In [None]:
def create_leaf_dir(row):
    """Ensure the directory that will contain ``row.filepath`` exists.

    Args:
        row: Record exposing ``record_type`` and ``filepath`` attributes
            (e.g. one row of the pending-export listing).

    Only the 'appl' and 'nova' record types are handled; rows of any other
    type are ignored. Calling this repeatedly for the same directory is safe.
    """
    if row.record_type not in ('appl', 'nova'):
        return

    # Derive the directory from THIS row's path (not from the first row of
    # the listing), and let os.path handle the separator so this works on
    # any platform and for any bus id length.
    leaf_dir = path.dirname(row.filepath)
    if leaf_dir:
        # exist_ok avoids the check-then-create race of a separate exists() test.
        os.makedirs(leaf_dir, exist_ok=True)

def writefile(row):
    """Export the raw records of one file from APC_RAW_DATAPOINTS to disk.

    Args:
        row: One row of the pending-export listing, exposing ``system_id``,
            ``record_type``, ``file_date`` (with a ``.date()`` method),
            ``bus_id``, ``filepath`` and an integer ``name`` (its position
            in ``exports``, used only for the progress message).

    The records are written to ``row.filepath`` one per line, ordered by
    ``line_num``, with no header, index or quoting. The target directory
    must already exist.
    """
    # A file is identified by system, record type, date and bus, so all four
    # are filtered on; otherwise records of other systems/record types that
    # share the same date and bus id would end up in this file.
    # text() makes the :name bind parameters independent of the DB driver's
    # native parameter style.
    query = text('''
        SELECT
            raw_record
        FROM
            apc_raw_datapoints
        WHERE
            system_id=:system_id
        AND
            record_type=:record_type
        AND
            file_date=TO_DATE(:file_date,'YYYY-MM-DD')
        AND
            bus_id=:bus_id
        ORDER BY
            line_num''')
    export_df = pd.read_sql_query(
        query,
        DB_ENGINE,
        params={
            'system_id': row.system_id,
            'record_type': row.record_type,
            # Passed as an ISO date string to match the TO_DATE format mask.
            'file_date': str(row.file_date.date()),
            'bus_id': row.bus_id,
        })
    print('Writing {0} of {1} --- {2}'.format(row.name+1, len(exports), row.filepath))
    # NOTE(review): with QUOTE_NONE and no escapechar, the csv writer raises
    # if a raw_record contains the "|" separator — presumably it never does.
    export_df.to_csv(row.filepath,index=False,header=False,quoting=csv.QUOTE_NONE,sep="|")
    
# Export every pending file: make sure its directory exists, then write it.
# A plain loop is used because both calls are side effects only; iterrows
# yields each row as a Series whose .name is the row's index label.
for _, export_row in exports.iterrows():
    create_leaf_dir(export_row)
    writefile(export_row)