## IMPORTS

In [1]:
import glob
import os

import pandas as pd

## GLOBALS

In [17]:
SCENARIO_CODES = {
    'business_as_usual_5%': '5BAU',
    'concentrated_20%': '20CONC',
    'dispersed_20%': '20DISP',
    'targeted_35%': '35TRGT'
}
FORECAST_CODES = {
    'actual': 'Pwr1HB',
    'forecast_one_hour': 'FcstHA',
    'forecast_four_hour': 'Fcst4HA',
    'forecast_six_hour': 'Fcst6HA',
    'forecast_one_day': 'FcstDA'
}

# ROOT_DIR = 'data'
ROOT_DIR = 'data/test'
PERIOD_DIRS = ['2008-2010']
SCENARIO_DIRS = [
    SCENARIO_CODES['business_as_usual_5%'],
    SCENARIO_CODES['concentrated_20%'],
    SCENARIO_CODES['dispersed_20%'],
    SCENARIO_CODES['targeted_35%']
]
FORECAST_DIRS = [
    FORECAST_CODES['actual'],
    FORECAST_CODES['forecast_one_hour'],
    FORECAST_CODES['forecast_four_hour'],
    FORECAST_CODES['forecast_six_hour'],
    FORECAST_CODES['forecast_one_day']
]

## UTILS

#### GET CSV FILE PATHS

In [28]:
def get_file_paths(period_dirs, scenario_dirs, forecast_dirs):
    """
    Returns a list of CSV file paths within specific period, scenario,
    and forecast directories.

    :param period_dirs: Period directory names
    :type period_dirs: list
    :param scenario_dirs: Scenario directory names
    :type scenario_dirs: list
    :param forecast_dirs: Forecast directory names
    :type forecast_dirs: list
    :return: CSV file paths
    :rtype: list

    """

    ret = []
    for period_dir in period_dirs:
        for scenario_dir in scenario_dirs:
            for forecast_dir in forecast_dirs:
                data_path = '../{r}/{p}/{s}/{f}/'.format(
                    r=ROOT_DIR, p=period_dir, s=scenario_dir, f=forecast_dir)
                ret += glob.glob(os.path.join(data_path, "*.csv"))
    return ret

#### GET DATAFRAMES

In [5]:
def get_dataframes(file_paths):
    """
    Returns a dictionary of dataframes from list of CSV file paths.

    :param file_paths: CSV file paths
    :type file_paths: list
    :return: Dataframes
    :rtype: dict

    """

    ret = {}
    for file_path in file_paths:
        df = pd.read_csv(file_path)
        file_name = os.path.splitext(os.path.basename(file_path))[0]
        ret[file_name] = df
    return ret

#### PRINT HEADS

In [6]:
def print_heads(data_dict):
    """
    Prints file name and dataframe head (first five rows) from data 
    dictionary.
    
    :param data_dict: File names with dataframes
    :type data_dict: dict
    
    """
    
    for file_name, df in data_dict.items():
        print('\n' + file_name + '\n')
        print(df.head())

## TESTS

### PRINT SELECTED HEADS

In [10]:
def print_select_heads(period_dirs, scenario_dirs, forecast_dirs):
    print_heads(get_dataframes(get_file_paths(period_dirs, scenario_dirs, forecast_dirs)))

#### PRINT ALL HEADS

In [31]:
period_dirs = PERIOD_DIRS
scenario_dirs = SCENARIO_DIRS
forecast_dirs = FORECAST_DIRS
print_select_heads(period_dirs, scenario_dirs, forecast_dirs)


0004Pwr1HB

            PlantID =                4
0         Plant Cap =          320.000
1           Date/Time            Value
2  01/01/2008 00:00:00           4.318
3  01/01/2008 01:00:00          34.771
4  01/01/2008 02:00:00          82.155

0003Pwr1HB

            PlantID =                3
0         Plant Cap =          128.000
1           Date/Time            Value
2  01/01/2008 00:00:00           0.989
3  01/01/2008 01:00:00          11.727
4  01/01/2008 02:00:00          27.144

0006Pwr1HB

            PlantID =                6
0         Plant Cap =          272.000
1           Date/Time            Value
2  01/01/2008 00:00:00           0.691
3  01/01/2008 01:00:00          21.632
4  01/01/2008 02:00:00          55.332

0003FcstHA

            PlantID =                   3
0         Plant Cap =             128.000
1           Date/Time               Value
2  01/01/2008 00:00:00          61.802295
3  01/01/2008 01:00:00          57.062765
4  01/01/2008 02:00:00           0.3