In [None]:
import os
import getpass
import pandas as pd
import numpy as np
import csv
import statsmodels.api as sm
import warnings
import math
import copy
import multiprocessing
import traceback
import hashlib
import signal
from tqdm import tqdm_notebook as tqdm
from sklearn.model_selection import train_test_split
from itertools import combinations
from scipy import stats
from datetime import datetime
from sklearn.metrics import mean_absolute_error
from datetime import datetime
from dateutil.relativedelta import relativedelta
warnings.filterwarnings("ignore")

# Locate the directory this code lives in. `__file__` is not defined when the
# code runs inside a notebook kernel, in which case the current working
# directory is used instead. Only NameError is caught so unrelated failures
# are not silently hidden.
try:
    __file__
except NameError:
    curr_dir = os.path.abspath('')
else:
    curr_dir = os.path.dirname(os.path.abspath(__file__))

# When running from a "src" directory the application root is its parent.
app_root = curr_dir if os.path.basename(curr_dir) != "src" else os.path.dirname(curr_dir)

if getpass.getuser() == "rainfalld":  # docker daemon
    home = os.path.expanduser("~")
    destdir = home                    # /var/cache/rainfall-predictor
else:
    destdir = os.path.join(app_root,'data','manipulated_data')      # non-docker stay in repository


# Load both rainfall tables and index them by their parsed 'Date' column.
file = os.path.join(destdir,'rainfalldata.csv')
rd = pd.read_csv(file)
file2 = os.path.join(destdir,'ncrainfalldata.csv')
ncrd = pd.read_csv(file2)
rd.Date = pd.to_datetime(rd.Date)
rd = rd.set_index('Date')
ncrd.Date = pd.to_datetime(ncrd.Date)
ncrd = ncrd.set_index('Date')

In [None]:
import json
# This cell loads the exogen dictionary produced by the Data_Wrangling_CAP1 notebook,
# either from the IPython store (%store) or from the exogen.json file written to disk.
try:
    raise NameError()
    %store -r exogen
except NameError:
    f = open(os.path.join(destdir,"exogen.json"),"r")
    exogen = json.load(f)      # read from file, passed from Data_Wrangling
    f.close()


### Function Library
Handles parallel-processing model calculation, mean absolute error calculations, and near-real-time calculation storage

In [None]:
def sarima_model_creation(data, p, d, q, P, D, Q, m, exog=None):
    ''' Builds a SARIMAX model for the given series and fits it
        args: data = endogenous series handed to SARIMAX
              p, d, q = entries of the non-seasonal order, in that sequence
              P, D, Q, m = entries of the seasonal order, in that sequence
              exog = optional exogenous regressors handed to SARIMAX
        returns: the object produced by SARIMAX(...).fit(disp=0)
    '''
    model = sm.tsa.statespace.SARIMAX(
        data,
        exog,
        order=[p, d, q],
        seasonal_order=[P, D, Q, m],
        enforce_stationarity=False,
        enforce_invertibility=False,
        initialization='approximate_diffuse',
    )
    # disp=0 is passed straight through to fit()
    return model.fit(disp=0)   # start_params=[0, 0, 0, 0, 1])

In [None]:
def model_creation_pred_one_step(train_data, test_data, exotrain=None, exotest=None, progress_bar=None):
    ''' Walk-forward one-step forecasting: fits a model on the training data,
        forecasts the next period, then moves the first remaining test row into
        the training data and repeats until the test data is used up.
        args: train_data   = data set to base the first prediction on
              test_data    = data whose rows are fed into the training set one at a time
              exotrain     = exogenous data covering the train_data timeframe (optional)
              exotest      = exogenous data covering the test_data timeframe; required
                             whenever exotrain is given and test_data has more than one row
              progress_bar = optional object with an update() method, called once per forecast
        returns: list with one prediction per row of test_data (a single
                 prediction is still produced when test_data is empty)
    '''
    list_one_step = []

    # An empty test set still yields one forecast, as a single call always did.
    n_steps = max(len(test_data), 1)

    # Iterating instead of recursing keeps the call stack flat for long test sets.
    for step in range(n_steps):
        next_month = model_based_forecast(train_data, exotrain)
        # Take the first forecast value by position; plain `[0]` on a
        # label-indexed Series relies on a deprecated positional fallback.
        list_one_step.append(np.asarray(next_month).ravel()[0])
        if progress_bar is not None:             # progress reporting is optional
            progress_bar.update()

        if step + 1 < n_steps:
            # grow the training window by the test row that was just predicted
            train_data = pd.concat([train_data, test_data.iloc[[step]]])
            if exotrain is not None:
                exotrain = pd.concat([exotrain, exotest.iloc[[step]]])

    return(list_one_step)

def model_based_forecast(train_data, exotrain=None):
    ''' Fits the fixed-order SARIMA model on the training data and forecasts one step
        args: train_data = data used to build the forecasting model
              exotrain   = optional exogenous data passed to the model as exog
        returns: whatever the fitted model's forecast() returns for the next
                 step (the caller reads its first element)
    '''
    fitted = sarima_model_creation(train_data, p=4, d=0, q=3, P=3, D=0, Q=4, m=12, exog=exotrain)
    if exotrain is None:
        # no exogenous data: forecast from the model alone
        return(fitted.forecast())
    # the last row of exotrain (the previous month) is supplied as the exog for the forecast
    return(fitted.forecast(exog=exotrain.iloc[[-1]]))

def maeFinder(train_data, test_data, exotrain=None, exotest=None, pbar=None):
    ''' Computes the Mean Absolute Error between test_data and the one-step
        predictions produced by model_creation_pred_one_step
        args: train_data = data set to base predictions on
              test_data  = data the predictions are scored against
              exotrain   = optional exogenous data for the train_data timeframe
              exotest    = optional exogenous data for the test_data timeframe
              pbar       = optional tqdm progress bar; a temporary one is
                           created (and closed afterwards) when omitted
        returns: Mean Absolute Error rounded to 9 decimal places
    '''
    DECIMAL_PRECISION = 9

    # Work on deep copies of every input (deepcopy(None) is simply None).
    train_copy, test_copy = copy.deepcopy(train_data), copy.deepcopy(test_data)
    exotrain_copy, exotest_copy = copy.deepcopy(exotrain), copy.deepcopy(exotest)

    owns_bar = pbar is None
    if owns_bar:
        bar = tqdm(total=len(test_data),leave=False)   # temporary counter
    else:
        bar = pbar

    predictions = model_creation_pred_one_step(train_copy, test_copy, exotrain_copy, exotest_copy, bar)
    if owns_bar:
        bar.close()                                    # only close the bar created here

    error = mean_absolute_error(test_data, predictions)
    return(round(error, DECIMAL_PRECISION))


In [None]:
def find_keymae(loc_name, data_targetloc, data_split_size):
    ''' Evaluates the baseline (no exogenous data) model of one location and
        returns its mean absolute error ("keymae").
        A previously stored keymae is reused when the SHA-1 of the location's
        data matches the stored signature; otherwise the keymae is recomputed,
        saved to results_filename, and the location's entry is removed from
        bettermae_results_filename.
        args: loc_name = Name of location to use as keyword in json
              data_targetloc = rainfall data of the specific location over time
              data_split_size = fraction (decimal) of the data held out for testing
        returns: dictionary {'loc_name': loc_name, 'mae': keymae value}
    '''
    tr, test = train_test_split(data_targetloc, test_size=data_split_size, shuffle=False)
    keymae = { 'loc_name': loc_name }

    data_signature = hashlib.sha1( bytes(data_targetloc.to_csv(), 'utf-8') ).hexdigest()

    # Lock installed by initKeymaeWorker in pool workers; absent when this
    # function is called directly, in which case no locking is done.
    file_lock = globals().get('lock')

    def run_locked(action):
        ''' Runs action() while holding the shared file lock, if there is one. '''
        if file_lock is None:
            return action()
        with file_lock:
            return action()

    def read_results():
        ''' Loads the results file; a missing or unparsable file counts as empty. '''
        try:
            with open(results_filename, 'r') as all_results_file:
                loaded = json.load(all_results_file)
        except (OSError, ValueError):            # JSONDecodeError is a ValueError
            return {}
        return loaded if isinstance(loaded, dict) else {}

    # Read under the lock so a file another worker is rewriting is never parsed half-way.
    all_results = run_locked(read_results)

    # A location that has never been stored is treated as an empty entry
    # instead of raising KeyError further down.
    previous = all_results.get(loc_name)
    if not isinstance(previous, dict):
        previous = {}

    if 'keymae' in previous and previous.get('data_source_sha1') == data_signature:
        keymae['mae'] = previous['keymae']       # data unchanged: reuse stored value
        return(keymae)

    print(' ', end='', flush=True)   # weird hack that makes progress bars work in forked processes
    with tqdm(desc="(keymae) "+keymae['loc_name'],total=len(test),leave=False) as keymae_pbar:
        keymae['mae'] = maeFinder(tr, test, pbar=keymae_pbar)

    # Save calculation to file
    loc_data = {'keymae':keymae['mae'],'data_source_sha1': data_signature }
    if previous.get('exogen'):                   # keep any previously processed exmaes
        loc_data['exogen'] = previous['exogen']

    # wipe bettermae file of any keymae results since keymae has been recalculated
    def delete_location(data, location):
        if location in data:
            del data[location]
        return(data)

    def save_results():
        # Both read-modify-write updates happen under one lock acquisition so
        # concurrent workers cannot overwrite each other's entries.
        update_JSON_file(results_filename, None, (keymae['loc_name'], loc_data) ) # save with default adjuster
        update_JSON_file(bettermae_results_filename, delete_location, (keymae['loc_name'],))

    run_locked(save_results)

    return(keymae)

def initKeymaeWorker(l):
    ''' Initializer run in each keymae pool process: ignores SIGINT in the
        child and publishes the synchronization lock as a module global.
        args: l = synchronization lock object
    '''
    global lock
    # Child processes ignore the interrupt signal
    signal.signal(signal.SIGINT, signal.SIG_IGN)
    lock = l


In [None]:
def find_exmae(extr, extest):
    ''' Standalone pool task that finds the mae ("exmae") obtained when the
        given exogenous data is added to the target location's model.
        Relies on the module globals set by initExmaeWorker (lock, keymae, tr,
        test) and on results_filename / tmp_bettermae_filename.
        A stored exmae is reused when the SHA-1 of the exogenous data matches
        the stored signature. A freshly computed exmae is written to
        results_filename and, when it beats the keymae, also to
        tmp_bettermae_filename; both writes happen under the shared lock.
        args: extr = exogenous location training data to be used for model training
              extest = exogenous location test data matching the target's test timeframe
        returns: dictionary {"co": tuple of exogenous column names, "exmae": value}
    '''
    co = tuple(extr.columns)
    exog_name = '|'.join(co)
    target_name = keymae['loc_name']

    data_signature = hashlib.sha1( bytes(pd.concat([extr,extest]).to_csv(), 'utf-8') ).hexdigest()

    # process synchronization on file read
    with lock:
        try:
            with open(results_filename, 'r') as all_results_file:
                all_results = json.load(all_results_file)
        except (OSError, ValueError):            # missing or unparsable file: first time run
            all_results = {}

    # Walk down to the stored entry for this exogenous set; anything missing
    # or malformed along the way simply means "nothing cached".
    target_entry = all_results.get(target_name) if isinstance(all_results, dict) else None
    exog_dict = target_entry.get('exogen') if isinstance(target_entry, dict) else None
    cached = exog_dict.get(exog_name) if isinstance(exog_dict, dict) else None

    if isinstance(cached, dict) and 'exmae' in cached and \
       cached.get('data_source_sha1') == data_signature:

        exmae = cached['exmae']
        # Comment out the next line to rebuild allBetterMAE.json if needed
        return { "co": co, "exmae": exmae }

    else:
        print(' ', end='', flush=True)    # weird hack that makes progress bars work in forked processes
        pbar_desc="(exmae)"
        with tqdm(desc=pbar_desc,total=len(test),leave=False) as exmae_pbar:
            exmae = maeFinder(tr, test, extr, extest, exmae_pbar)

    def save_solved_exmae(all_solutions, targetloc, exogloc, exmae, data_hash):
        ''' handler function for adjustment of JSON relating to results_file,
            see adjustfn for update_JSON_file()
            args: all_solutions = loaded python-equivalent of json from file
                  targetloc = keyword of target location of current keymae
                  exogloc = exogenous location name being recorded
                  exmae = mean absolute error obtained with that exogenous data
                  data_hash = sha1 digest of exog data set used to calculate exmae
            returns: dictionary object
        '''
        # setdefault creates the target entry / exogen dictionary when either
        # does not exist yet, instead of raising KeyError.
        target = all_solutions.setdefault(targetloc, {})
        target.setdefault('exogen', {})[exogloc] = { 'exmae': exmae, 'data_source_sha1': data_hash }
        return(all_solutions)

    with lock:
        # Update status file with solved exmae
        update_JSON_file(results_filename, save_solved_exmae, (target_name, exog_name, exmae, data_signature))

        # Record the exmae in the temporary bettermae file if it is better than keymae
        if exmae < keymae['mae']:
            update_JSON_file(tmp_bettermae_filename, None, (exog_name, exmae))   # Save with default adjuster

    return { "co": co, "exmae": exmae }


def initExmaeWorker(l, targetloc_obj, list_exoloc):
    ''' Initializer run in each exmae pool process: ignores SIGINT in the
        child and publishes the shared values as module globals.
        args: l = synchronization lock object
              targetloc_obj = {
                  name = target location name keyword
                  keymae = target location keymae value
                  tr = training data of the target location
                  test = testing data of the target location
              }
              list_exoloc = list of exogenous locations related to target location
    '''
    global lock, keymae, tr, test, l_exoloc

    # Child processes ignore the interrupt signal
    signal.signal(signal.SIGINT, signal.SIG_IGN)

    lock = l
    keymae = dict(loc_name=targetloc_obj['name'], mae=targetloc_obj['keymae'])
    tr = targetloc_obj['tr']
    test = targetloc_obj['test']
    l_exoloc = list_exoloc

In [None]:
def targetloc_vars(data, targetlocations, data_split_size, progress_bars):
    ''' Finds the baseline prediction error (keymae) of every target location.
        It spawns a pool of processes (# of CPU cores minus 1, but at least 1)
        that each run find_keymae. Each keymae is printed to stdout and stored
        into the results file. The function does not return until the pool has
        finished, then reads the keymae values back from results_filename.
        args:
              data : full rainfall DataFrame
              targetlocations = list of all target location names
              data_split_size = percentage of data set to test
              progress_bars = {
                  'keymae_pbar' = tqdm object for keymae program progress
                  'total_pbar' = tqdm object for entire program progress
              }
        returns: dictionary {location name: keymae} for every entry of the
                 results file that has a 'keymae' value
        raises: FileNotFoundError when the results file does not exist afterwards
                KeyboardInterrupt (re-raised) after terminating the workers
    '''
    # unpack progress_bars
    total_progress = progress_bars['total_pbar']
    keymae_progress = progress_bars['keymae_pbar']

    # process keymaes
    poolLock = multiprocessing.Lock()
    # Leave one cpu for basic OS functions, but never request 0 processes:
    # Pool raises ValueError for that on a single-core machine.
    process_limit = max(1, multiprocessing.cpu_count()-1)

    # number of predictions a single keymae task represents on the total bar
    predictions_per_task = math.ceil(data.shape[0]*data_split_size)

    def on_success(result):
        print('keymae of {0} = {1}'.format(result['loc_name'],str(result['mae'])), flush=True)
        # update counter of completion
        keymae_progress.update()
        total_progress.update(predictions_per_task)

    def on_error(err):
        print("ERROR: {}".format(err), flush=True)
        traceback.print_exception(type(err), err, err.__traceback__)

    # create pool processes & set global/shared variables
    pool = multiprocessing.Pool(
        processes=process_limit,
        initializer=initKeymaeWorker,
        initargs=(poolLock,)
    )
    print("[Exogenous_Variables] Created keymae processing pool with {} processes".format(process_limit), flush=True)

    for loc in targetlocations:
        pool.apply_async(
            find_keymae,
            args=(loc, data[loc], data_split_size),
            kwds={},
            callback=on_success,
            error_callback=on_error
        )

    pool.close()      # no more tasks can be added for the pool to accomplish
    print("[Exogenous_Variables] Pool executing {0} tasks across {1} processes".format(len(targetlocations), process_limit), flush=True)
    try:
        print("[Exogenous_Variables] waiting for keymae workers...", flush=True)
        pool.join()       # tell parent to wait until all tasks are accomplished by the process pool
    except KeyboardInterrupt:
        print ("\nMultiprocessing Pool: KeyboardInterrupt. Terminating keymae workers...")
        pool.terminate()
        pool.join()
        raise             # re-raise the same interrupt (keeps its traceback) to stop the parent

    # Collect all Keymae values that were found
    if not os.path.isfile(results_filename):
        raise FileNotFoundError("Missing file {}".format(results_filename))

    with open(results_filename, 'r') as all_results_file:
        all_results = json.load(all_results_file)

    # extract keymae values only; entries without one (e.g. a failed task) are skipped
    keymae_storage = {
        loc_name: result['keymae']
        for loc_name, result in all_results.items()
        if isinstance(result, dict) and 'keymae' in result
    }
    return(keymae_storage)


In [None]:
def exogenous_var(obj_targetloc, data_split_size, l_exoloc, progress_bars):
    ''' Evaluates a location's exogenous variables and their effect on the prediction model.
        It spawns a pool of processes (# of CPU cores minus 1, but at least 1)
        that each run find_exmae for one candidate set of exogenous data. Each
        exmae is printed to stdout. The function waits for the whole pool, then,
        if the temporary bettermae file exists, stores the improving exogenous
        sets for this location in bettermae_results_filename and deletes the
        temporary file.
        Side effect: adds 'tr' and 'test' keys (the train/test split of
        obj_targetloc['data']) to obj_targetloc.
        args:
              obj_targetloc = {
                 'data' : rainfall data of the target location
                 'name' : location name
                 'keymae' : value
              }
              data_split_size = percentage in decimal form to define the data split to use to evaluate
              l_exoloc = list of exogenous data sets to try for the target location
              progress_bars = {
                  'exmae_pbar' = tqdm object for exmae program progress
                  'total_pbar' = tqdm object for entire program progress
              }
        returns: empty tuple
        raises: KeyboardInterrupt (re-raised) after terminating the workers
    '''
    # unpack progress_bars
    total_progress = progress_bars['total_pbar']
    exmae_progress = progress_bars['exmae_pbar']

    # process exmaes
    poolLock = multiprocessing.Lock()
    # Leave one cpu for basic OS functions, but never request 0 processes:
    # Pool raises ValueError for that on a single-core machine.
    process_limit = max(1, multiprocessing.cpu_count()-1)
    progressbar = tqdm(total=len(l_exoloc),leave=False)                # initialize counter (regular)

    # Predictions one exmae task represents on the total bar. Derived from the
    # data_split_size parameter rather than a notebook-level global.
    predictions_per_task = math.ceil(obj_targetloc['data'].shape[0]*data_split_size)

    def on_success(result):
        print('exmae = {}'.format(result["co"]) + ' '+ str(result["exmae"]), flush=True)
        # update counter(s) of completion
        progressbar.update()
        exmae_progress.update()
        total_progress.update(predictions_per_task)

    def on_error(err):
        print("ERROR: {}".format(err), flush=True)
        traceback.print_exception(type(err), err, err.__traceback__)

    try:
        # same for every exmae
        obj_targetloc['tr'], obj_targetloc['test'] = train_test_split(obj_targetloc['data'], test_size=data_split_size, shuffle=False)
        # create pool processes & set global/shared variables
        pool = multiprocessing.Pool(
            processes=process_limit,
            initializer=initExmaeWorker,
            initargs=(poolLock, obj_targetloc, l_exoloc)
        )
        print("[Exogenous_Variables] Created exmae processing pool with {} processes".format(process_limit), flush=True)

        for exog in l_exoloc:
            extr, extest = train_test_split(exog, test_size=data_split_size, shuffle=False)
            pool.apply_async(find_exmae, args=(extr,extest), kwds={}, callback=on_success, error_callback=on_error)

        pool.close()      # no more tasks can be added for the pool to accomplish
        print("[Exogenous_Variables] Pool executing {0} tasks across {1} processes".format(len(l_exoloc), process_limit), flush=True)

        try:
            print("[Exogenous_Variables] waiting for exmae workers...", flush=True)
            pool.join()       # tell parent to wait until all tasks are accomplished by the process pool
        except KeyboardInterrupt:
            print ("\nMultiprocessing Pool: KeyboardInterrupt. Terminating exmae workers...")
            pool.terminate()
            pool.join()
            raise             # re-raise the same interrupt (keeps its traceback) to stop the parent
    finally:
        progressbar.close()   # End progress bar for the entire pool of exmaes

    # Evaluate & save found bettermae
    if os.path.isfile(tmp_bettermae_filename):
        with open(tmp_bettermae_filename, 'r') as tmp_bettermae_file:
            improvement_exog = json.load(tmp_bettermae_file)
        os.remove(tmp_bettermae_filename)                            # tmp file cleanup

        with open(results_filename, 'r') as all_results_file:
            all_results = json.load(all_results_file)

        filtered_results = all_results[obj_targetloc['name']]
        # replace the full exogen dictionary with only the improving entries
        filtered_results['exogen'] = dict(improvement_exog.items())

        all_bettermae = update_JSON_file(bettermae_results_filename, None, (obj_targetloc['name'], filtered_results)) # save with default adjuster
        print("Improvement_exog: {0}: {1}".format(obj_targetloc['name'], json.dumps(all_bettermae[obj_targetloc['name']], indent=4)))

    return()


In [None]:
def update_JSON_file(filename, adjustfn, arglist=(), kwargs=None, sort=True):
    ''' Generic function to handle JSON file updates.  Reads-in entire file,
        applies the adjustment function, and then overwrites the original file completely.
        A missing file is created; an empty or undecodable file is treated as
        an empty dictionary. The file is always closed, including when the
        adjustment function raises (the file content is then left untouched).
        This function does no locking of its own.
        args: filename = json-encoded file on disk
              adjustfn = function to perform adjustments to loaded data, called as
                         adjustfn(data, *arglist, **kwargs) and expected to return the
                         data to save; None selects a default based on the loaded type
                         (dict: data[key] = value, list: data.append(value))
              arglist = positional args to pass on to adjustfn
              kwargs = keyword args to pass on to adjustfn [Default = none]
              sort = flag to auto-sort keys when saving to file [Default = True]
        returns: the object that was updated and saved to file
        raises: ValueError when adjustfn is None and the file holds neither a dict nor a list
    '''
    if kwargs is None:          # avoid sharing one mutable default between calls
        kwargs = {}

    def default_dict_adjustfn(data, key, value):
        ''' Generic default function for updating a basic dictionary data file (top level keys only)
            args: data = dictionary representation of JSON data from file
                  key = key name to enter into dictionary
                  value = value to enter into dictionary[key]
            returns: Updated dictionary with key/value added
        '''
        data[key] = value
        return(data)

    def default_list_adjustfn(data, value):
        ''' Generic default function for updating a basic list data file (add to bottom of list)
            args: data = list representation of JSON data from file
                  value = value to append to end of list
            returns: Updated list with value appended
        '''
        data.append(value)
        return(data)

    try:
        file = open(filename, "r+")
    except FileNotFoundError:
        # Create the file without truncating it, in case something else
        # created it in the meantime, then reopen it for read/write.
        open(filename, "a").close()
        file = open(filename, "r+")

    with file:                                 # closes the handle on every exit path
        try:
            json_data = json.loads(file.read())
        except json.JSONDecodeError:           # empty or corrupt file
            json_data = {}

        if adjustfn is not None:
            json_data = adjustfn(json_data, *arglist, **kwargs)
        elif isinstance(json_data, dict):
            json_data = default_dict_adjustfn(json_data, *arglist, **kwargs)
        elif isinstance(json_data, list):
            json_data = default_list_adjustfn(json_data, *arglist, **kwargs)
        else:
            raise ValueError('Unable to adjust JSON since function not provided or file not of type dict or list!')

        # Serialize before touching the file so a serialization error cannot
        # leave a partially written file behind.
        payload = json.dumps(json_data, sort_keys=sort, indent=4)+'\n'
        file.seek(0)                           # Go to first line, first column of file
        file.write(payload)
        file.truncate()                        # end file here, delete anything after the current file position

    return(json_data)

In [None]:
def exog_combinations(df, exoe):
    ''' Builds every candidate set of exogenous columns for one location.
        args: df = dataframe of rain data
              exoe = list of exogenous column names for a single location
        returns: list of dataframes selected from df, in this order: all of
                 exoe together, then (when there is more than one name) each
                 single column, then every combination of size 2 up to
                 len(exoe)-1. An empty exoe gives an empty list.
    '''
    count = len(exoe)
    if count == 0:
        return([])

    frames = [df.loc[:,exoe]]              # the complete set always comes first
    if count > 1:
        # size 1 gives the single-column frames, larger sizes the partial sets
        for size in range(1, count):
            frames.extend(df.loc[:,list(combo)] for combo in combinations(exoe, size))
    return(frames)



### Data Evaluation
Finds combinations of exogenous variable locations and starts model evaluation of combinations

In [None]:
# Select which target locations (keys of the exogen dictionary) get evaluated.
autoExogen = True   # flag for manual use: set to False to use the hand-picked list below

# Defining set of cities to evaluate
if autoExogen or getpass.getuser() == "rainfalld":       # docker daemon, automatically do all exogen
    todokeys = exogen.keys()
else:    # manual setting of dictionary elements to do
    todokeys = ('ARCOLA, NC', 'HENDERSON 2 NNW, NC', 'LAURINBURG, NC', 'ROANOKE RAPIDS, NC', 'MURFREESBORO, NC', 'LUMBERTON AREA, NC', 'LONGWOOD, NC', 'WHITEVILLE 7 NW, NC', 'CHARLOTTE AREA, NC', 'MOUNT MITCHELL AREA, NC', 'ASHEVILLE AIRPORT, NC', 'BANNER ELK, NC', 'BEECH MOUNTAIN, NC', 'BRYSON CITY 4, NC', 'BREVARD, NC', 'CASAR, NC', 'COWEETA EXP STATION, NC', 'CULLOWHEE, NC', 'FOREST CITY 8 W, NC', 'FRANKLIN, NC', 'GASTONIA, NC', 'GRANDFATHER MTN, NC', 'HENDERSONVILLE 1 NE, NC', 'HIGHLANDS, NC', 'HOT SPRINGS, NC', 'LAKE LURE 2, NC', 'LAKE TOXAWAY 2 SW, NC', 'MARSHALL, NC', 'MONROE 2 SE, NC', 'MOUNT HOLLY 4 NE, NC', 'OCONALUFTEE, NC', 'PISGAH FOREST 3 NE, NC', 'ROBBINSVILLE AG 5 NE, NC', 'ROSMAN, NC', 'SHELBY 2 NW, NC', 'TAPOCO, NC', 'TRYON, NC', 'WAYNESVILLE 1 E, NC', 'BOONE 1 SE, NC', 'DANBURY, NC', 'EDEN, NC', 'MOUNT AIRY 2 W, NC', 'REIDSVILLE 2 NW, NC', 'HAYESVILLE 1 NE, NC', 'MURPHY 4ESE, NC', 'KING, NC')

# Restrict exogen to the selected keys; a key missing from exogen raises KeyError here.
sub_exogen = {k: exogen[k] for k in todokeys}

In [None]:
from collections import defaultdict
# Map each target location to the list of candidate exogenous DataFrames
# returned by exog_combinations for that location's exogenous column names.
l_o_dfs = defaultdict(list)
for key,value in tqdm(sub_exogen.items()):
    lo_dfs2 = exog_combinations(rd, value)    # selected from the full rainfall table rd
    l_o_dfs[key] = lo_dfs2


In [None]:
# Paths of the JSON files read and written by the function library above.
results_filename = os.path.join(destdir,"allMAE.json")                   # keymae / exmae results per location
bettermae_results_filename = os.path.join(destdir,"allBetterMAE.json")   # exogenous sets that beat the keymae
tmp_bettermae_filename = os.path.join(destdir, "tmp_bettermae.json")     # per-location scratch file for improving exmaes

# best_comb = [[4,3,3,4]]
warnings.filterwarnings("ignore")

# Delete each listed file (only the temporary bettermae file) before the run;
# the loop pops from the end of the list until it is empty.
files=[tmp_bettermae_filename]
while (len(files) > 0):                          # reset results on new run
    try:
        os.remove( files[-1] )
    except FileNotFoundError:                    # ignore since non-exist is the desired state
        pass
    except OSError as err:                       # any other removal failure is printed, not raised
        traceback.print_exception(type(err), err, err.__traceback__)
    finally:
        files.pop()                              # always drop the entry so the loop terminates


# Work out progress-bar totals: one prediction is counted per held-out row
# for every keymae task and for every exmae task.
data_test_percentage = 0.2                                           # 20%
num_single_predictions = math.ceil(rd.shape[0]*data_test_percentage) # predictions counted per task
num_all_predictions = len(l_o_dfs.items())*num_single_predictions    # keymae predictions amount
num_all_exmae = 0
for key,value in l_o_dfs.items():
    num_all_exmae += len(value)                                      # exmae predictions amount
num_all_predictions += num_all_exmae*num_single_predictions

keymae_storage = {}    
# tqdmformat = '{desc}: |{bar}|{percentage:3.0f}%'
# Three stacked bars: overall predictions, keymae tasks, exmae tasks.
total_progress = tqdm(desc="Full Calculation:", total=num_all_predictions, position=0)
keymae_progress = tqdm(desc="Finding keymaes:", total=len(l_o_dfs.items()), position=1)
exmae_progress = tqdm(desc="Evaluating exmaes:", total=num_all_exmae, position=2)

# Main driver: compute every location's keymae first, then evaluate each
# location's candidate exogenous sets against it.
try:
    # Solve for targetloc Keymae Values first
    print("============== KEYMAE EVALUATION ===============")
    pbars = { 'total_pbar': total_progress, 'keymae_pbar': keymae_progress, 'exmae_pbar': exmae_progress }
    keymae_storage = targetloc_vars(rd, list(l_o_dfs.keys()), data_test_percentage, pbars)

    # Solve for exmae values of each combination of targetloc and matching exogenous variable
    print("\n=============== EXMAE EVALUATION ===============")
    for key,value in l_o_dfs.items():
        print("\nFinding exmaes values for {0}:".format(key))
        # keymae_storage[key] raises KeyError when no keymae was stored for this location
        targetloc_obj = { 'data': rd[key], 'name': key, 'keymae': keymae_storage[key] }
        exogenous_var(targetloc_obj, data_test_percentage, value, pbars)

except KeyboardInterrupt:
    print("MANUAL EXIT: Program interrupted by user.", flush=True)
    raise SystemExit(2)                  # exit status 2: interrupted by the user
except Exception as err:
    print("ERROR: {}".format(err), flush=True)
    traceback.print_exception(type(err), err, err.__traceback__)
    raise SystemExit(1)                  # exit status 1: any other failure
else:
    print("\n==== EXOGENOUS VARIABLE EVALUATION COMPLETE ====\n")
finally:
    # close all three progress bars whether the run succeeded or not
    keymae_progress.close()
    exmae_progress.close()
    total_progress.close()
