In [1]:
import os
import numpy as np
import pandas as pd
import re

In [2]:
# Directory that AutoOwnership.xls is read from. Set the TM2_MODEL_DIR environment
# variable to point at a local copy; when it is unset the original developer path
# below is used, so existing behaviour is unchanged.
# cwd = r'C:\apps\mtc\travel-model-two\model-files\model'
cwd = os.environ.get(
    'TM2_MODEL_DIR',
    '/Users/chesterharvey/apps/mtc/travel-model-two/model-files/model',
)

In [3]:
# Read the 'Auto Ownership' sheet of the workbook, skipping its first three rows
uec_workbook = os.path.join(cwd, 'AutoOwnership.xls')
uec = pd.read_excel(
    uec_workbook,
    engine='xlrd',
    sheet_name='Auto Ownership',
    skiprows=3,
)

In [4]:
# Names for the six leading columns, which pandas loads as 'Unnamed: 0' .. 'Unnamed: 5'
labeling = {
    f'Unnamed: {position}': name
    for position, name in enumerate(
        ['uec_id', 'token', 'Description', 'filter', 'formula', 'index']
    )
}

# Rename the front columns, turn whitespace-only cells into NaN,
# then discard rows that contain nothing but NaN
uec = (
    uec
    .rename(columns=labeling)
    .replace(r'^\s*$', np.nan, regex=True)
    .dropna(how='all')
)

In [5]:
# A formula of 1 is implied, so blank it out (NaN) wherever it occurs
uec['formula'] = uec['formula'].mask(uec['formula'].eq(1))

In [6]:
 # Make sure filter and formula are strings
# After this cast a missing filter/formula is the literal string 'nan',
# which is the value build_expression compares against
uec = uec.astype({'filter': str, 'formula': str})

In [7]:
# Every column that is not one of the relabeled front columns holds coefficients
front_columns = set(labeling.values())
coeff_columns = [name for name in uec.columns if name not in front_columns]
coeff_columns

['0_CARS',
 '1_CAR_1CV',
 '1_CAR_1AV',
 '2_CARS_2CV',
 '2_CARS_2AV',
 '2_CARS_1CV1AV',
 '3_CARS_3CV',
 '3_CARS_3AV',
 '3_CARS_2CV1AV',
 '3_CARS_1CV2AV',
 '4_CARS_4CV']

In [8]:
# Show the tokens (rows with a non-null 'token'); these should be defined from the household table
uec.loc[uec['token'].notna(), 'token']

0                         nest0
1                         nest1
2                         nest2
3                    nestCoeff0
4                    nestCoeff1
5                    nestCoeff2
8            useAccessibilities
9     nonMotorizedAccessibility
10                   autoAccess
11                transitAccess
12        workersAutoDependency
13       studentsAutoDependency
14        workersRailProportion
15       studentsRailProportion
16          intersectionDensity
17            populationDensity
18                retailDensity
19                      drivers
20                    ftWorkers
21                    ptWorkers
22                      workers
23                  youngAdults
24               schoolChildren
25                  oldRetirees
26                youngRetirees
27                     HHIncome
28    highSchoolGraduateBoolean
29      detachedDwellingBoolean
30                    hhBoolean
31                         SFID
32                   hhCountyId
33      

In [9]:
# Utility rows are the ones without a token (token rows are precalculated aliases).
# Work on an independent copy so later edits do not touch `uec`.
utilities = uec.loc[uec['token'].isna()].copy()

In [10]:
def build_expression(filt, formula):
    """Combine a filter and a formula into one expression string.

    Both arguments are expected to be strings in which the literal ``'nan'``
    means "missing".

    Returns:
        ``'(filt) * (formula)'`` when both are present, the filter alone when
        only the filter is present, and otherwise the formula as given.

    NOTE(review): when both are ``'nan'`` the result is ``'nan'``. Formulas
    equal to 1 are blanked earlier in the notebook, so a row with no filter
    and an implied formula of 1 presumably ends up here; confirm that is the
    intended expression for such rows.
    """
    if filt != 'nan':
        # Filter present: multiply by the formula only if there is one
        return f'({filt}) * ({formula})' if formula != 'nan' else filt

    # No filter: the formula (possibly 'nan') stands on its own
    return formula

# Build each row's expression as the product of its filter and formula
utilities['Expression'] = utilities.apply(
    lambda row: build_expression(row['filter'], row['formula']),
    axis=1,
)

In [11]:
def _split_top_level_args(arg_text):
    """Split ``arg_text`` on commas that are not nested inside parentheses.

    Returns the list of stripped arguments, or ``None`` when the parentheses
    in ``arg_text`` are unbalanced.
    """
    args, depth, start = [], 0, 0
    for pos, char in enumerate(arg_text):
        if char == '(':
            depth += 1
        elif char == ')':
            depth -= 1
            if depth < 0:
                return None
        elif char == ',' and depth == 0:
            args.append(arg_text[start:pos].strip())
            start = pos + 1
    if depth != 0:
        return None
    args.append(arg_text[start:].strip())
    return args


def parse_if_to_where(expression):
    """Rewrite an ``if(condition, a, b)`` expression without the ``if`` call.

    * ``if(condition, 1, 0)`` becomes just ``condition``.
    * Any other ``if(condition, a, b)`` becomes ``np.where(condition, a, b)``.
    * Anything else is returned unchanged. That includes strings that do not
      consist of exactly one ``if(...)`` call with three arguments, such as an
      ``if`` embedded in a larger expression.

    Args:
        expression: expression string to rewrite.

    Returns:
        The rewritten expression string, or ``expression`` itself when no
        rewrite applies.
    """
    text = expression.strip()
    if not (text.startswith('if(') and text.endswith(')')):
        return expression

    # Drop the leading 'if(' and the trailing ')' and split the three arguments
    args = _split_top_level_args(text[3:-1])
    if args is None or len(args) != 3:
        return expression

    condition, if_true, if_false = args
    if (if_true, if_false) == ('1', '0'):
        # The condition already carries the 1/0 outcome on its own
        return condition
    return f'np.where({condition}, {if_true}, {if_false})'


# Other cells in this notebook call the parser by this shorter name
parse_if = parse_if_to_where

# Parse if statements. The parser is defined above as parse_if_to_where;
# calling it by its defined name avoids a NameError on a fresh kernel.
utilities['Expression'] = utilities['Expression'].apply(parse_if_to_where)

In [12]:
# Compare the inputs with the expression built from them
utilities.loc[:, ['filter', 'formula', 'Expression']]

Unnamed: 0,filter,formula,Expression
34,,"if(hhCountyId == SFID, 1, 0)",hhCountyId == SFID
35,drivers==1,,drivers==1
36,drivers==2,,drivers==2
37,drivers==3,,drivers==3
38,drivers>=4,,drivers>=4
...,...,...,...
93,,%Mobility.AV.Share%==1.0,%Mobility.AV.Share%==1.0
94,,%Mobility.AV.Share%==0.9,%Mobility.AV.Share%==0.9
95,,%Mobility.AV.Share%==0.5,%Mobility.AV.Share%==0.5
96,,%Mobility.AV.Share%==0.1,%Mobility.AV.Share%==0.1


In [13]:
# Marker for names whose replacement has not been decided yet
UNRESOLVED = '#######'

# Maps each name used in the UEC expressions to its replacement.
# Names mapped to UNRESOLVED still need a replacement.
refactor_lookup = {
    # Names with a known replacement
    'hhCountyId': '@df.county_id',
    'SFID': 'ID_SAN_FRANCISCO',
    'drivers': 'num_drivers',
    'workers': 'num_workers',
    'youngAdults': 'num_young_adults',
    'schoolChildren': 'num_children_6_to_15',
    'youngRetirees': 'num_young_retirees',
    'oldRetirees': 'num_old_retirees',
    'HHIncome': 'income',
    'highSchoolGraduateBoolean': UNRESOLVED,
    'detachedDwellingBoolean': UNRESOLVED,
    # Need to get these from land use
    'intersectionDensity': UNRESOLVED,
    'populationDensity': UNRESOLVED,
    'retailDensity': UNRESOLVED,
    # Open question for all of these: do they only come into play on a second iteration?
    'nonMotorizedAccessibility': UNRESOLVED,
    'autoAccess': UNRESOLVED,
    'transitAccess': UNRESOLVED,
    'useAccessibilities': UNRESOLVED,
    'workersAutoDependency': UNRESOLVED,
    'studentsAutoDependency': UNRESOLVED,
    'workersRailProportion': UNRESOLVED,
    'studentsRailProportion': UNRESOLVED,
    'hhBoolean': UNRESOLVED,  # Not sure what this is
    'nonFamily': 'non_family',
    '@workAutoTime': UNRESOLVED,  # Where does this come from?
    '@numPersons18to35': 'num_persons_18_to_35',
    '@numPersons65plus': 'num_persons_65_plus',
    '%Mobility.AV.Share%': UNRESOLVED,  # Needs to come from settings
}

In [14]:
def refactor(expression, lookup=None):
    """Replace the names in ``expression`` according to a lookup table.

    Each key of ``lookup`` found in ``expression`` is replaced by its value.
    When the value is made up only of ``#`` characters (replacement not yet
    known), the name is kept and wrapped as ``##name##`` so it is easy to spot.

    Names are matched as whole identifiers in a single pass. A key is therefore
    not rewritten inside a longer name (``workers`` inside
    ``workersAutoDependency``), and text inserted by one replacement is never
    rewritten by another (``num_workers`` stays ``num_workers``).

    Args:
        expression: expression string to rewrite.
        lookup: mapping of old name to new name. Defaults to the module-level
            ``refactor_lookup``.

    Returns:
        The rewritten expression string.
    """
    if lookup is None:
        lookup = refactor_lookup

    def _bounded(name):
        # Only guard an edge with a word boundary when the name itself starts/ends
        # with a word character; names such as '%Mobility.AV.Share%' are matched literally.
        prefix = r'(?<!\w)' if re.fullmatch(r'\w', name[:1]) else ''
        suffix = r'(?!\w)' if re.fullmatch(r'\w', name[-1:]) else ''
        return prefix + re.escape(name) + suffix

    # Longest names first so a longer name wins over a shorter one starting at the same position
    names = sorted((name for name in lookup if name), key=len, reverse=True)
    if not names:
        return expression
    pattern = re.compile('|'.join(_bounded(name) for name in names))

    def _substitute(match):
        old = match.group(0)
        new = lookup[old]
        # If the new value is not yet known (marked as '#######')
        if re.match(r'^#*$', new):
            # Mark and keep the existing name
            return f'##{old}##'
        # Otherwise replace old with new
        return new

    return pattern.sub(_substitute, expression)

# Rewrite the names in every expression
utilities['Expression'] = utilities['Expression'].map(refactor)

In [15]:
# Add an empty 'Label' column, then keep only the label, description,
# expression and coefficient columns (in that order)
utilities = utilities.assign(Label='')
utilities = utilities.loc[:, ['Label', 'Description', 'Expression', *coeff_columns]].copy()

In [16]:
# Write the interim table without the index column
interim_csv = 'auto_ownership_interim.csv'
utilities.to_csv(interim_csv, index=False)

In [None]:
# utilities.to_csv('utilities.csv')
# utilities[['filter','formula']].head()
# parse_if('if(hhCountyId == SFID, 1, 0)')

In [None]:
# Load households.csv from the current working directory
households = pd.read_csv('households.csv')

In [None]:
# Preview the first rows of the households table
households.head()

In [None]:
# Inspect the PERSONS column
households['PERSONS']

In [None]:
# Evaluate an expression of the '(filter) * (formula)' form against the households table
# NOTE(review): presumably a scratch check that expressions shaped like the ones
# build_expression produces can be evaluated with DataFrame.eval — confirm
households.eval('(PERSONS == 2) * (PERSONS/HHT)')

In [None]:
# Preview the first rows of the utilities table
utilities.head()

# RELOAD EXPORTED FILE WITH 'LABEL' FILLED IN

In [None]:
# Reload the interim file and treat missing coefficients as 0
utils = pd.read_csv('auto_ownership_interim.csv')
utils = utils.fillna({name: 0 for name in coeff_columns})

In [None]:
# Display the reloaded table
utils

In [None]:
# Backticks make eval treat these names as column references. Inside double quotes
# they were string literals, so the expression concatenated two strings instead of
# adding the two coefficient columns.
utils.eval('`0_CARS` + `1_CAR_1CV`')

In [None]:
# for col in coeff_columns:
#     for x,y in tuple(zip(utils['Label'], utils[col])):
#         break

In [None]:
# Use empty strings for missing labels
utils['Label'] = utils['Label'].fillna('')

In [None]:
# One (coefficient_name, value) record per label and coefficient column.
# The name is '<Label>_<column>', the same form the coefficient columns of `utils`
# are overwritten with in the next step, so names are unique per alternative
# (previously every column was given the fixed suffix '_O_CARS').
coeff_values = pd.DataFrame(
    [
        (f'{label}_{col}', value)
        for col in coeff_columns
        for label, value in zip(utils['Label'], utils[col])
    ],
    columns=['coefficient_name', 'value'],
)
coeff_values['constrain'] = 'F'
coeff_values.head()

In [None]:
# Replace every coefficient value with its coefficient name, '<Label>_<column>'
utils = utils.assign(
    **{name: utils['Label'] + '_' + name for name in coeff_columns}
)

# utils.to_csv('auto_ownership_file.csv')
# coeff_values.to_csv('auto_ownership_coefficients.csv')
utils.head()

In [None]:
# Display the table with coefficient names substituted for values
utils