In [2]:
import os
import pandas as pd
import pandas_gbq
import pytz
import io
from dbfread import DBF

In [3]:
# Make the shared TDM helper modules importable, then open the BigQuery client.
import sys

# Raw string: the previous literal depended on the invalid escape sequences
# '\.', '\R' and '\P', which Python only tolerates with a warning. The value
# of the path is unchanged.
sys.path.insert(0, r'..\..\Resources\Python-Functions')
import BigQuery
import TDM

# Client used by every BigQuery query and load job further down the notebook.
client = BigQuery.getBigQueryClient_TDMScenarios()


# Define TDM Versions and Scenarios to Read

In [17]:
# Catalogue of TDM model versions to scan for scenario outputs.
#   PROCESS                - truthy flag; the merge loop skips rows where it is falsy
#   tdmVersionID           - integer part of the combined scenarioID built in the merge loop
#   tdmVersion             - short version label
#   tdmVersionWithDate     - full version label including the release date
#   dirTdmVersionScenarios - folder holding one sub-folder per scenario
# All paths are raw strings so every backslash is literal; the earlier UNC
# literals only worked because Python passes the invalid escape '\m' through
# unchanged (with a warning).
tdmVersions = pd.DataFrame([
    [1, 1,'v832', 'WF TDM v8.3.2 - 2022-02-04a'        , r'\\modelace\ModelAce-E\1 - TDM\1 - Official Release (full run)\v8.3.2\WF TDM v8.3.2 - 2022-02-04a\Scenarios'        ],
    [1, 2,'v832', 'WF TDM v8.3.2 - 2022-09-21'         , r'\\modelace\ModelAce-E\1 - TDM\1 - Official Release (full run)\v8.3.2\WF TDM v8.3.2 - 2022-09-21\Scenarios'         ],
    [1, 3,'v9'  , 'WF TDM v9.0 - 2022-12-19'           , r'\\modelace\ModelAce-E\1 - TDM\2 - Sandbox\v9_Development\WF TDM v9.0 - 2022-12-19\Scenarios'                       ],
    [1, 4,'v9'  , 'WF TDM v9.0 - 2023-02-23-MCCalib-r0', r'A:\1 - TDM\3 - Model Dev\1 - WF\2 - Sandbox\v9.0Beta\_archive\WF TDM v9.0 - 2023-02-23-MCCalib-r0\Scenarios'       ],
    [1, 5,'v9'  , 'WF TDM v9.0 - 2023-02-23-MCCalib-r2', r'A:\1 - TDM\3 - Model Dev\1 - WF\2 - Sandbox\v9.0Beta\_archive\WF TDM v9.0 - 2023-02-23-MCCalib-r2\Scenarios'       ],
    [1, 6,'v832', 'WF TDM v8.3.2 - 2023-02-28'         , r'\\modelace\ModelAce-D\1 - TDM\3 - Model Dev\1 - WF\1 - Official Release\v8.3.2\WF TDM v8.3.2 - 2023-02-28\Scenarios'],
    [1, 7,'v9'  , 'WF TDM v9.0 - 2023-04-14'           , r'V:\1 - TDM\2 - Sandbox\v9_Development\WF TDM v9.0 - 2023-04-14\Scenarios'                                          ],
    [1, 8,'v9'  , 'WF TDM v9.0 - 2023-04-28'           , r'V:\1 - TDM\2 - Sandbox\v9_Development\WF TDM v9.0 - 2023-04-28\Scenarios'                                          ],
    [1, 9,'v9'  , 'WF TDM v9.0 - 2023-06-30'           , r'A:\1 - TDM\3 - Model Dev\1 - WF\1 - Official Release\v9.0\WF TDM v9.0 - 2023-06-30\Scenarios'                      ]
], columns = (['PROCESS','tdmVersionID','tdmVersion', 'tdmVersionWithDate', 'dirTdmVersionScenarios']))

# Scenario catalogue. Each scenario is a (group, year) pair listed in ID order;
# scenarioID is the 1-based position in this list and scenarioName is always
# '<group>_<year>', which is also the scenario's folder name in the merge loop.
_scenarioGroupYears = [
    ('BY'     , 2019), ('BY'     , 2023),
    ('EF_RTP' , 2032), ('EF_RTP' , 2042), ('EF_RTP' , 2050),
    ('Needs'  , 2032), ('Needs'  , 2042), ('Needs'  , 2050),
    ('NoBuild', 2032), ('NoBuild', 2042), ('NoBuild', 2050),
    ('RTP'    , 2030), ('RTP'    , 2032), ('RTP'    , 2040), ('RTP', 2042), ('RTP', 2050),
    ('TIP'    , 2024), ('TIP'    , 2028),
]

segScenarios = pd.DataFrame(
    [
        [_id, _group + '_' + str(_year), _group, _year]
        for _id, (_group, _year) in enumerate(_scenarioGroupYears, start=1)
    ],
    columns=['scenarioID', 'scenarioName', 'scenarioGroup', 'scenarioYear'],
)

# Merge all Scenario Outputs

In [18]:
from pathlib import Path

dirWork    = os.getcwd()


def _readCsvTable(filePath):
    """Read a CSV scenario output into a DataFrame."""
    return pd.read_csv(filePath)


def _readDbfTable(filePath):
    """Read a DBF scenario output into a DataFrame (all records loaded up front)."""
    return pd.DataFrame(DBF(filePath, load=True))


def _loadScenarioTable(filePath, reader, scenarioID):
    """Load one scenario output file and tag it with its scenarioID.

    Args:
        filePath: Full path of the output file.
        reader: Callable that turns filePath into a DataFrame.
        scenarioID: Combined version/scenario key stored in the first column.

    Returns:
        The tagged DataFrame, or None when the file does not exist.
    """
    if not Path(filePath).is_file():
        return None
    try:
        dfTable = reader(filePath)
    except FileNotFoundError as f:
        # file disappeared between the existence check and the read
        print(f)
        return None
    dfTable['scenarioID'] = scenarioID
    dfTable.insert(0, 'scenarioID', dfTable.pop('scenarioID')) # move scenario id to first column
    return dfTable


def _concatFrames(frames):
    """Stack the collected frames in one pass; empty DataFrame when none were collected."""
    return pd.concat(frames) if frames else pd.DataFrame()


# Frames are collected in lists and concatenated once after the loop; calling
# pd.concat on a growing master frame inside the loop re-copies every earlier row.
_scenarioFrames = []
_outputFrames = {
    'SegSummary'     : [],
    'TransitShare'   : [],
    'RouteSummary'   : [],
    'SESummary'      : [],
    'TransitODDetail': [],
    'TransitPADetail': [],
}

scenarioID = 0

for _iT, rowT in tdmVersions.iterrows():

    # only scan versions flagged for processing
    if not rowT['PROCESS']:
        continue

    _vID = rowT['tdmVersionID']

    for _iS, rowS in segScenarios.iterrows():

        # get scenario path and skip scenarios this version was never run for
        _scenarioPath = os.path.join(rowT['dirTdmVersionScenarios'], rowS['scenarioName'])
        if not os.path.exists(_scenarioPath):
            continue

        try:
            _sID = rowS['scenarioID']

            # scenarioID = <tdmVersionID>.<two-digit scenarioID>
            # round wasn't keeping float at 2 decimals, and was doing weird stuff
            scenarioID = float("{:0.2f}".format(_vID + (_sID / 100)))

            print(scenarioID)

            # get values from Control Center to use to create filenames for outputs
            _controlCenter = os.path.join(_scenarioPath, "1ControlCenter.block")
            _unloadednetprefix = TDM.getControlCenterValue(_controlCenter, 'UnloadedNetPrefix')
            _runid = TDM.getControlCenterValue(_controlCenter, 'RID')

        except FileNotFoundError as f:
            # without the Control Center file the output names are unknown; skip the scenario
            print(f)
            continue

        # one row describing this version/scenario combination
        _scenarioFrames.append(pd.DataFrame([
            [scenarioID, rowT['tdmVersion'], rowT['tdmVersionWithDate'], rowS['scenarioName'], rowS['scenarioGroup'], rowS['scenarioYear']]
        ], columns = (['scenarioID','tdmVersion','tdmVersionWithDate','scenarioName','scenarioGroup','scenarioYear'])))

        # output file and reader for each master table
        _outputFiles = {
            # segment summaries
            'SegSummary'     : (_scenarioPath + '\\5_AssignHwy\\4_Summaries\\' + _unloadednetprefix + '_Summary_SEGID.csv', _readCsvTable),
            # transit route tables
            'RouteSummary'   : (_scenarioPath + '\\4_ModeChoice\\3_TransitAssign\\_' + _runid + '_1_PA_Route.dbf', _readDbfTable),
            # se input tables
            'SESummary'      : (_scenarioPath + '\\0_InputProcessing\\SE_File_' + _runid + '.dbf', _readDbfTable),
            # transit share data
            'TransitShare'   : (_scenarioPath + '\\4_ModeChoice\\4_Shares\\_Shares_Summary_long.csv', _readCsvTable),
            # transit OD station detail
            'TransitODDetail': (_scenarioPath + '\\4_ModeChoice\\3_TransitAssign\\_' + _runid + '_2_OD_Station_Detail.dbf', _readDbfTable),
            # transit PA station tables
            'TransitPADetail': (_scenarioPath + '\\4_ModeChoice\\3_TransitAssign\\_' + _runid + '_1_PA_Station.dbf', _readDbfTable),
        }

        for _name, (_filePath, _reader) in _outputFiles.items():
            _dfTable = _loadScenarioTable(_filePath, _reader, scenarioID)
            if _dfTable is not None:
                _outputFrames[_name].append(_dfTable)

# join everything that was collected into the master tables
dfScenariosMaster       = _concatFrames(_scenarioFrames)
dfSegSummaryMaster      = _concatFrames(_outputFrames['SegSummary'])
dfTransitShareMaster    = _concatFrames(_outputFrames['TransitShare'])
dfRouteSummaryMaster    = _concatFrames(_outputFrames['RouteSummary'])
dfSESummaryMaster       = _concatFrames(_outputFrames['SESummary'])
dfTransitODDetailMaster = _concatFrames(_outputFrames['TransitODDetail'])
dfTransitPADetailMaster = _concatFrames(_outputFrames['TransitPADetail'])


1.01
1.12
1.14
1.16
1.17
2.01
2.02
2.06
2.07
2.08
2.09
2.1
2.11
2.13
2.15
2.16
2.18
3.01
3.02
3.03
3.04
3.05
3.06
3.07
3.08
3.09
3.1
3.11
3.13
3.15
3.16
3.18
4.01
5.01
6.01
6.02
6.13
6.15
6.16
6.18
7.01
7.02
7.03
7.04
7.05
7.09
7.1
7.11
7.13
7.15
7.16
7.18
8.01
8.02
8.11
8.13
8.15
8.16
8.18
9.01
9.02
9.06
9.07
9.08
9.09
9.1
9.11
9.13
9.15
9.16
9.18


In [19]:
# Sum DY_BRDA over all PA station rows of each scenario and report it as TOTBRD.
(
    dfTransitPADetailMaster
    .groupby('scenarioID', as_index=False)['DY_BRDA']
    .sum()
    .rename(columns={'DY_BRDA': 'TOTBRD'})
)

Unnamed: 0,scenarioID,TOTBRD
0,1.01,156823.33
1,1.12,226920.53
2,1.14,276078.01
3,1.16,348328.10
4,1.17,182874.82
...,...,...
65,9.11,308790.03
66,9.13,268321.09
67,9.15,357646.80
68,9.16,437792.22


# Create New Scenario Output Data as BigQuery Tables

In [20]:
# create GBQ tables
# Each master frame is uploaded to its own table in the tdm_scenarios_output
# dataset. to_gbq is called without if_exists, so its default applies
# ('fail' per the pandas-gbq documentation): this cell is for the first upload only.
_newTables = [
    ('tdm_scenarios_output.scenarios'      , dfScenariosMaster      ),
    ('tdm_scenarios_output.segment_summary', dfSegSummaryMaster     ),
    ('tdm_scenarios_output.transit_share'  , dfTransitShareMaster   ),
    ('tdm_scenarios_output.route_summary'  , dfRouteSummaryMaster   ),
    ('tdm_scenarios_output.se_table'       , dfSESummaryMaster      ),
    ('tdm_scenarios_output.route_od_detail', dfTransitODDetailMaster),
    ('tdm_scenarios_output.route_pa_detail', dfTransitPADetailMaster),
]

for _destination, _dfMaster in _newTables:
    pandas_gbq.to_gbq(_dfMaster, _destination, project_id="tdm-scenarios")


100%|██████████| 1/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<00:00, 1007.76it/s]
100%|██████████| 1/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<00:00, 1045.96it/s]
100%|██████████| 1/1 [00:00<?, ?it/s]


# Append New Scenario Output Data to BigQuery Tables

In [13]:
# functions
def update_gbq_table(tableID, df, write_disposition="WRITE_APPEND"):
    """Load a DataFrame into a BigQuery table and wait for the job to finish.

    The load configuration is passed by keyword. The third positional
    parameter of ``load_table_from_dataframe`` is ``num_retries``, so a
    positional ``job_config`` never reaches the load job and the requested
    write disposition is not applied.

    The default disposition is WRITE_APPEND because the callers in this
    notebook pass only the rows that are missing from the table (see
    ``append_new_rows``); truncating would discard every scenario already
    stored. Pass ``write_disposition="WRITE_TRUNCATE"`` to replace the table
    contents instead.

    Args:
        tableID: Fully qualified table name, ``project.dataset.table``.
        df: DataFrame holding the rows to load.
        write_disposition: BigQuery write disposition for the load job.

    Raises:
        Whatever ``job.result()`` raises when the load job fails.
    """
    job_config = BigQuery.bigquery.LoadJobConfig(write_disposition=write_disposition)
    # Make an API request.
    job = client.load_table_from_dataframe(
        df,
        tableID,
        job_config=job_config,
    )
    # Wait for the job to complete.
    job.result()

def append_new_rows(tableID, df):
    """Return the rows of df whose scenarioID is not yet in the BigQuery table.

    Despite the name, nothing is written: the function only filters df. Only
    the distinct scenarioID values are queried, so the table's data rows are
    not downloaded just to learn which scenarios it holds.

    Args:
        tableID: Fully qualified table name, ``project.dataset.table``.
        df: Local master DataFrame with a ``scenarioID`` column.

    Returns:
        The subset of df with scenarioIDs absent from the table. An empty df
        (for example a master table for which no files were found) is
        returned unchanged without querying BigQuery.
    """
    if df.empty:
        return df
    dfGBQ = client.query("SELECT DISTINCT scenarioID FROM " + tableID).to_dataframe()
    existingIDs = dfGBQ['scenarioID'].unique().tolist()
    return df[~df['scenarioID'].isin(existingIDs)]

In [14]:
# determine new rows to be added to GBQ tables
# Each master frame is filtered against its own BigQuery table. The transit
# share frame was previously compared with the scenarios table, which would
# select the wrong rows whenever the two tables hold different scenarios.
dfScenariosMaster2       = append_new_rows('tdm-scenarios.tdm_scenarios_output.scenarios'      , dfScenariosMaster)
dfSegSummaryMaster2      = append_new_rows('tdm-scenarios.tdm_scenarios_output.segment_summary', dfSegSummaryMaster)
dfTransitShareMaster2    = append_new_rows('tdm-scenarios.tdm_scenarios_output.transit_share'  , dfTransitShareMaster)
dfRouteSummaryMaster2    = append_new_rows('tdm-scenarios.tdm_scenarios_output.route_summary'  , dfRouteSummaryMaster)
dfSESummaryMaster2       = append_new_rows('tdm-scenarios.tdm_scenarios_output.se_table'       , dfSESummaryMaster)
dfTransitODDetailMaster2 = append_new_rows('tdm-scenarios.tdm_scenarios_output.route_od_detail', dfTransitODDetailMaster)
dfTransitPADetailMaster2 = append_new_rows('tdm-scenarios.tdm_scenarios_output.route_pa_detail', dfTransitPADetailMaster)

NotFound: 404 Not found: Table tdm-scenarios:tdm_scenarios_output.scenarios was not found in location us-west3

Location: us-west3
Job ID: c7ddc5ab-1f15-4b8c-8a3d-492fe71f8f95


In [None]:
# add new scenario rows to GBQ tables
# (table, frame of rows not yet in that table), uploaded in this order
_newRowsByTable = [
    ('tdm-scenarios.tdm_scenarios_output.scenarios'      , dfScenariosMaster2      ),
    ('tdm-scenarios.tdm_scenarios_output.segment_summary', dfSegSummaryMaster2     ),
    ('tdm-scenarios.tdm_scenarios_output.transit_share'  , dfTransitShareMaster2   ),
    ('tdm-scenarios.tdm_scenarios_output.route_summary'  , dfRouteSummaryMaster2   ),
    ('tdm-scenarios.tdm_scenarios_output.se_table'       , dfSESummaryMaster2      ),
    ('tdm-scenarios.tdm_scenarios_output.route_od_detail', dfTransitODDetailMaster2),
    ('tdm-scenarios.tdm_scenarios_output.route_pa_detail', dfTransitPADetailMaster2),
]

for _tableID, _dfNewRows in _newRowsByTable:
    update_gbq_table(_tableID, _dfNewRows)

# Notes

In [168]:
# Scratch check of the new-row filter: download the scenarios table, keep at
# most 20 of its distinct scenarioIDs, and list the local scenarios whose ID
# is not among them.
dfGBQ = client.query("SELECT * FROM tdm-scenarios.tdm_scenarios_output.scenarios").to_dataframe()
dfGBQList = list(set(dfGBQ['scenarioID'].tolist()))[:20]
_alreadyInGBQ = dfScenariosMaster['scenarioID'].isin(dfGBQList)
df3 = dfScenariosMaster[~_alreadyInGBQ]



In [67]:
# Attempt to clear the scenarios table with a DML statement (every row with a
# positive scenarioID). The recorded output shows BigQuery rejecting this:
# DML queries are not allowed without billing enabled on the project.
_deleteQuery = "DELETE FROM tdm-scenarios.tdm_scenarios_output.scenarios WHERE scenarioID > 0"
job = client.query(_deleteQuery)
job.result()

Forbidden: 403 Billing has not been enabled for this project. Enable billing at https://console.cloud.google.com/billing. DML queries are not allowed in the free tier. Set up a billing account to remove this restriction.

Location: us-west3
Job ID: e1725aee-d386-469b-b1b6-47656ed242e2


In [None]:
# update scenarios
table_id_1 = 'tdm-scenarios.tdm_scenarios_output.scenarios'

# Build the load configuration in this cell so it does not depend on a
# job_config left over from another cell. WRITE_TRUNCATE matches the only
# job_config defined elsewhere in this notebook and fits a full-table
# replacement with dfScenariosMaster.
job_config = BigQuery.bigquery.LoadJobConfig(write_disposition="WRITE_TRUNCATE")

# job_config must be passed by keyword: the third positional parameter of
# load_table_from_dataframe is num_retries.
job = client.load_table_from_dataframe(
    dfScenariosMaster,
    table_id_1,
    job_config=job_config,
)  # Make an API request.
job.result()  # Wait for the job to complete.
table = client.get_table(table_id_1)  # Make an API request.

In [20]:
# Look up the scenarios table's metadata. The recorded output shows this
# failing with 403: permission bigquery.tables.get denied, or the table
# does not exist.
client.get_table("tdm-scenarios.tdm_scenarios_output.scenarios")  # Make an API request.

Forbidden: 403 GET https://bigquery.googleapis.com/bigquery/v2/projects/tdm-scenarios/datasets/tdm_scenarios_output/tables/scenarios?prettyPrint=false: Access Denied: Table tdm-scenarios:tdm_scenarios_output.scenarios: Permission bigquery.tables.get denied on table tdm-scenarios:tdm_scenarios_output.scenarios (or it may not exist).

In [26]:
# Replace the segment_summary table with the full local master frame.
table_id_s = 'tdm-scenarios.tdm_scenarios_output.segment_summary'

# The notebook never imports a bare `bigquery` name; the library is reached
# through the BigQuery helper module, as in update_gbq_table.
job_config = BigQuery.bigquery.LoadJobConfig(write_disposition="WRITE_TRUNCATE")

# job_config must be passed by keyword: the third positional parameter of
# load_table_from_dataframe is num_retries.
job = client.load_table_from_dataframe(dfSegSummaryMaster, table_id_s, job_config=job_config)  # Make an API request.
job.result()  # Wait for the job to complete.
table = client.get_table(table_id_s)  # Make an API request.



In [28]:
# Show the table object returned by the most recent client.get_table() call.
display(table)

Table(TableReference(DatasetReference('tdm-scenarios', 'tdm_scenarios_output'), 'segment_summary'))

In [6]:
# Still To Do:
#   - Start seaborn plot comparison code (in new folder?)
#   - Figure out how to "append" the tables I created to BigQuery!
#       - append both id table and other tables -- bill is showing example