In [1]:
import os
import pandas as pd
import pandas_gbq
import pytz
import io
from dbfread import DBF

In [2]:
# import global TDM functions

import sys
# Raw string: the backslashes are path separators, not escape sequences
# (the non-raw form relied on invalid escapes such as "\R" and "\P").
sys.path.insert(0, r'..\..\Resources\Python-Functions')
import TDM

In [3]:
#google cloud big query libraries
from google.cloud import bigquery
from google.oauth2 import service_account

# Service-account key file for each known developer login.
_keyPathsByUser = {
    'cday'   : r"C:\Users\cday\tdm-scenarios-c90ba30c3c5d.json",
    'bhereth': r"C:\Users\bhereth\tdm-scenarios-a85044dbbfd3.json",
}

# Known logins keep their hard-coded key; anyone else can point the standard
# GOOGLE_APPLICATION_CREDENTIALS environment variable at their own key file.
_login = os.getlogin()
key_path = _keyPathsByUser.get(_login) or os.environ.get('GOOGLE_APPLICATION_CREDENTIALS')
if not key_path:
    # Fail here with a clear message instead of a NameError on key_path below.
    raise RuntimeError(
        "No BigQuery service-account key configured for user '" + _login + "'. "
        "Add the user to _keyPathsByUser or set GOOGLE_APPLICATION_CREDENTIALS."
    )

credentials = service_account.Credentials.from_service_account_file(
    key_path, scopes=["https://www.googleapis.com/auth/cloud-platform"],
)

# BigQuery client shared by every query/load cell below; the project comes from the key file.
client = bigquery.Client(credentials=credentials, project=credentials.project_id)

# Read all Scenario Outputs 

In [4]:
# TDM releases to harvest: short version tag, dated release name, and the
# network (UNC) folder that holds that release.
# The folders are raw strings so every backslash is literal; the previous
# literals only produced the right path because "\m" and "\W" happen to be
# invalid escape sequences that Python currently passes through with a warning.
tdmVersions = pd.DataFrame([
    ['v832', 'WF TDM v8.3.2 - 2022-02-04a', r'\\modelace\ModelAce-E\1 - TDM\1 - Official Release (full run)\v8.3.2\WF TDM v8.3.2 - 2022-02-04a'],
    ['v832', 'WF TDM v8.3.2 - 2022-09-21' , r'\\modelace\ModelAce-E\1 - TDM\1 - Official Release (full run)\v8.3.2\WF TDM v8.3.2 - 2022-09-21' ],
    ['v9'  , 'WF TDM v9.0 - 2022-12-19'   , r'\\modelace\ModelAce-D\1 - TDM\3 - Model Dev\1 - WF\2 - Sandbox\v9.0Beta\WF TDM v9.0 - 2022-12-19'  ],
], columns=['tdmVersion', 'tdmVersionWithDate', 'dirTdmVersion'])

# Scenario catalogue, written as "group -> years modelled for that group".
_yearsByGroup = {
    'BY'     : [2019, 2023],
    'EF_RTP' : [2032, 2042, 2050],
    'Needs'  : [2032, 2042, 2050],
    'NoBuild': [2032, 2042, 2050],
    'RTP'    : [2030, 2032, 2040, 2042, 2050],
    'TIP'    : [2024, 2028],
}

# One row per (group, year); the scenario name is "<group>_<year>".
segScenarios = pd.DataFrame(
    [
        [_group + '_' + str(_year), _group, _year]
        for _group, _years in _yearsByGroup.items()
        for _year in _years
    ],
    columns=['scenarioName', 'scenarioGroup', 'scenarioYear'],
)

# Merge all Scenario Outputs

In [5]:
dirWork = os.getcwd()


def _withScenarioID(df, scenarioID):
    """Tag every row of ``df`` with ``scenarioID`` and make it the first column.

    ``df`` is modified in place and also returned so the call can wrap a reader.
    """
    df['scenarioID'] = scenarioID
    df.insert(0, 'scenarioID', df.pop('scenarioID'))  # move scenario id to first column
    return df


def _concatOrEmpty(frames):
    """Stack the collected per-scenario frames; an empty DataFrame if none were collected."""
    return pd.concat(frames) if frames else pd.DataFrame()


# Per-scenario frames are collected in lists and concatenated once after the
# loop, instead of re-copying an ever-growing master frame on every scenario.
_scenariosFrames    = []
_segSummaryFrames   = []
_transitShareFrames = []
_routeSummaryFrames = []
_seSummaryFrames    = []

scenarioID = 0

for _idxT, rowT in tdmVersions.iterrows():

    for _idxS, rowS in segScenarios.iterrows():

        # get scenario path (an absolute dirTdmVersion replaces dirWork in the join)
        _scenarioPath = os.path.join(dirWork, rowT['dirTdmVersion'] + '\\Scenarios\\' + rowS['scenarioName'])

        # skip version/scenario combinations that have no scenario folder
        if not os.path.exists(_scenarioPath):
            continue
        print(_scenarioPath)

        # An ID is consumed for every scenario folder found, even when loading
        # fails below, so the numbering does not depend on which inputs exist.
        scenarioID += 1
        print('ScenarioID: ' + str(scenarioID))

        try:
            # get values from Control Center to use to create filenames for outputs
            _controlCenter = os.path.join(_scenarioPath, "1ControlCenter.block")
            _unloadednetprefix = TDM.getControlCenterValue(_controlCenter, 'UnloadedNetPrefix')
            _runid = TDM.getControlCenterValue(_controlCenter, 'RID')

            # one-row lookup record describing this scenario
            dfScenarios = pd.DataFrame([
                [scenarioID, rowT['tdmVersion'], rowT['tdmVersionWithDate'], rowS['scenarioName'], rowS['scenarioGroup'], rowS['scenarioYear']]
            ], columns=['scenarioID', 'tdmVersion', 'tdmVersionWithDate', 'scenarioName', 'scenarioGroup', 'scenarioYear'])

            # segment summary
            dirSegSummary = _scenarioPath + '\\5_AssignHwy\\4_Summaries\\' + _unloadednetprefix + '_Summary_SEGID.csv'
            dfSegSummary = _withScenarioID(pd.read_csv(dirSegSummary), scenarioID)

            # transit route table
            # NOTE(review): dbfread may report a missing .dbf with its own exception
            # type rather than FileNotFoundError -- confirm the except below covers it.
            dirRouteSummary = _scenarioPath + '\\4_ModeChoice\\3_TransitAssign\\_' + _runid + '_1_PA_Route.dbf'
            dfRouteSummary = _withScenarioID(pd.DataFrame(DBF(dirRouteSummary, load=True)), scenarioID)

            # se input table
            dirSESummary = _scenarioPath + '\\0_InputProcessing\\SE_File_' + _runid + '.dbf'
            dfSESummary = _withScenarioID(pd.DataFrame(DBF(dirSESummary, load=True)), scenarioID)

            # transit share data
            dirTransitShare = _scenarioPath + '\\4_ModeChoice\\4_Shares\\_Shares_Summary_long.csv'
            dfTransitShare = _withScenarioID(pd.read_csv(dirTransitShare), scenarioID)

        except FileNotFoundError as f:
            # A missing input skips the whole scenario. Nothing has been committed
            # yet, so no master table ends up with a partially loaded scenario.
            print(f)
            continue

        # every input was read: commit the scenario to all tables together
        _scenariosFrames.append(dfScenarios)
        _segSummaryFrames.append(dfSegSummary)
        _routeSummaryFrames.append(dfRouteSummary)
        _seSummaryFrames.append(dfSESummary)
        _transitShareFrames.append(dfTransitShare)

dfScenariosMaster    = _concatOrEmpty(_scenariosFrames)
dfSegSummaryMaster   = _concatOrEmpty(_segSummaryFrames)
dfTransitShareMaster = _concatOrEmpty(_transitShareFrames)
dfRouteSummaryMaster = _concatOrEmpty(_routeSummaryFrames)
dfSESummaryMaster    = _concatOrEmpty(_seSummaryFrames)


\\modelace\ModelAce-E\1 - TDM\1 - Official Release (full run)\v8.3.2\WF TDM v8.3.2 - 2022-02-04a\Scenarios\BY_2019
ScenarioID: 1
\\modelace\ModelAce-E\1 - TDM\1 - Official Release (full run)\v8.3.2\WF TDM v8.3.2 - 2022-02-04a\Scenarios\RTP_2030
ScenarioID: 2
\\modelace\ModelAce-E\1 - TDM\1 - Official Release (full run)\v8.3.2\WF TDM v8.3.2 - 2022-02-04a\Scenarios\RTP_2040
ScenarioID: 3
\\modelace\ModelAce-E\1 - TDM\1 - Official Release (full run)\v8.3.2\WF TDM v8.3.2 - 2022-02-04a\Scenarios\RTP_2050
ScenarioID: 4
\\modelace\ModelAce-E\1 - TDM\1 - Official Release (full run)\v8.3.2\WF TDM v8.3.2 - 2022-02-04a\Scenarios\TIP_2024
ScenarioID: 5
\\modelace\ModelAce-E\1 - TDM\1 - Official Release (full run)\v8.3.2\WF TDM v8.3.2 - 2022-09-21\Scenarios\BY_2019
ScenarioID: 6
\\modelace\ModelAce-E\1 - TDM\1 - Official Release (full run)\v8.3.2\WF TDM v8.3.2 - 2022-09-21\Scenarios\BY_2023
ScenarioID: 7
\\modelace\ModelAce-E\1 - TDM\1 - Official Release (full run)\v8.3.2\WF TDM v8.3.2 - 2022-09-21

# Create New Scenario Output Data as BigQuery Files

In [6]:
# create GBQ tables
#pandas_gbq.to_gbq(dfScenariosMaster   , 'tdm_scenarios_output.scenarios'      , project_id="tdm-scenarios")
#pandas_gbq.to_gbq(dfSegSummaryMaster  , 'tdm_scenarios_output.segment_summary', project_id="tdm-scenarios")
#pandas_gbq.to_gbq(dfTransitShareMaster, 'tdm_scenarios_output.transit_share'  , project_id="tdm-scenarios")
#pandas_gbq.to_gbq(dfRouteSummaryMaster, 'tdm_scenarios_output.route_summary'  , project_id="tdm-scenarios")
#pandas_gbq.to_gbq(dfSESummaryMaster   , 'tdm_scenarios_output.se_table'       , project_id="tdm-scenarios")


# Append New Scenario Output Data to BigQuery Files

In [181]:
# functions
def update_gbq_table(tableID, df, write_disposition="WRITE_APPEND"):
    """Load ``df`` into the BigQuery table ``tableID`` and wait for the load job.

    Parameters
    ----------
    tableID : str
        Destination table id, handed unchanged to ``client.load_table_from_dataframe``.
    df : pandas.DataFrame
        Rows to load.
    write_disposition : str, default "WRITE_APPEND"
        BigQuery write disposition for the load job. The default appends ``df``
        to the existing rows; pass "WRITE_TRUNCATE" to replace the table contents.

    Returns
    -------
    None

    Raises
    ------
    Whatever ``job.result()`` raises when the load job fails.
    """
    # Appending zero rows changes nothing, so skip the API round trip.
    if df.empty and write_disposition == "WRITE_APPEND":
        return

    job_config = bigquery.LoadJobConfig(write_disposition=write_disposition)
    # job_config has to be passed by keyword: the third positional parameter of
    # load_table_from_dataframe is num_retries, so a positional job_config is
    # silently ignored as a configuration.
    job = client.load_table_from_dataframe(df, tableID, job_config=job_config)
    # Wait for the job to complete.
    job.result()

def append_new_rows(tableID, df):
    """Return the rows of ``df`` whose ``scenarioID`` is not yet in BigQuery table ``tableID``.

    Despite the name, nothing is written: the function only filters ``df`` so the
    caller can upload the remainder.

    Parameters
    ----------
    tableID : str
        Table id to check against; surrounding backticks are optional.
    df : pandas.DataFrame
        Candidate rows; must have a ``scenarioID`` column unless it is empty.

    Returns
    -------
    pandas.DataFrame
        The subset of ``df`` with scenario IDs absent from the table. An empty
        ``df`` is returned unchanged without querying BigQuery.
    """
    if df.empty:
        return df

    # Only the distinct IDs are needed, so do not download the whole table.
    # Backticks keep the dashed project name valid inside the query.
    sql = "SELECT DISTINCT scenarioID FROM `" + tableID.strip("`") + "`"
    existingIDs = client.query(sql).to_dataframe()['scenarioID'].tolist()

    return df[~df['scenarioID'].isin(existingIDs)]

In [182]:
# determine new rows to be added to GBQ tables
# (each master frame is filtered against its own BigQuery table)
dfScenariosMaster2 = append_new_rows('tdm-scenarios.tdm_scenarios_output.scenarios', dfScenariosMaster)
dfSegSummaryMaster2 = append_new_rows('tdm-scenarios.tdm_scenarios_output.segment_summary', dfSegSummaryMaster)
# compare against transit_share (was the scenarios table, a copy-paste slip), the table these rows are uploaded to
dfTransitShareMaster2 = append_new_rows('tdm-scenarios.tdm_scenarios_output.transit_share', dfTransitShareMaster)
dfRouteSummaryMaster2 = append_new_rows('tdm-scenarios.tdm_scenarios_output.route_summary', dfRouteSummaryMaster)
dfSESummaryMaster2 = append_new_rows('tdm-scenarios.tdm_scenarios_output.se_table', dfSESummaryMaster)

In [183]:
# add new scenario rows to GBQ tables
# (destination table, frame of new rows) pairs, uploaded in this order
_uploads = [
    ('tdm-scenarios.tdm_scenarios_output.scenarios'      , dfScenariosMaster2   ),
    ('tdm-scenarios.tdm_scenarios_output.segment_summary', dfSegSummaryMaster2  ),
    ('tdm-scenarios.tdm_scenarios_output.transit_share'  , dfTransitShareMaster2),
    ('tdm-scenarios.tdm_scenarios_output.route_summary'  , dfRouteSummaryMaster2),
    ('tdm-scenarios.tdm_scenarios_output.se_table'       , dfSESummaryMaster2   ),
]
for _tableID, _newRows in _uploads:
    update_gbq_table(_tableID, _newRows)

# Notes

In [168]:
# Scratch check: which rows of dfScenariosMaster are missing from the scenarios table?
_scenariosSql = "SELECT * FROM " + 'tdm-scenarios.tdm_scenarios_output.scenarios'
dfGBQ = client.query(_scenariosSql).to_dataframe()

# unique scenario IDs already in BigQuery, capped at the first 20
_uniqueIDs = set(dfGBQ['scenarioID'].tolist())
dfGBQList = list(_uniqueIDs)[0:20]

_alreadyInGBQ = dfScenariosMaster['scenarioID'].isin(dfGBQList)
df3 = dfScenariosMaster[~_alreadyInGBQ]



In [67]:
# Scratch attempt to clear the scenarios table with a DML statement.
# The recorded output shows BigQuery rejecting it (403: DML not allowed in the free tier).
_deleteSql = "DELETE FROM " + 'tdm-scenarios.tdm_scenarios_output.scenarios' + " WHERE scenarioID > 0"
job = client.query(_deleteSql)
job.result()

Forbidden: 403 Billing has not been enabled for this project. Enable billing at https://console.cloud.google.com/billing. DML queries are not allowed in the free tier. Set up a billing account to remove this restriction.

Location: us-west3
Job ID: e1725aee-d386-469b-b1b6-47656ed242e2


In [None]:
# update scenarios
table_id_1 = 'tdm-scenarios.tdm_scenarios_output.scenarios'

# Defined here so the cell does not depend on another cell having been run first.
# WRITE_TRUNCATE replaces the table's existing rows with dfScenariosMaster.
job_config = bigquery.LoadJobConfig(write_disposition="WRITE_TRUNCATE")

# job_config must be passed by keyword; the third positional parameter of
# load_table_from_dataframe is num_retries.
job = client.load_table_from_dataframe(
    dfScenariosMaster,
    table_id_1,
    job_config=job_config,
)  # Make an API request.
job.result()  # Wait for the job to complete.
table = client.get_table(table_id_1)  # Make an API request.

In [27]:
# Preview up to 1000 rows of the scenarios table as currently stored in BigQuery.
_previewSql = "SELECT * FROM `tdm-scenarios.tdm_scenarios_output.scenarios` LIMIT 1000"
_previewJob = client.query(_previewSql)
dfTemp = _previewJob.to_dataframe()
dfTemp

Unnamed: 0,scenarioID,tdmVersion,scenarioName
0,1,v832,BY_2019
1,2,v832,BY_2023
2,3,v832,Needs_2032
3,4,v832,Needs_2042
4,5,v832,Needs_2050
5,6,v832,NoBuild_2032
6,7,v832,NoBuild_2042
7,8,v832,NoBuild_2050
8,9,v832,RTP_2032
9,10,v832,RTP_2042


In [20]:
# Fetch the scenarios table's metadata (the recorded run was refused with a 403).
_scenariosTableID = "tdm-scenarios.tdm_scenarios_output.scenarios"
client.get_table(_scenariosTableID)  # Make an API request.

Forbidden: 403 GET https://bigquery.googleapis.com/bigquery/v2/projects/tdm-scenarios/datasets/tdm_scenarios_output/tables/scenarios?prettyPrint=false: Access Denied: Table tdm-scenarios:tdm_scenarios_output.scenarios: Permission bigquery.tables.get denied on table tdm-scenarios:tdm_scenarios_output.scenarios (or it may not exist).

In [26]:
# Replace the segment_summary table with the full dfSegSummaryMaster frame.
table_id_s = 'tdm-scenarios.tdm_scenarios_output.segment_summary'
job_config = bigquery.LoadJobConfig(write_disposition="WRITE_TRUNCATE")
# job_config must be passed by keyword: positionally it lands in num_retries
# and the WRITE_TRUNCATE disposition is never applied.
job = client.load_table_from_dataframe(dfSegSummaryMaster, table_id_s, job_config=job_config)  # Make an API request.
job.result()  # Wait for the job to complete.
table = client.get_table(table_id_s)  # Make an API request.



In [28]:
# Show the table handle stored in `table` by an earlier client.get_table(...) cell.
display(table)

Table(TableReference(DatasetReference('tdm-scenarios', 'tdm_scenarios_output'), 'segment_summary'))

In [6]:
# Still To Do:
#   - Start seaborn plot comparison code (in new folder?)
#   - Figure out how to "append" the tables I created to BigQuery!
#       - append both id table and other tables -- bill is showing example