In [1]:
import os
import pandas as pd
import io
from dbfread import DBF

In [2]:
#google cloud big query libraries
from google.cloud import bigquery
from google.oauth2 import service_account

# Location of the service-account JSON key used to authenticate against BigQuery.
# Set the STREETLIGHT_BQ_KEY_PATH environment variable to point at a key stored
# elsewhere; when it is unset the original workstation path is used, so existing
# runs behave exactly as before.
key_path = os.environ.get(
    "STREETLIGHT_BQ_KEY_PATH",
    r"C:\Users\cday\streetlight-temp-analysis-64775adf5abc.json",
)

# Build credentials from the key file, requesting the cloud-platform scope.
credentials = service_account.Credentials.from_service_account_file(
    key_path, scopes=["https://www.googleapis.com/auth/cloud-platform"],
)

# The client runs queries in the project the service account belongs to.
client = bigquery.Client(credentials=credentials, project=credentials.project_id,)

In [3]:
# Select the origin-destination rows of the udot_personal_spring_2019 table in
# which either the origin or the destination zone name lies between '350060_1'
# and '350060_7'.
# NOTE(review): BETWEEN is applied to the zone-name strings, so the range is
# lexicographic (a zone named e.g. '350060_10' would also match) -- confirm that
# this is the intended set of zones.
sql = """
SELECT
    mode_of_travel,
    origin_zone_name,
    destination_zone_name,
    day_type,
    day_part,
    data_period,
    o_d_traffic_sample_trip_counts,
    o_d_traffic_calibrated_trip_volume
FROM ut-udot-adap-prod.streetlight_data.udot_personal_spring_2019
WHERE
    ((origin_zone_name BETWEEN '350060_1' AND '350060_7') OR 
     (destination_zone_name BETWEEN '350060_1' AND '350060_7'))
"""
# Run the query with the BigQuery client and load the result into a DataFrame.
df = client.query(sql).to_dataframe()
# Preview the first rows of the result.
df.head()

Unnamed: 0,mode_of_travel,origin_zone_name,destination_zone_name,day_type,day_part,data_period,o_d_traffic_sample_trip_counts,o_d_traffic_calibrated_trip_volume
0,All Vehicles - StL All Vehicles Sample Trip Co...,110127_0,350060_1,0: All Days (Mo-Su),3: Mid-Day (9am-3pm),4. Mar-May,23,7.25
1,All Vehicles - StL All Vehicles Sample Trip Co...,350060_1,350036_0,0: All Days (Mo-Su),5: Late PM (6pm-12am),4. Mar-May,46,14.5
2,All Vehicles - StL All Vehicles Sample Trip Co...,350472_0,350060_1,2: Weekend Day (Sa-Su),0: All Day (12am-12am),4. Mar-May,26,29.0
3,All Vehicles - StL All Vehicles Sample Trip Co...,43098_0,350060_1,0: All Days (Mo-Su),0: All Day (12am-12am),4. Mar-May,368,116.0
4,All Vehicles - StL All Vehicles Sample Trip Co...,350060_1,350422_0,0: All Days (Mo-Su),0: All Day (12am-12am),4. Mar-May,69,21.75


# Read all Scenario Outputs 

In [3]:
# TDM releases to read: a short version tag (it also prefixes the scenario output
# file names built further down) and the UNC path of the release's root folder.
# The paths are raw strings so every backslash is literal; the previous escaped
# form relied on invalid escape sequences ('\m', '\W') being passed through,
# which Python reports as a warning and plans to turn into an error.
tdmVersions = pd.DataFrame([
    ['v832', r'\\modelace\ModelAce-E\1 - TDM\1 - Official Release (full run)\v8.3.2\WF TDM v8.3.2 - 2022-09-21'],
    ['v9',   r'\\modelace\ModelAce-D\1 - TDM\3 - Model Dev\1 - WF\2 - Sandbox\v9.0Beta\WF TDM v9.0 - 2022-12-19']
], columns = (['tdmVersion', 'dirTdmVersion']))

# One row per model scenario: the scenario folder name, followed by the file-name
# suffixes used to locate its segment summary CSV, its transit route DBF and its
# SE input DBF.
segScenarios = pd.DataFrame(
    data=[
        ('BY_2019',      '_SE19_Net19_Summary_SEGID',           '_SE19_Net19_1_PA_Route',         '_SE19_Net19'),
        ('BY_2023',      '_SE23_Net23_Summary_SEGID',           '_SE23_Net23_1_PA_Route',         '_SE23_Net23'),
        ('EF_RTP_2032',  '_ExtForces_SE32_Net32_Summary_SEGID', '_SE32_Net32_1_PA_Route',         '_SE32_Net32'),
        ('EF_RTP_2042',  '_ExtForces_SE42_Net42_Summary_SEGID', '_SE42_Net42_1_PA_Route',         '_SE42_Net42'),
        ('EF_RTP_2050',  '_ExtForces_SE50_Net50_Summary_SEGID', '_SE50_Net50_1_PA_Route',         '_SE50_Net50_EF'),
        ('Needs_2032',   '_Needs_SE32_Net32_Summary_SEGID',     '_SE32_Net32_Needs_1_PA_Route',   '_SE32_Net32_Needs'),
        ('Needs_2042',   '_Needs_SE42_Net42_Summary_SEGID',     '_SE42_Net42_Needs_1_PA_Route',   '_SE42_Net42_Needs'),
        ('Needs_2050',   '_Needs_SE50_Net50_Summary_SEGID',     '_SE50_Net50_Needs_1_PA_Route',   '_SE50_Net50_Needs'),
        ('NoBuild_2032', '_NoBuild_SE32_Net28_Summary_SEGID',   '_SE32_Net28_NoBuild_1_PA_Route', '_SE32_Net28_NoBuild'),
        ('NoBuild_2042', '_NoBuild_SE42_Net28_Summary_SEGID',   '_SE42_Net28_NoBuild_1_PA_Route', '_SE42_Net28_NoBuild'),
        ('NoBuild_2050', '_NoBuild_SE50_Net28_Summary_SEGID',   '_SE50_Net28_NoBuild_1_PA_Route', '_SE50_Net28_NoBuild'),
        ('RTP_2032',     '_RTP_SE32_Net32_Summary_SEGID',       '_SE32_Net32_1_PA_Route',         '_SE32_Net32'),
        ('RTP_2042',     '_RTP_SE42_Net42_Summary_SEGID',       '_SE42_Net42_1_PA_Route',         '_SE42_Net42'),
        ('RTP_2050',     '_RTP_SE50_Net50_Summary_SEGID',       '_SE50_Net50_1_PA_Route',         '_SE50_Net50'),
        ('TIP_2028',     '_TIP_SE28_Net28_Summary_SEGID',       '_SE28_Net28_1_PA_Route',         '_SE28_Net28'),
    ],
    columns=['ScenarioName', 'segSummaryCsvName', 'routeSummaryDbfName', 'seSummaryDbfName'],
)

In [4]:
# Read the segment summary (CSV), transit route (DBF) and SE input (DBF) tables
# for every TDM version / scenario combination, tag each table with a sequential
# scenarioID, and stack them into one master frame per table type.
# dfScenariosMaster maps each scenarioID back to its tdmVersion and scenarioName.
#
# A combination is loaded only when all three of its files exist. The check is
# done before anything is read, so a scenario can never be half-registered
# (e.g. segment rows appended under a scenarioID that is then reused by the next
# scenario because a later file turned out to be missing).
dirWork = os.getcwd()

lstScenarios = list()
lstSegSummary = list()
lstRouteSummary = list()
lstSESummary = list()
lstSkipped = list()   # (tdmVersion, scenarioName, missing files) of every combination left out
scenarioID = 1

for tdmNum in range(len(tdmVersions)):
    tdmVersion = tdmVersions.iloc[tdmNum, 0]
    dirTdmVersion = tdmVersions.iloc[tdmNum, 1]

    for scenarioNum in range(len(segScenarios)):
        scenarioName = segScenarios.iloc[scenarioNum, 0]

        # On Windows an absolute (UNC) dirTdmVersion makes os.path.join ignore
        # dirWork; dirWork only takes effect for a relative dirTdmVersion.
        dirScenario = os.path.join(dirWork, dirTdmVersion + '\\Scenarios\\' + scenarioName)

        dirSegSummary = dirScenario + '\\5_AssignHwy\\4_Summaries\\' + tdmVersion + segScenarios.iloc[scenarioNum, 1] + '.csv'
        dirRouteSummary = dirScenario + '\\4_ModeChoice\\3_TransitAssign\\_' + tdmVersion + segScenarios.iloc[scenarioNum, 2] + '.dbf'
        dirSESummary = dirScenario + '\\0_InputProcessing\\SE_File_' + tdmVersion + segScenarios.iloc[scenarioNum, 3] + '.dbf'

        # Skip (and remember) combinations that were not run or are incomplete.
        lstMissing = [f for f in (dirSegSummary, dirRouteSummary, dirSESummary) if not os.path.isfile(f)]
        if lstMissing:
            lstSkipped.append((tdmVersion, scenarioName, lstMissing))
            continue

        # Read all three tables first; nothing is appended until every read succeeded.
        dfSegSummary = pd.read_csv(dirSegSummary)
        dfRouteSummary = pd.DataFrame(DBF(dirRouteSummary, load=True))
        dfSESummary = pd.DataFrame(DBF(dirSESummary, load=True))

        # TODO: get and join all transit share data

        # Tag every table with the scenario key and queue it for concatenation.
        for dfSummary, lstSummary in (
            (dfSegSummary, lstSegSummary),
            (dfRouteSummary, lstRouteSummary),
            (dfSESummary, lstSESummary),
        ):
            dfSummary['scenarioID'] = scenarioID
            lstSummary.append(dfSummary)

        # One lookup row per loaded scenario.
        dfScenarios = pd.DataFrame([
            [scenarioID, tdmVersion, scenarioName]
        ], columns = (['scenarioID','tdmVersion','scenarioName']))
        lstScenarios.append(dfScenarios)

        scenarioID += 1

print(f'Loaded {len(lstScenarios)} scenario(s); skipped {len(lstSkipped)} with missing files (see lstSkipped).')

# pd.concat rejects an empty list with an unhelpful message; fail with the real cause.
if not lstScenarios:
    raise ValueError('No complete scenario outputs were found under the directories listed in tdmVersions.')

dfSegSummaryMaster = pd.concat(lstSegSummary)
dfRouteSummaryMaster = pd.concat(lstRouteSummary)
dfSESummaryMaster = pd.concat(lstSESummary)
dfSESummaryMastser = dfSESummaryMaster   # original misspelled name, kept as an alias
dfScenariosMaster = pd.concat(lstScenarios)



In [6]:
# Still To Do:
#   - Start seaborn plot comparison code (in new folder?)
#   - Figure out how to "append" the tables I created to BigQuery!
#       - append both the ID table and the other tables -- Bill is showing an example

In [7]:
# Upload the scenario lookup table (dfScenariosMaster) from pandas to BigQuery.
# NOTE(review): Bill suggests it's clearer to append instead of replace. The call
# below keeps pandas-gbq's default if_exists="fail", i.e. it neither appends nor
# replaces and is expected to error when the destination table already exists --
# switch to "append" once the append workflow is settled.

import pandas
import pandas_gbq

# TODO: Set project_id to your Google Cloud Platform project ID.
project_id = "tdm-scenarios"

# TODO: Set table_id to the full destination table ID (including the
#       dataset ID), written as "<dataset>.<table>".
table_id = 'tdm_scenarios_output.scenarios'

pandas_gbq.to_gbq(
    dfScenariosMaster,
    destination_table=table_id,
    project_id=project_id,
    if_exists="fail",
)