# Testing the scripts for Consolidated App

In [1]:
import os
import re as re

import geopandas as gpd
import numpy as np
import pandas as pd

from collections import Counter
from itertools import chain, combinations

import shared_utils
from calitp import *
from shared_utils import utils
from siuba import *

GCS_FILE_PATH = "gs://calitp-analytics-data/data-analyses/consolidated_applications/"


from calitp.storage import get_fs
fs = get_fs()



In [2]:
# Notebook-only display settings: show full cell text, two-decimal floats,
# and up to 50 columns when a frame is rendered.
pd.set_option("display.max_colwidth", None)
pd.set_option("display.float_format", "{:.2f}".format)
pd.set_option("display.max_columns", 50)

## Load in original sheets

In [3]:
FILE_NAME = "Con_App_Cleaned.xlsx"

# Full path of the previously saved workbook; each reference sheet is read from it.
og_workbook = f"{GCS_FILE_PATH}{FILE_NAME}"

# The melted (long) data is stored under the sheet named "pivoted_data".
melted_df_og = pd.read_excel(og_workbook, sheet_name="pivoted_data")
cleaned_unpivoted_og = pd.read_excel(og_workbook, sheet_name="cleaned_unpivoted_data")
grouped_og = pd.read_excel(og_workbook, sheet_name="combos_of_funding_programs")

# Previously exported district-level geodataframe.
gdf_og = gpd.read_parquet(f"{GCS_FILE_PATH}con_app_gdf.parquet")

## Function for comparing total funding amount versus total estimated expense

In [4]:

def funding_vs_expenses(df):
    """Label a single project row by comparing combined funding to expenses.

    Parameters
    ----------
    df : mapping-like row (e.g. a pandas Series passed by ``DataFrame.apply``
        with ``axis=1``) exposing ``total_state_federal_local_funding`` and
        ``total_expenses``.

    Returns
    -------
    str
        "Fully funded" when the two amounts are equal, "Funding exceeds total
        expenses" when funding is larger, otherwise "Not fully funded".
    """
    funding = df["total_state_federal_local_funding"]
    expenses = df["total_expenses"]

    if funding > expenses:
        return "Funding exceeds total expenses"
    if funding == expenses:
        return "Fully funded"
    # Anything left (funding below expenses, or a comparison that is False
    # both ways) is reported as a shortfall.
    return "Not fully funded"


## Function for loading in the excel

In [5]:
def load_con_app():
    """Read the raw Consolidated Application report from the project bucket.

    The workbook is read with ``pd.read_excel`` from ``GCS_FILE_PATH`` and the
    result is passed through ``to_snakecase`` (defined outside this notebook;
    presumably it normalizes the column names to snake_case).

    Returns
    -------
    The frame returned by ``to_snakecase``; every column of the report is kept.
    """
    con_app_file = "Copy of Application_Review_Report_5_2_2022.xls"
    raw_report = pd.read_excel(f"{GCS_FILE_PATH}{con_app_file}")
    return to_snakecase(raw_report)

In [6]:
df1 = load_con_app()

In [7]:
df1.shape

(346, 24)

## Function for initial cleaning 

In [8]:
def initial_cleaning(df):
    """Clean the snake-cased Consolidated Application export and add derived columns.

    The input is copied first, so the caller's frame is left untouched and the
    function can be called again on the same raw frame.

    Steps, in order:
      1. Organization names: fix the Ventura County entry that carries a
         trailing non-breaking space, then drop everything from the first
         whitespace-followed-by-"(" to the end of the name.
      2. Project categories: expand the codes OP / CA / PL / CM.
      3. Short description: lower-case ``project_description``, extract the
         first matching keyword and map it to a broader category. Rows with no
         keyword become "Other Category". Values are title-cased.
      4. Monetary columns: ``local_total`` keeps only the text after the last
         ": ", has "," and "$" removed, and is cast to float (missing -> 0).
         The remaining monetary columns are filled with 0, passed through
         ``pd.to_numeric(errors="coerce")`` and cast to float.
      5. Totals: ``total_state_federal_local_funding`` (state + local + federal
         + other federal), ``total_state_fed_only`` (state + federal) and the
         ``fully_funded`` label from ``funding_vs_expenses``.
      6. Districts: set the district for a fixed list of organizations and add
         ``full_district_name`` with the spelled-out district label.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw export with snake-cased column names. ``local_total`` must hold
        strings (the ``.str`` accessor is used on it).

    Returns
    -------
    pandas.DataFrame
        A new frame with the cleaned columns plus ``short_description``,
        ``total_state_federal_local_funding``, ``total_state_fed_only``,
        ``fully_funded`` and ``full_district_name``.
    """
    # Work on a copy: the steps below overwrite columns, and converting
    # local_total to float in place would make a second call on the same
    # frame fail at the .str accessor.
    df = df.copy()

    ### ORG NAMES ###
    # Replace Ventura County since it read in with a trailing non-breaking space
    df["organization_name"] = df["organization_name"].replace(
        {"Ventura County Transportation Commission\xa0": "Ventura County Transportation Commission"}
    )

    # Remove any acronyms. Raw string so "\s" and "\(" reach the regex engine
    # without Python flagging them as invalid escape sequences.
    df["organization_name"] = df["organization_name"].str.replace(
        r"\s+\(.*$", "", regex=True
    )

    ### PROJECT CATEGORIES ###
    # Spell out project categories
    df["project_category"] = df["project_category"].replace(
        {"OP": "Operating", "CA": "Capital", "PL": "Planning", "CM": "Capital Maintenance"}
    )

    # Project categories are pretty vague, but project description has 200+ different inputs.
    # Search the descriptions for the keywords below and store the first hit
    # in the new column "short_description".
    df["project_description"] = df["project_description"].str.lower()
    df["short_description"] = df["project_description"].str.extract(
        "(operating|bus|construction|buses|planning|van|vessel|fare|ridership|vehicle|station|service|equipment|maintenance|surveillance|renovate|free|equip|operational)",
        expand=False,
    )

    # Replace the keywords with the main categories
    df["short_description"] = df["short_description"].replace(
        {
            "operating": "operating assistance",
            "operational": "operating assistance",
            "free": "free fare program",
            "ridership": "ridership expansion",
            "fare": "purchasing other tech",
            "service": "service expansion",
            "buses": "purchasing vehicles",
            "bus": "purchasing vehicles",
            "van": "purchasing vehicles",
            "vessel": "purchasing vehicles",
            "vehicles": "purchasing vehicles",
            "vehicle": "purchasing vehicles",
            "planning": "transit planning",
            "station": "construction",
            "construction": "construction",
            "maintenance": "maintenance/renovation",
            "renovate": "maintenance/renovation",
            "equipment": "purchasing other tech",
            "equip": "purchasing other tech",
            "surveillance": "purchasing other tech",
        }
    )

    # Capture any entries that don't fall into a particular category,
    # then change the column to title case for a cleaner look.
    df["short_description"] = (
        df["short_description"].fillna("other category").str.title()
    )

    ### MONETARY COLS ###
    # Local totals: split on ": " and keep only the last item, which is the
    # total of local funding a project will have/has. Then remove "," and "$"
    # and turn the column from str into float.
    # regex=False: both patterns are literal characters. With regex=True,
    # pandas >= 2.0 reads "$" as the end-of-string anchor and would leave the
    # dollar sign in place.
    df["local_total"] = (
        df["local_total"]
        .str.split(": ")
        .str[-1]
        .str.replace(",", "", regex=False)
        .str.replace("$", "", regex=False)
        .fillna(0)
        .astype("float")
    )

    # The rest of the monetary columns
    monetary_cols = [
        "total_expenses",
        "_5311_funds",
        "_5311_f__funds",
        "_5311_cmaq_funds",
        "_5339_funds",
        "federal_total",
        "other_fed_funds_total",
        "lctop__state__funds",
        "sb1__state_of_good_repair__state__funds",
        "transit_development_act__state__funds",
        "other_state_funds",
        "state_total",
    ]

    # Clean them all up. Note: values that cannot be parsed are coerced to NaN
    # after the fillna, so they stay NaN in the result.
    df[monetary_cols] = (
        df[monetary_cols]
        .fillna(value=0)
        .apply(pd.to_numeric, errors="coerce")
        .astype("float")
    )

    # Two new columns: total of state, local and federal funds,
    # and total of state and federal funds only.
    df = df.assign(
        total_state_federal_local_funding=(
            df["state_total"]
            + df["local_total"]
            + df["federal_total"]
            + df["other_fed_funds_total"]
        ),
        total_state_fed_only=(df["state_total"] + df["federal_total"]),
    )

    # Determine whether each project is fully funded or not
    df["fully_funded"] = df.apply(funding_vs_expenses, axis=1)

    ### DISTRICTS ###
    # To find rows with missing values in the district column:
    # no_districts = data[data["district"].isnull()]
    # no_districts_list = no_districts["project_upin"].tolist()

    # Set the district for these organizations by name
    district_by_organization = {
        "City of Banning": 8,
        "City of Clovis": 6,
        "City of Los Angeles DOT": 7,
        "Peninsula Corridor Joint Powers Board": 4,
        "San Joaquin Regional Rail Commission": 10,
        "Western Contra Costa Transit Authority": 4,
    }
    for organization, district in district_by_organization.items():
        df.loc[df["organization_name"] == organization, "district"] = district

    # Create new column with fully spelled out names
    df["full_district_name"] = df["district"].replace(
        {
            7: "District 7: Los Angeles",
            4: "District 4: Bay Area / Oakland",
            2: "District 2: Redding",
            9: "District 9: Bishop",
            10: "District 10: Stockton",
            11: "District 11: San Diego",
            3: "District 3: Marysville / Sacramento",
            12: "District 12: Orange County",
            8: "District 8: San Bernardino / Riverside",
            5: "District 5: San Luis Obispo / Santa Barbara",
            6: "District 6: Fresno / Bakersfield",
            1: "District 1: Eureka",
        }
    )
    return df
   

In [9]:
df2 = initial_cleaning(df1)

### Differs from the original df because some columns are int64 in the saved sheet but float64 here (e.g. `district`)

In [10]:
assert set(df2.columns) == set(cleaned_unpivoted_og.columns)
assert df2.shape == cleaned_unpivoted_og.shape

In [11]:
cleaned_unpivoted_og.head(3)

Unnamed: 0,organization_name,district,application_name,year,application_status,project_upin,project_category,project_line_item__ali_,project_description,is_stimulus,consolidated_application,total_expenses,_5311_funds,_5311_f__funds,_5311_cmaq_funds,_5339_funds,federal_total,other_fed_funds_total,lctop__state__funds,sb1__state_of_good_repair__state__funds,transit_development_act__state__funds,other_state_funds,state_total,local_total,short_description,total_state_federal_local_funding,total_state_fed_only,fully_funded,full_district_name
0,Alameda-Contra Costa Transit District,4,Consolidated Application (Consolidated Application),2022,Submitted,BCG0003649,Operating,300901,operating assistance (lctop only - project # 1),No,Yes,27795000,0,0,0,0,0,0,6546867.0,0,0.0,0.0,6546867.0,21248133.0,Operating Assistance,27795000.0,6546867.0,Fully funded,District 4: Bay Area / Oakland
1,Alameda-Contra Costa Transit District,4,Consolidated Application (Consolidated Application),2022,Submitted,BCG0003684,Capital,116220,purchase misc communications equip (lctop only - project # 2),No,Yes,1881663,0,0,0,0,0,0,1631663.0,0,0.0,0.0,1631663.0,0.0,Purchasing Other Tech,1631663.0,1631663.0,Not fully funded,District 4: Bay Area / Oakland
2,Amador Transit,10,Consolidated Application (Consolidated Application),2022,Submitted,BCG0003876,Operating,300902,operating assistance sliding scale,No,Yes,853394,311252,0,0,0,311252,0,94472.0,0,0.0,0.0,94472.0,465000.0,Operating Assistance,870724.0,405724.0,Funding exceeds total expenses,District 10: Stockton


In [12]:
df2.head(3)

Unnamed: 0,organization_name,district,application_name,year,application_status,project_upin,project_category,project_line_item__ali_,project_description,is_stimulus,consolidated_application,total_expenses,_5311_funds,_5311_f__funds,_5311_cmaq_funds,_5339_funds,federal_total,other_fed_funds_total,lctop__state__funds,sb1__state_of_good_repair__state__funds,transit_development_act__state__funds,other_state_funds,state_total,local_total,short_description,total_state_federal_local_funding,total_state_fed_only,fully_funded,full_district_name
0,Alameda-Contra Costa Transit District,4.0,Consolidated Application (Consolidated Application),2022,Submitted,BCG0003649,Operating,300901,operating assistance (lctop only - project # 1),No,Yes,27795000.0,0.0,0.0,0.0,0.0,0.0,0.0,6546867.0,0.0,0.0,0.0,6546867.0,21248133.0,Operating Assistance,27795000.0,6546867.0,Fully funded,District 4: Bay Area / Oakland
1,Alameda-Contra Costa Transit District,4.0,Consolidated Application (Consolidated Application),2022,Submitted,BCG0003684,Capital,116220,purchase misc communications equip (lctop only - project # 2),No,Yes,1881663.0,0.0,0.0,0.0,0.0,0.0,0.0,1631663.0,0.0,0.0,0.0,1631663.0,0.0,Purchasing Other Tech,1631663.0,1631663.0,Not fully funded,District 4: Bay Area / Oakland
2,Amador Transit,10.0,Consolidated Application (Consolidated Application),2022,Submitted,BCG0003876,Operating,300902,operating assistance sliding scale,No,Yes,853394.0,311252.0,0.0,0.0,0.0,311252.0,0.0,94472.0,0.0,0.0,0.0,94472.0,465000.0,Operating Assistance,870724.0,405724.0,Funding exceeds total expenses,District 10: Stockton


In [13]:
cleaned_unpivoted_og['district'].dtype

dtype('int64')

In [14]:
df2['district'].dtype

dtype('float64')

In [15]:
df2 = df2.astype({"district": "int64"})

In [16]:
df2.equals(cleaned_unpivoted_og)

False

In [17]:
# Funding columns of the recomputed frame, cast to int64 so they can be
# compared with the saved sheet regardless of int/float dtype differences.
# The cast drops any fractional part.
df2_numeric_cols = [
    "_5311_funds",
    "_5311_f__funds",
    "_5311_cmaq_funds",
    "_5339_funds",
    "federal_total",
    "other_fed_funds_total",
    "lctop__state__funds",
    "sb1__state_of_good_repair__state__funds",
    "transit_development_act__state__funds",
    "other_state_funds",
    "state_total",
    "local_total",
    "total_state_federal_local_funding",
    "total_state_fed_only",
]
df2_numeric = df2[df2_numeric_cols].astype("int64")

In [18]:
# Same funding columns from the saved sheet, cast to int64 for a like-for-like
# comparison with the recomputed frame. The cast drops any fractional part.
og_numeric_cols = [
    "_5311_funds",
    "_5311_f__funds",
    "_5311_cmaq_funds",
    "_5339_funds",
    "federal_total",
    "other_fed_funds_total",
    "lctop__state__funds",
    "sb1__state_of_good_repair__state__funds",
    "transit_development_act__state__funds",
    "other_state_funds",
    "state_total",
    "local_total",
    "total_state_federal_local_funding",
    "total_state_fed_only",
]
cleaned_unpivoted_og_numeric = cleaned_unpivoted_og[og_numeric_cols].astype("int64")

In [19]:
df2_numeric.equals(cleaned_unpivoted_og_numeric)

True

## Melted DF Function

In [20]:
def melt_df(df):
    """Reshape the cleaned frame to one row per project and funding source.

    Each funding column becomes a row identified by ``project_upin`` (the
    unique identifier of a project), with the source name in ``program_name``
    and the amount in ``funding_received``. Project-level details are merged
    back on, the source names are replaced with readable labels, and rows
    whose ``funding_received`` is not greater than 0 are dropped.

    Parameters
    ----------
    df : pandas.DataFrame
        Cleaned, unpivoted application data containing ``project_upin``, the
        twelve funding columns and the project detail columns listed below.

    Returns
    -------
    pandas.DataFrame
        Long-format frame; the index keeps the positions of the surviving rows
        from the merged frame (it is not reset).
    """
    # Funding column -> readable label. The insertion order is also the order
    # in which the columns are melted.
    fund_labels = {
        "_5311_funds": "5311 (Fed)",
        "_5311_f__funds": "5311(f) (Fed)",
        "_5311_cmaq_funds": "5311 CMAQ (Fed)",
        "_5339_funds": "5339 (Fed)",
        "lctop__state__funds": "LCTOP (State)",
        "sb1__state_of_good_repair__state__funds": "SB1. State of Good Repair (State)",
        "transit_development_act__state__funds": "Transit Development Act (State)",
        "other_state_funds": "Other State Funds",
        "other_fed_funds_total": "Other Federal Funds",
        "local_total": "Local Funds",
        "federal_total": "Federal Total",
        "state_total": "State Total",
    }
    fund_cols = list(fund_labels)

    # Project-level details carried onto every melted row
    # (description, fully funded or not, etc.).
    project_info_cols = [
        "total_expenses",
        "organization_name",
        "district",
        "full_district_name",
        "year",
        "application_status",
        "project_upin",
        "project_category",
        "project_line_item__ali_",
        "project_description",
        "is_stimulus",
        "total_state_federal_local_funding",
        "fully_funded",
        "short_description",
    ]

    # Melt: fund names go under "program_name" and the associated amounts
    # under "funding_received".
    long_funds = df[["project_upin", *fund_cols]].melt(
        id_vars=["project_upin"],
        value_vars=fund_cols,
        var_name="program_name",
        value_name="funding_received",
    )

    # Left merge keeps every melted row, which has many more lines than the
    # project-level frame.
    merged = long_funds.merge(df[project_info_cols], on="project_upin", how="left")

    # Rename funds for clarity
    merged["program_name"] = merged["program_name"].replace(fund_labels)

    # Drop rows with no funding from that source to shorten the frame
    has_funding = merged["funding_received"] > 0
    return merged[has_funding]


In [21]:
df3 = melt_df(df2)

### Check with original

In [22]:
df3['total_expenses'] = df3['total_expenses'].astype('int64')

In [23]:
df3.head(3)

Unnamed: 0,project_upin,program_name,funding_received,total_expenses,organization_name,district,full_district_name,year,application_status,project_category,project_line_item__ali_,project_description,is_stimulus,total_state_federal_local_funding,fully_funded,short_description
2,BCG0003876,5311 (Fed),311252.0,853394,Amador Transit,10,District 10: Stockton,2022,Submitted,Operating,300902,operating assistance sliding scale,No,870724.0,Funding exceeds total expenses,Operating Assistance
3,BCG0003877,5311 (Fed),317477.0,916170,Amador Transit,10,District 10: Stockton,2022,Submitted,Operating,300902,operating assistance sliding scale,No,932477.0,Funding exceeds total expenses,Operating Assistance
8,BCG0003914,5311 (Fed),995458.0,1886992,Butte County Association of Governments/ Butte Regional Transit,3,District 3: Marysville / Sacramento,2022,Submitted,Operating,300902,operating assistance sliding scale,No,1886993.0,Funding exceeds total expenses,Operating Assistance


In [24]:
melted_df_og.head(3)

Unnamed: 0,project_upin,program_name,funding_received,total_expenses,organization_name,district,full_district_name,year,application_status,project_category,project_line_item__ali_,project_description,is_stimulus,total_state_federal_local_funding,fully_funded,short_description
0,BCG0003876,5311 (Fed),311252.0,853394,Amador Transit,10,District 10: Stockton,2022,Submitted,Operating,300902,operating assistance sliding scale,No,870724.0,Funding exceeds total expenses,Operating Assistance
1,BCG0003877,5311 (Fed),317477.0,916170,Amador Transit,10,District 10: Stockton,2022,Submitted,Operating,300902,operating assistance sliding scale,No,932477.0,Funding exceeds total expenses,Operating Assistance
2,BCG0003914,5311 (Fed),995458.0,1886992,Butte County Association of Governments/ Butte Regional Transit,3,District 3: Marysville / Sacramento,2022,Submitted,Operating,300902,operating assistance sliding scale,No,1886993.0,Funding exceeds total expenses,Operating Assistance


In [25]:
melted_df_og.equals(df3)

False

In [39]:
# Columns whose sums are compared between the recomputed and saved melted frames.
cols = [
    "project_upin",
    "funding_received",
    "total_expenses",
    "district",
    "year",
    "total_state_federal_local_funding",
    "fully_funded",
]

In [40]:
def pick_column_and_aggregate(df1, df2, col):
    """Print whether column ``col`` sums to the same value in both frames.

    Prints "PASS" when ``df1[col].sum()`` equals ``df2[col].sum()`` and
    "<col>: FAIL" otherwise. Nothing is returned.
    """
    sums_match = df1[col].sum() == df2[col].sum()
    print("PASS" if sums_match else f"{col}: FAIL")

In [42]:
for c in cols:
    pick_column_and_aggregate(df3, melted_df_og, c)

PASS
PASS
PASS
PASS
PASS
PASS
PASS


In [28]:
assert set(df3.columns) == set(melted_df_og.columns)
assert df3.shape == melted_df_og.shape

In [46]:
set(df3.columns).difference(set(melted_df_og.columns))

set()

## Grouped DF 

In [70]:
def grouped_df(melted_df, initial_clean_df):
    """List every funding program of a project on a single row and count them.

    Parameters
    ----------
    melted_df : pandas.DataFrame
        Long-format frame with at least ``project_upin`` and ``program_name``.
        It is not modified.
    initial_clean_df : pandas.DataFrame
        Project-level frame with ``project_upin``, ``organization_name``,
        ``project_description`` and ``year``.

    Returns
    -------
    pandas.DataFrame
        Columns ``project_upin``, ``organization_name``,
        ``project_description``, ``all_programs`` (comma-joined program names),
        ``year`` and ``count_of_funding_programs_applied``.
    """
    # Exclude totals: not a fund.
    # .copy() detaches the filtered rows from melted_df, so adding a column
    # below does not raise pandas' SettingWithCopyWarning.
    total_labels = ["Local Funds", "Federal Total", "State Total"]
    grouped1 = melted_df.loc[~melted_df["program_name"].isin(total_labels)].copy()

    # Grab all the different program names by project upin and put them in a new column
    grouped1["all_programs"] = grouped1.groupby("project_upin")["program_name"].transform(
        lambda x: ",".join(x)
    )

    # Keep only the columns of interest, drop duplicates
    grouped1 = grouped1[["project_upin", "all_programs"]].drop_duplicates()

    # Merge with the cleaned frame because above we only have project_upin and the funds left
    grouped2 = pd.merge(grouped1, initial_clean_df, on="project_upin", how="left")

    # Keep only relevant cols; .copy() for the same reason as above, since a
    # column is assigned on this selection next.
    grouped2 = grouped2[
        ["project_upin", "organization_name", "project_description", "all_programs", "year"]
    ].copy()

    # Count the number of funds under the "all_programs" column
    # https://stackoverflow.com/questions/51502263/pandas-dataframe-object-has-no-attribute-str
    grouped2["count_of_funding_programs_applied"] = (
        grouped2["all_programs"]
        .str.split(",+")
        .str.len()
        .groupby(grouped2.project_upin)
        .transform("sum")
    )

    return grouped2


In [71]:
df4 = grouped_df(df3, df2)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [73]:
df4.head()

Unnamed: 0,project_upin,organization_name,project_description,all_programs,year,count_of_funding_programs_applied
0,BCG0003876,Amador Transit,operating assistance sliding scale,"5311 (Fed),LCTOP (State)",2022,2
1,BCG0003877,Amador Transit,operating assistance sliding scale,"5311 (Fed),LCTOP (State)",2022,2
2,BCG0003914,Butte County Association of Governments/ Butte Regional Transit,operating assistance sliding scale,"5311 (Fed),Transit Development Act (State)",2022,2
3,BCG0003916,Butte County Association of Governments/ Butte Regional Transit,operating assistance sliding scale,"5311 (Fed),Transit Development Act (State)",2022,2
4,BCG0004040,Calaveras Transit Agency,operating assistance sliding scale - ffy2022,"5311 (Fed),Transit Development Act (State)",2022,2


grouped_og.head()

In [75]:
assert set(df4.columns) == set(grouped_og.columns)
assert df4.shape == grouped_og.shape

In [76]:
df4.equals(grouped_og)

True

In [92]:
grouped_og.shape

(335, 6)

## Geodataframe

In [85]:
def gdf_conapp(df):
    """Summarize funding by Caltrans district and export it with district shapes.

    Reads the Caltrans district GeoJSON, aggregates ``df`` by ``district``
    (project count and summed ``total_state_fed_only``), adds a display string
    in millions and a percentile bucket, inner-merges the summary onto the
    district shapes and exports the merged geodataframe to ``GCS_FILE_PATH``
    as "script_con_app_gdf".

    Percentile buckets on ``total_state_fed_only``:
      * "25": greater than 0 and at most the 25th percentile
      * "50": above the 25th and at most the 75th percentile
      * "75": above the 75th percentile
      * "No Info": anything else (e.g. 0 or missing)

    Parameters
    ----------
    df : pandas.DataFrame
        Cleaned, unaggregated frame with ``district``, ``project_upin`` and
        ``total_state_fed_only``.

    Returns
    -------
    pandas.DataFrame
        The per-district summary (no geometry).
        NOTE(review): the merged geodataframe is exported but not returned;
        confirm whether callers expect the geodataframe instead.
    """
    # Load geojson with the shapes of the Caltrans districts
    geojson = (gpd.read_file("https://gis.data.ca.gov/datasets/0144574f750f4ccc88749004aca6eb0c_0.geojson?outSR=%7B%22latestWkid%22%3A3857%2C%22wkid%22%3A102100%7D")
               .to_crs(epsg=4326))

    # Keep only the columns we want
    geojson = geojson[["DISTRICT", "Shape_Length", "Shape_Area", "geometry"]]

    # Take the cleaned, unaggregated dataframe and get summary statistics
    summarized = df.groupby("district").agg(
        {"project_upin": "count", "total_state_fed_only": "sum"}
    ).reset_index()

    # New column that rounds total_state_fed_only to millions, e.g. "$11.0M"
    summarized["funding_millions"] = (
        "$"
        + (summarized["total_state_fed_only"].astype(float) / 1000000)
        .round()
        .astype(str)
        + "M"
    )

    # For the map, it looks nicer when the legend is pinned to percentiles
    # instead of actual dollar amounts.
    p25 = float(summarized.total_state_fed_only.quantile(0.25))
    p75 = float(summarized.total_state_fed_only.quantile(0.75))

    # Map a district's total to its percentile bucket. Upper bounds are
    # inclusive so a district sitting exactly on a percentile still lands in a
    # bucket instead of falling through to "No Info".
    def funding_range(row):
        amount = row.total_state_fed_only
        if 0 < amount <= p25:
            return "25"
        elif p25 < amount <= p75:
            return "50"
        elif amount > p75:
            return "75"
        else:
            return "No Info"

    # Apply function into a new column
    summarized["funding_percentile"] = summarized.apply(funding_range, axis=1)

    # Merge geojson with the summarized df
    gdf = geojson.merge(
        summarized, how="inner", left_on="DISTRICT", right_on="district"
    )

    # Export to the same bucket folder the rest of the notebook reads from
    # ("data-analyses", not "data analyses").
    shared_utils.utils.geoparquet_gcs_export(gdf, GCS_FILE_PATH, "script_con_app_gdf")

    return summarized

In [86]:
gdf1 = gdf_conapp(df2)


This metadata specification does not yet make stability promises.  We do not yet recommend using this in a production setting unless you are able to rewrite your Parquet/Feather files.



In [88]:
type(gdf1)

pandas.core.frame.DataFrame

In [90]:
gdf1.shape

(12, 5)

In [91]:
gdf1

Unnamed: 0,district,project_upin,total_state_fed_only,funding_millions,funding_percentile
0,1,30,10983899.0,$11.0M,50
1,2,37,9373942.0,$9.0M,25
2,3,42,29812565.0,$30.0M,50
3,4,47,277215227.08,$277.0M,75
4,5,32,21020813.0,$21.0M,50
5,6,53,76633070.96,$77.0M,75
6,7,15,114012442.0,$114.0M,75
7,8,33,21805615.0,$22.0M,50
8,9,19,4821987.0,$5.0M,25
9,10,28,18923483.75,$19.0M,50


## One function to capture them all 

In [94]:
def con_app_complete_clean():
    """Run the whole pipeline: load, clean, melt, group, export.

    Loads the raw report, cleans it, builds the melted and grouped frames,
    calls ``gdf_conapp`` on the cleaned frame, and writes the melted, cleaned
    and grouped frames to "Script_Testing.xlsx" under ``GCS_FILE_PATH``.

    Returns
    -------
    tuple
        ``(group_df, cleaned_con_app, melted_df)``.
    """
    # Load the original sheet and do the initial cleaning
    cleaned_con_app = initial_cleaning(load_con_app())

    # First aggregation: melting the dataframe
    melted_df = melt_df(cleaned_con_app)

    # Second aggregation: putting all funding programs onto a single line
    group_df = grouped_df(melted_df, cleaned_con_app)

    # Third aggregation: district summary; its result is not used here
    gdf_conapp(cleaned_con_app)

    # Write the three dataframes into one Excel workbook, one sheet each
    sheets = {
        "pivoted_data": melted_df,
        "cleaned_unpivoted_data": cleaned_con_app,
        "combos_of_funding_programs": group_df,
    }
    with pd.ExcelWriter(f"{GCS_FILE_PATH}Script_Testing.xlsx") as writer:
        for sheet_name, frame in sheets.items():
            frame.to_excel(writer, sheet_name=sheet_name, index=False)

    return group_df, cleaned_con_app, melted_df


In [95]:
df5, df6, df7 = con_app_complete_clean()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

This metadata specification does not yet make stability promises.  We do not yet recommend using this in a production setting unless you are able to rewrite your Parquet/Feather files.



In [96]:
df5

Unnamed: 0,project_upin,organization_name,project_description,all_programs,year,count_of_funding_programs_applied
0,BCG0003876,Amador Transit,operating assistance sliding scale,"5311 (Fed),LCTOP (State)",2022,2
1,BCG0003877,Amador Transit,operating assistance sliding scale,"5311 (Fed),LCTOP (State)",2022,2
2,BCG0003914,Butte County Association of Governments/ Butte Regional Transit,operating assistance sliding scale,"5311 (Fed),Transit Development Act (State)",2022,2
3,BCG0003916,Butte County Association of Governments/ Butte Regional Transit,operating assistance sliding scale,"5311 (Fed),Transit Development Act (State)",2022,2
4,BCG0004040,Calaveras Transit Agency,operating assistance sliding scale - ffy2022,"5311 (Fed),Transit Development Act (State)",2022,2
...,...,...,...,...,...,...
330,BCG0003716,City of Union City,electric vehicle charging infrastructure for union city transit fleet (lctop only),Other State Funds,2022,1
331,BCG0003707,Napa Valley Transportation Authority,nvta zero emission bus procurement project (lctop only),Other State Funds,2022,1
332,BCG0003738,San Mateo County Transit District,purchase replacement std 40 ft or larger bus (lctop only),"Other State Funds,Other Federal Funds",2022,2
333,BCG0003705,Santa Barbara Metropolitan Transit District,purchase replacement std 40 ft or larger bus (lctop only),Other State Funds,2022,1
