# Consolidated application data

In [1]:
import os
import re as re

import geopandas as gpd
import numpy as np
import pandas as pd

pd.options.display.max_columns = 50
pd.options.display.max_rows = 250
pd.set_option("display.max_colwidth", None)
pd.options.display.float_format = "{:.2f}".format

from collections import Counter
from itertools import chain, combinations

import shared_utils
from calitp import *
from shared_utils import utils
from siuba import *

GCS_FILE_PATH = "gs://calitp-analytics-data/data-analyses/consolidated_applications/"

from calitp.storage import get_fs

fs = get_fs()



In [2]:
# Read the application review report from GCS, then normalize its column
# headers with to_snakecase.
raw_report = pd.read_excel(
    f"{GCS_FILE_PATH}Copy of Application_Review_Report_5_2_2022.xls"
)
data = to_snakecase(raw_report)

In [3]:
data.shape

(346, 24)

In [4]:
data.isna().sum()

organization_name                            0
district                                     6
application_name                             0
year                                         0
application_status                           0
project_upin                                 0
project_category                             0
project_line_item__ali_                      0
project_description                          0
is_stimulus                                  0
consolidated_application                     0
total_expenses                               0
_5311_funds                                  7
_5311_f__funds                               7
_5311_cmaq_funds                             7
_5339_funds                                  7
federal_total                                0
other_fed_funds_total                        7
lctop__state__funds                          7
sb1__state_of_good_repair__state__funds      7
transit_development_act__state__funds        7
other_state_f

In [5]:
data.dtypes

organization_name                           object
district                                   float64
application_name                            object
year                                         int64
application_status                          object
project_upin                                object
project_category                            object
project_line_item__ali_                     object
project_description                         object
is_stimulus                                 object
consolidated_application                    object
total_expenses                               int64
_5311_funds                                float64
_5311_f__funds                             float64
_5311_cmaq_funds                           float64
_5339_funds                                float64
federal_total                                int64
other_fed_funds_total                      float64
lctop__state__funds                        float64
sb1__state_of_good_repair__stat

In [6]:
data.loc[data["application_status"] == "Not Submitted"]

Unnamed: 0,organization_name,district,application_name,year,application_status,project_upin,project_category,project_line_item__ali_,project_description,is_stimulus,consolidated_application,total_expenses,_5311_funds,_5311_f__funds,_5311_cmaq_funds,_5339_funds,federal_total,other_fed_funds_total,lctop__state__funds,sb1__state_of_good_repair__state__funds,transit_development_act__state__funds,other_state_funds,state_total,local_total
82,City of Tracy,10.0,Consolidated Application (Consolidated Application),2022,Not Submitted,BCG0003786,OP,300901,Free Fares Program (LCTOP ONLY),No,Yes,0,0.0,0.0,0.0,0.0,0,0.0,67412.0,0.0,0.0,0.0,67412.0,
102,County of Siskiyou,2.0,Consolidated Application (Consolidated Application),2022,Not Submitted,BCG0003809,CA,119302,Construction - Bus Shelters (LCTOP ONLY),No,Yes,112541,0.0,0.0,0.0,0.0,0,0.0,112541.0,0.0,0.0,0.0,112541.0,
187,Metropolitan Transportation Commission,4.0,Consolidated Application (Consolidated Application),2022,Not Submitted,BCG0003723,OP,300901,Operating Assistance,No,Yes,0,0.0,0.0,0.0,0.0,0,0.0,6220716.0,0.0,0.0,0.0,6220716.0,
237,Redding Area Bus Aurthority,2.0,Consolidated Application (Consolidated Application),2022,Not Submitted,BCG0003583,OP,300901,Free Fares for Students (LCTOP) #1,No,Yes,22912,0.0,0.0,0.0,0.0,0,0.0,22912.0,0.0,0.0,0.0,22912.0,
238,Redding Area Bus Aurthority,2.0,Consolidated Application (Consolidated Application),2022,Not Submitted,BCG0003740,CA,111204,Zero Emission Van Modernization (LCTOP) #2,No,Yes,0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,"HVIP: $69,000.00\nLCTOP: $233,014.00\nPTMISEA: $12,986.00\nTotal Local: $315,000.00"
267,San Francisco Municipal Transportation Agency,4.0,Consolidated Application (Consolidated Application),2022,Not Submitted,BCG0003689,CA,119302,Construction - Bus Shelters (LCTOP Only #2 - 29 Sunset Muni Forward),No,Yes,3005411,0.0,0.0,0.0,0.0,0,0.0,2855411.0,0.0,0.0,0.0,2855411.0,
324,Tuolumne County Transit Agency (TCTA),10.0,Consolidated Application (Consolidated Application),2022,Not Submitted,BCG0003792,OP,300901,LCTOP,No,Yes,0,0.0,0.0,0.0,0.0,0,0.0,133640.0,0.0,0.0,0.0,133640.0,
333,Western Contra Costa Transit Authority,,Consolidated Application (Consolidated Application),2022,Not Submitted,BCG0003647,OP,300901,Operating Assistance (LCTOP Only Free Fares 1 of 1),No,Yes,0,0.0,0.0,0.0,0.0,0,0.0,305073.0,0.0,0.0,0.0,305073.0,


In [7]:
data.application_name.value_counts()

Consolidated Application (Consolidated Application)    346
Name: application_name, dtype: int64

## Organizations

In [8]:
# Some names are read in with stray trailing whitespace (the Ventura County
# Transportation Commission row ends in a non-breaking space, "\xa0").
# Strip every name instead of patching that single value, so later exact-match
# filters on organization_name are not defeated by invisible characters.
data["organization_name"] = data["organization_name"].str.strip()

In [9]:
# Remove any acronyms: drop everything from the whitespace before the first
# "(" through the end of the name. The pattern is a raw string so that the
# regex escapes (\s, \() are not parsed as invalid Python string escapes.
data["organization_name"] = data["organization_name"].str.replace(
    r"\s+\(.*$", "", regex=True
)

## Project Columns

In [10]:
# Spell out the two-letter project category codes.
category_labels = {
    "OP": "Operating",
    "CA": "Capital",
    "PL": "Planning",
    "CM": "Capital Maintenance",
}
data["project_category"] = data["project_category"].replace(category_labels)

In [11]:
# Confirm every acronym was expanded. The mapping applied in the previous cell
# treats PL as "Planning" and CM as "Capital Maintenance".
data.project_category.unique()

array(['Operating', 'Capital', 'Planning', 'Capital Maintenance'],
      dtype=object)

### Project Descriptions
The descriptions are long and there are 200+ unique values. The project category included in the data set is too broad, so derive a new `short_description` column with a level of detail in between the two.

In [12]:
data["project_description"] = data["project_description"].str.lower()

In [13]:
data.project_description.nunique()

206

In [14]:
# data.project_description.unique()

In [15]:
# Pull the first keyword found in each description; rows with no keyword stay
# missing. Matching is plain substring search (no word boundaries).
description_keywords = [
    "operating",
    "bus",
    "construction",
    "buses",
    "planning",
    "van",
    "vessel",
    "fare",
    "ridership",
    "vehicle",
    "station",
    "service",
    "equipment",
    "maintenance",
    "surveillance",
    "renovate",
    "free",
    "equip",
    "operational",
]
keyword_pattern = "(" + "|".join(description_keywords) + ")"
data["short_description"] = data["project_description"].str.extract(
    keyword_pattern, expand=False
)

In [16]:
# Collapse the extracted keywords into a handful of broader labels.
# Written label -> keywords for readability, then inverted for replace().
keywords_by_label = {
    "operating assistance": ["operating", "operational"],
    "free fare program": ["free"],
    "ridership expansion": ["ridership"],
    "service expansion": ["service"],
    "purchasing vehicles": ["buses", "bus", "van", "vessel", "vehicles", "vehicle"],
    "transit planning": ["planning"],
    "construction": ["station", "construction"],
    "maintenance/renovation": ["maintenance", "renovate"],
    "purchasing other tech": ["fare", "equipment", "equip", "surveillance"],
}
label_by_keyword = {
    keyword: label
    for label, keywords in keywords_by_label.items()
    for keyword in keywords
}
data["short_description"] = data["short_description"].replace(label_by_keyword)

In [17]:
# Descriptions that matched no keyword are bucketed as "other category";
# every label is then title-cased.
labelled = data["short_description"].fillna("other category")
data["short_description"] = labelled.str.title()

In [18]:
data.short_description.nunique()

10

In [19]:
# data[['project_description','short_description']].tail(250)

## Monetary Columns

<b> Local Total </b> 
* This column represents the different types of local funding a project can receive.
* Extract everything after the colons. 

In [20]:
# Keep only the text after the last ": " in each cell (for example the amount
# that follows "Total Local: "). Missing cells stay missing.
data["local_total"] = data["local_total"].str.split(": ").str[-1]

In [21]:
# Strip the thousands separators and the dollar sign, then convert to float.
# Both patterns are literal characters, so regex=False is required: read as a
# regular expression, "$" is the end-of-string anchor and would leave the
# dollar sign in place, making the float conversion fail.
data["local_total"] = (
    data["local_total"]
    .str.replace(",", "", regex=False)
    .str.replace("$", "", regex=False)
    .fillna(0)
    .astype("float")
)

In [22]:
# Expense and funding columns that get the same numeric clean-up.
# local_total is not listed here; it is parsed separately from text.
monetary_cols = [
    "total_expenses",
    "_5311_funds",
    "_5311_f__funds",
    "_5311_cmaq_funds",
    "_5339_funds",
    "federal_total",
    "other_fed_funds_total",
    "lctop__state__funds",
    "sb1__state_of_good_repair__state__funds",
    "transit_development_act__state__funds",
    "other_state_funds",
    "state_total",
]

In [23]:
# Clean up monetary columns.
# Coerce first, fill second: to_numeric(errors="coerce") turns unparseable
# values into NaN, so filling before coercing would leave those NaNs behind
# and they would propagate into every total computed from these columns.
data[monetary_cols] = (
    data[monetary_cols]
    .apply(pd.to_numeric, errors="coerce")
    .fillna(value=0)
    .astype("float")
)

In [24]:
# Grand total across state, local, federal and other-federal funding.
# NOTE(review): other_fed_funds_total is added on top of federal_total; confirm
# the report's federal_total does not already include it (double counting).
data["total_state_federal_local_funding"] = (
    data["state_total"]
    .add(data["local_total"])
    .add(data["federal_total"])
    .add(data["other_fed_funds_total"])
)

In [25]:
# State plus federal totals only; local funds are left out.
data["total_state_fed_only"] = data["state_total"].add(data["federal_total"])

In [26]:
def funding_vs_expenses(df):
    """Classify one project row by comparing its total funding to its expenses.

    Parameters
    ----------
    df : mapping-like row (e.g. a DataFrame row passed by ``apply(axis=1)``)
        Must provide ``total_state_federal_local_funding`` and
        ``total_expenses``.

    Returns
    -------
    str
        "Fully funded", "Funding exceeds total expenses" or "Not fully funded".
        A row with a missing (NaN) amount is reported as "Not fully funded".
    """
    funding = df["total_state_federal_local_funding"]
    expenses = df["total_expenses"]

    # The funding total is a sum of floats, so exact equality can fail on
    # rounding noise alone. Treat amounts within half a cent as equal.
    if np.isclose(funding, expenses, rtol=0, atol=0.005):
        return "Fully funded"
    if funding > expenses:
        return "Funding exceeds total expenses"
    return "Not fully funded"

In [27]:
# Label each project by applying funding_vs_expenses to every row.
data["fully_funded"] = data.apply(funding_vs_expenses, axis=1)

In [28]:
data.fully_funded.value_counts()

Fully funded                      263
Funding exceeds total expenses     44
Not fully funded                   39
Name: fully_funded, dtype: int64

In [29]:
263 / 346

0.7601156069364162

In [30]:
data[["project_upin", "local_total"]].sample(4)

Unnamed: 0,project_upin,local_total
143,BCG0003910,0.0
72,BCG0003845,0.0
75,BCG0003885,19275.0
204,BCG0003743,0.0


## District 
* Fix null values

In [31]:
no_districts = data[data["district"].isnull()]

In [32]:
no_districts_list = no_districts["project_upin"].tolist()

In [33]:
# Assign a district number to every row of the organizations listed here.
district_by_organization = {
    "City of Banning": 8,  # In Riverside
    "City of Clovis": 6,  # In Fresno
    "City of Los Angeles DOT": 7,
    "Peninsula Corridor Joint Powers Board": 4,  # Bay Area
    "San Joaquin Regional Rail Commission": 10,
    "Western Contra Costa Transit Authority": 4,
}
for organization, district_number in district_by_organization.items():
    data.loc[data["organization_name"] == organization, "district"] = district_number

In [34]:
# Create new col with spelled out names
data["full_district_name"] = data["district"]

In [35]:
# Swap each district number for a "District N: <area>" label.
district_areas = {
    7: "Los Angeles",
    4: "Bay Area / Oakland",
    2: "Redding",
    9: "Bishop",
    10: "Stockton",
    11: "San Diego",
    3: "Marysville / Sacramento",
    12: "Orange County",
    8: "San Bernardino / Riverside",
    5: "San Luis Obispo / Santa Barbara",
    6: "Fresno / Bakersfield",
    1: "Eureka",
}
district_labels = {
    number: f"District {number}: {area}" for number, area in district_areas.items()
}
data["full_district_name"] = data["full_district_name"].replace(district_labels)

## Make a map
* Change function to add geojson file to GCS. 
* https://github.com/cal-itp/data-analyses/blob/main/_shared_utils/shared_utils/utils.py#L29-L40

In [36]:
geojson = gpd.read_file(
    "https://opendata.arcgis.com/datasets/e397d899e7be4ce28ad261867e61ac69_0.geojson"
).to_crs(epsg=4326)

In [37]:
geojson = geojson[["DISTRICT", "Shape_Length", "Shape_Area", "geometry"]]

In [38]:
# One row per district: number of projects and summed state + federal funding.
summarized = (
    data.groupby("district")
    .agg(
        project_upin=("project_upin", "count"),
        total_state_fed_only=("total_state_fed_only", "sum"),
    )
    .reset_index()
)

In [39]:
# Human-readable funding label, e.g. "$11.0M" (rounded to whole millions).
rounded_millions = (
    summarized["total_state_fed_only"].astype(float).div(1000000).round()
)
summarized["funding_millions"] = "$" + rounded_millions.astype(str) + "M"

In [40]:
# Quartile cut-offs of district funding, used to bucket districts.
district_funding = summarized["total_state_fed_only"]
p25 = district_funding.quantile(0.25).astype(float)
p50 = district_funding.quantile(0.50).astype(float)
p75 = district_funding.quantile(0.75).astype(float)

In [41]:
def funding_range(row, p25_cutoff=None, p75_cutoff=None):
    """Bucket a district's state + federal funding by quartile cut-offs.

    Parameters
    ----------
    row : object with a ``total_state_fed_only`` attribute
        Typically one row passed by ``DataFrame.apply(axis=1)``.
    p25_cutoff, p75_cutoff : float, optional
        Lower and upper cut-offs. When omitted, the notebook-level ``p25`` and
        ``p75`` values are used, so ``funding_range(row)`` keeps working.

    Returns
    -------
    str
        "25" for funding in (0, p25], "50" for (p25, p75], "75" above p75,
        and "No Info" when funding is zero, negative or missing.
    """
    lower = p25 if p25_cutoff is None else p25_cutoff
    upper = p75 if p75_cutoff is None else p75_cutoff
    amount = row.total_state_fed_only

    # `not amount > 0` is also true for NaN, so missing values land here.
    if not amount > 0:
        return "No Info"
    # Upper bounds are inclusive so a value sitting exactly on a cut-off is
    # still bucketed instead of falling through to "No Info".
    if amount <= lower:
        return "25"
    if amount <= upper:
        return "50"
    return "75"

In [42]:
# Bucket every district row; funding_range receives one row at a time.
summarized["funding_percentile"] = summarized.apply(funding_range, axis=1)

In [43]:
summarized

Unnamed: 0,district,project_upin,total_state_fed_only,funding_millions,funding_percentile
0,1.0,30,10983899.0,$11.0M,50
1,2.0,37,9373942.0,$9.0M,25
2,3.0,42,29812565.0,$30.0M,50
3,4.0,47,277215227.08,$277.0M,75
4,5.0,32,21020813.0,$21.0M,50
5,6.0,53,76633070.96,$77.0M,75
6,7.0,15,114012442.0,$114.0M,75
7,8.0,33,21805615.0,$22.0M,50
8,9.0,19,4821987.0,$5.0M,25
9,10.0,28,18923483.75,$19.0M,50


In [44]:
# Attach the per-district summary to the district geometries.
# NOTE(review): how="inner" keeps only districts present in both frames, so a
# district with no applications is dropped from the map; confirm that is intended.
gdf = geojson.merge(
    summarized, how="inner", left_on="DISTRICT", right_on="district"
)

In [45]:
type(gdf)

geopandas.geodataframe.GeoDataFrame

In [46]:
gdf.columns

Index(['DISTRICT', 'Shape_Length', 'Shape_Area', 'geometry', 'district',
       'project_upin', 'total_state_fed_only', 'funding_millions',
       'funding_percentile'],
      dtype='object')

In [47]:
# Export the merged district frame to the analysis folder in GCS, reusing the
# shared path constant rather than repeating the bucket string.
shared_utils.utils.geoparquet_gcs_export(gdf, GCS_FILE_PATH, "con_app_gdf")


This metadata specification does not yet make stability promises.  We do not yet recommend using this in a production setting unless you are able to rewrite your Parquet/Feather files.



## Melt dataframe
* Every project has a unique upin number, use that as the merge column.

In [48]:
len(data)

346

In [49]:
data.project_upin.nunique()

346

In [50]:
data.columns

Index(['organization_name', 'district', 'application_name', 'year',
       'application_status', 'project_upin', 'project_category',
       'project_line_item__ali_', 'project_description', 'is_stimulus',
       'consolidated_application', 'total_expenses', '_5311_funds',
       '_5311_f__funds', '_5311_cmaq_funds', '_5339_funds', 'federal_total',
       'other_fed_funds_total', 'lctop__state__funds',
       'sb1__state_of_good_repair__state__funds',
       'transit_development_act__state__funds', 'other_state_funds',
       'state_total', 'local_total', 'short_description',
       'total_state_federal_local_funding', 'total_state_fed_only',
       'fully_funded', 'full_district_name'],
      dtype='object')

In [51]:
# Keeping only monetary columns I want to melt
monetary_subset = data[
    [
        "project_upin",
        "_5311_funds",
        "_5311_f__funds",
        "_5311_cmaq_funds",
        "_5339_funds",
        "lctop__state__funds",
        "sb1__state_of_good_repair__state__funds",
        "transit_development_act__state__funds",
        "other_state_funds",
        "other_fed_funds_total",
        "local_total",
        "federal_total",
        "state_total",
    ]
]

In [52]:
# Reshape wide -> long: one row per (project_upin, funding column).
# Every column other than the id column is melted.
funding_columns = [
    column for column in monetary_subset.columns if column != "project_upin"
]
monetary_subset = monetary_subset.melt(
    id_vars=["project_upin"],
    value_vars=funding_columns,
    var_name="program_name",
    value_name="funding_received",
)

In [53]:
# Delete some irrelevant cols from original data set
data2 = data[
    [
        "total_expenses",
        "organization_name",
        "district",
        "full_district_name",
        "year",
        "application_status",
        "project_upin",
        "project_category",
        "project_line_item__ali_",
        "project_description",
        "is_stimulus",
        "total_state_federal_local_funding",
        "fully_funded",
        "short_description",
    ]
]

In [54]:
# merge original dataframe with melted dataframe to get full information.
merge1 = pd.merge(monetary_subset, data2, on="project_upin", how="left")

In [55]:
merge1.program_name.unique()

array(['_5311_funds', '_5311_f__funds', '_5311_cmaq_funds', '_5339_funds',
       'lctop__state__funds', 'sb1__state_of_good_repair__state__funds',
       'transit_development_act__state__funds', 'other_state_funds',
       'other_fed_funds_total', 'local_total', 'federal_total',
       'state_total'], dtype=object)

In [56]:
merge1["program_name"] = merge1["program_name"].replace(
    {
        "_5311_funds": "5311 (Fed)",
        "lctop__state__funds": "LCTOP (State)",
        "transit_development_act__state__funds": "Transit Development Act (State)",
        "other_state_funds": "Other State Funds",
        "_5339_funds": "5339 (Fed)",
        "_5311_f__funds": "5311(f) (Fed)",
        "sb1__state_of_good_repair__state__funds": "SB1. State of Good Repair (State)",
        "other_fed_funds_total": "Other Federal Funds",
        "_5311_cmaq_funds": "5311 CMAQ (Fed)",
        "local_total": "Local Funds",
        "federal_total": "Federal Total",
        "state_total": "State Total",
    }
)

In [57]:
# looking at new length of merge...
len(merge1)

4152

In [58]:
# double checking that project upin count is still the same
merge1.project_upin.nunique()

346

In [59]:
merge1.program_name.unique()

array(['5311 (Fed)', '5311(f) (Fed)', '5311 CMAQ (Fed)', '5339 (Fed)',
       'LCTOP (State)', 'SB1. State of Good Repair (State)',
       'Transit Development Act (State)', 'Other State Funds',
       'Other Federal Funds', 'Local Funds', 'Federal Total',
       'State Total'], dtype=object)

In [60]:
# filter any zeroes in the funding received column, to make dataframe smaller
melt_df = merge1[merge1["funding_received"] > 0]

In [61]:
melt_df.shape

(1081, 16)

In [62]:
melt_df.loc[melt_df["project_upin"] == "BCG0003960"]

Unnamed: 0,project_upin,program_name,funding_received,total_expenses,organization_name,district,full_district_name,year,application_status,project_category,project_line_item__ali_,project_description,is_stimulus,total_state_federal_local_funding,fully_funded,short_description
53,BCG0003960,5311 (Fed),96956.0,298164.0,City of McFarland,6.0,District 6: Fresno / Bakersfield,2022,Submitted,Operating,300902,operating assistance sliding scale - fy2022,No,315164.0,Funding exceeds total expenses,Operating Assistance
2475,BCG0003960,Other State Funds,218208.0,298164.0,City of McFarland,6.0,District 6: Fresno / Bakersfield,2022,Submitted,Operating,300902,operating assistance sliding scale - fy2022,No,315164.0,Funding exceeds total expenses,Operating Assistance
3513,BCG0003960,Federal Total,96956.0,298164.0,City of McFarland,6.0,District 6: Fresno / Bakersfield,2022,Submitted,Operating,300902,operating assistance sliding scale - fy2022,No,315164.0,Funding exceeds total expenses,Operating Assistance
3859,BCG0003960,State Total,218208.0,298164.0,City of McFarland,6.0,District 6: Fresno / Bakersfield,2022,Submitted,Operating,300902,operating assistance sliding scale - fy2022,No,315164.0,Funding exceeds total expenses,Operating Assistance


### Why do some upins disappear??? 
* Investigation: compare the project UPINs in the original dataframe with those in the pivoted dataframe.
* When I pivoted the data, I only included the granular categories (5311/SB1/etc.), not the totals. I also filtered out any rows with $0.
* Found: projects in `main_list` either
    * have 0.00 in federal_total, local_total, and state_total, or
    * somehow have 0.00 in the 5311/5311f/5311cmaq/5339 funds but have federal_total populated.
    

In [63]:
melt_df.project_upin.nunique()

343

In [64]:
# Investigate
melted_upin = melt_df.project_upin.unique().tolist()

In [65]:
len(melted_upin)

343

In [66]:
data_upin = data.project_upin.unique().tolist()

In [67]:
len(data_upin)

346

In [68]:
main_list = np.setdiff1d(data_upin, melted_upin)
main_list

array(['BCG0003702', 'BCG0003954', 'BCG0003959'], dtype='<U10')

In [69]:
# Pull the full records for the UPINs that dropped out of the melted frame.
# Uses the computed difference (main_list) rather than a hand-copied list of
# IDs, so the check stays correct if the input data changes.
missing_upin = data[data["project_upin"].isin(main_list)]

In [70]:
missing_upin

Unnamed: 0,organization_name,district,application_name,year,application_status,project_upin,project_category,project_line_item__ali_,project_description,is_stimulus,consolidated_application,total_expenses,_5311_funds,_5311_f__funds,_5311_cmaq_funds,_5339_funds,federal_total,other_fed_funds_total,lctop__state__funds,sb1__state_of_good_repair__state__funds,transit_development_act__state__funds,other_state_funds,state_total,local_total,short_description,total_state_federal_local_funding,total_state_fed_only,fully_funded,full_district_name
226,Placer County Public Works,3.0,Consolidated Application (Consolidated Application),2022,Submitted,BCG0003954,Operating,300902,operating assistance sliding scale,No,Yes,2184841.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Operating Assistance,0.0,0.0,Not fully funded,District 3: Marysville / Sacramento
227,Placer County Public Works,3.0,Consolidated Application (Consolidated Application),2022,Submitted,BCG0003959,Operating,300902,operating assistance sliding scale,No,Yes,2184841.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Operating Assistance,0.0,0.0,Not fully funded,District 3: Marysville / Sacramento
231,Plumas County Transportation Commission,2.0,Consolidated Application (Consolidated Application),2022,Submitted,BCG0003702,Operating,300901,lctop #2 - free fare days,No,Yes,22825.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Free Fare Program,0.0,0.0,Not fully funded,District 2: Redding


## Program Groups

In [71]:
# Keep only the individual funding programs: drop local funds and the
# federal/state total rows. .copy() makes `group` an independent frame, so
# adding a column to it does not trigger pandas' SettingWithCopyWarning.
excluded_programs = ["Local Funds", "Federal Total", "State Total"]
group = melt_df.loc[~melt_df["program_name"].isin(excluded_programs)].copy()

In [72]:
len(group)

481

In [73]:
group.program_name.unique()

array(['5311 (Fed)', '5311(f) (Fed)', '5311 CMAQ (Fed)', '5339 (Fed)',
       'LCTOP (State)', 'SB1. State of Good Repair (State)',
       'Transit Development Act (State)', 'Other State Funds',
       'Other Federal Funds'], dtype=object)

In [74]:
# For each project_upin, join its program names into one comma-separated
# string and broadcast that string onto every row of the project.
programs_by_project = group.groupby("project_upin")["program_name"]
group["all_programs"] = programs_by_project.transform(",".join)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [75]:
group = group[["project_upin", "all_programs"]].drop_duplicates()

In [76]:
group.shape

(335, 2)

In [77]:
group[["project_upin", "all_programs"]].head(2)

Unnamed: 0,project_upin,all_programs
2,BCG0003876,"5311 (Fed),LCTOP (State)"
3,BCG0003877,"5311 (Fed),LCTOP (State)"


In [78]:
# merge for other information
grouped_df = pd.merge(group, data, on="project_upin", how="left")

In [79]:
# keep only relevant cols
grouped_df = grouped_df[
    ["project_upin", "organization_name", "project_description", "all_programs", "year"]
]

In [80]:
# Count how many programs are listed in all_programs, summed per project_upin.
# https://stackoverflow.com/questions/51502263/pandas-dataframe-object-has-no-attribute-str
programs_per_row = grouped_df["all_programs"].str.split(",+").str.len()
grouped_df["count_of_funding_programs_applied"] = programs_per_row.groupby(
    grouped_df.project_upin
).transform("sum")

In [81]:
grouped_df.sort_values("organization_name").sample(3)

Unnamed: 0,project_upin,organization_name,project_description,all_programs,year,count_of_funding_programs_applied
68,BCG0003910,Imperial County Transportation Commission,operating assistance sliding scale fy 22,"5311 (Fed),Transit Development Act (State),Other State Funds,Other Federal Funds",2022,4
218,BCG0003987,Tulare County Regional Transportation Agency,purchase replacement std 30-34 ft bus-5339(a),"5339 (Fed),Other State Funds",2022,2
6,BCG0003996,City of Arvin,operating assistance sliding scale fy 22,"5311 (Fed),Transit Development Act (State)",2022,2


In [82]:
grouped_df.shape

(335, 6)

## Export


# Write the three result frames to a single Excel workbook under
# GCS_FILE_PATH, one sheet per frame, without the index column.
with pd.ExcelWriter(f"{GCS_FILE_PATH}Con_App_Cleaned.xlsx") as writer:
    melt_df.to_excel(writer, sheet_name="pivoted_data", index=False)
    data.to_excel(writer, sheet_name="cleaned_unpivoted_data", index=False)
    grouped_df.to_excel(writer, sheet_name="combos_of_funding_programs", index=False)
