## Overlaps between organizations in TIRCP/DLA/Black Cat in MA

In [23]:
import numpy as np
import pandas as pd

# Notebook-wide pandas display settings: show up to 50 columns and 250 rows,
# never truncate cell text, and render floats with two decimal places.
pd.set_option("display.max_columns", 50)
pd.set_option("display.max_rows", 250)
pd.set_option("display.max_colwidth", None)
pd.set_option("display.float_format", "{:.2f}".format)

from calitp import *
from siuba import *

#### Load in Black Cat

In [24]:
# Read the Black Cat grant-projects workbook from GCS and pass it through
# to_snakecase (presumably provided by the `calitp` star import and expected
# to normalise column names — confirm).
BLACKCAT_XLSX = "gs://calitp-analytics-data/data-analyses/grant_misc/Grant+Projects_3_17_2022.xlsx"
blackcat_workbook = pd.read_excel(BLACKCAT_XLSX)
blackcat = to_snakecase(blackcat_workbook)

In [25]:
# Reduce the Black Cat table to a single column holding each distinct
# organization name once.
organization_column = ["organization_name"]
blackcat_agencies = blackcat.loc[:, organization_column].drop_duplicates()

In [26]:
# Number of distinct Black Cat organization names.
blackcat_agencies.shape[0]

211

In [27]:
# Keep only each agency's name by removing the trailing parenthesised acronym,
# e.g. "Some Agency (SA)" -> "Some Agency": the pattern deletes everything from
# the whitespace before the first "(" to the end of the string.
# regex=True is passed explicitly: without it pandas emits a FutureWarning, and
# from pandas 2.0 the default is regex=False, which would treat the pattern as
# literal text and strip nothing. The raw string keeps "\s" and "\(" from being
# read as (invalid) Python string escapes.
blackcat_agencies["organization_name"] = blackcat_agencies[
    "organization_name"
].str.replace(r"\s+\(.*$", "", regex=True)

  blackcat_agencies['organization_name']= blackcat_agencies['organization_name'].str.replace("\s+\(.*$", "")


In [28]:
# List every Black Cat agency alphabetically. The previous head(211) hard-coded
# the current row count and truncated *before* sorting, so a larger extract
# would have shown an arbitrary subset; the display.max_rows option set at the
# top of the notebook already bounds how many rows are rendered.
blackcat_agencies.sort_values(by="organization_name")

Unnamed: 0,organization_name
210,ARC Bakersfield
127,ARC Imperial Valley
200,Able Industries
1110,Alegria Community Living
114,Alpine County Community Development
115,Amador Transit
126,"Angel View, Inc."
2335,Antelope Valley Transit Authority
129,Area 1 Agency on Aging
130,"Asian Community Center of Sacramento Valley, Inc. DBA ACC Senior Services"


#### Load in TIRCP

In [29]:
# Read the TIRCP workbook from GCS; column names are left as they appear in
# the file.
TIRCP_XLSX = "gs://calitp-analytics-data/data-analyses/tircp/tableau_with_temporary_expenditure_sol.xlsx"
tircp = pd.read_excel(TIRCP_XLSX)

In [30]:
# Reduce the TIRCP table to a single column holding each distinct grant
# recipient once.
recipient_column = ["Grant_Recipient"]
tircp_agencies = tircp.loc[:, recipient_column].drop_duplicates()

In [31]:
# Keep only each recipient's name by removing the trailing parenthesised
# acronym: the pattern deletes everything from the whitespace before the first
# "(" to the end of the string.
# regex=True is passed explicitly: without it pandas emits a FutureWarning, and
# from pandas 2.0 the default is regex=False, which would treat the pattern as
# literal text and strip nothing. The raw string keeps "\s" and "\(" from being
# read as (invalid) Python string escapes.
tircp_agencies["Grant_Recipient"] = tircp_agencies[
    "Grant_Recipient"
].str.replace(r"\s+\(.*$", "", regex=True)

  tircp_agencies['Grant_Recipient']= tircp_agencies['Grant_Recipient'].str.replace("\s+\(.*$", "")


In [32]:
# Remove the rows whose Grant_Recipient value names several recipients in one
# string, so that they cannot be matched as if they were a single agency.
multi_recipient_entries = [
    'Capitol Corridor Joint Powers Authority, City of Sacramento, SacRT, & Downtown Railyard Venture',
    'San Joaquin Regional Rail Commission / San Joaquin Joint Powers Authority',
    'LA County Metropolitan Transportation Authority, So Cal Regional Rail Authority',
]
is_multi_recipient = tircp_agencies['Grant_Recipient'].isin(multi_recipient_entries)
tircp_agencies = tircp_agencies[~is_multi_recipient]
# NOTE(review): the row with index label 65 is dropped without explanation —
# confirm which entry this is. The drop is label-based, so it raises KeyError
# if this cell is re-run and targets a different row if the source order changes.
tircp_agencies = tircp_agencies.drop(65)

In [33]:
#add agencies back in separately 
# Recipients to add back as their own rows after the combined multi-recipient
# entries were filtered out.
# NOTE(review): only two names are added — presumably the other members of the
# combined entries already have their own rows; confirm.
# The mapping is deliberately not named `dict`: that would shadow the builtin
# `dict` type for every cell that runs afterwards.
added_recipients = {
    "Grant_Recipient": [
        "City of Sacramento",
        "San Joaquin Regional Rail Commission",
    ]
}

df2 = pd.DataFrame(added_recipients)
df2

Unnamed: 0,Grant_Recipient
0,City of Sacramento
1,San Joaquin Regional Rail Commission


In [34]:
# Append the individually listed recipients, renumber the index from zero,
# then drop any row that is now an exact duplicate.
combined_recipients = pd.concat([tircp_agencies, df2], ignore_index=True)
tircp_agencies = combined_recipients.drop_duplicates()

In [35]:
# Number of distinct TIRCP grant recipients after the clean-up.
tircp_agencies.shape[0]

39

In [36]:
# Show the TIRCP recipients in alphabetical order.
tircp_sorted = tircp_agencies.sort_values('Grant_Recipient')
display(tircp_sorted)

Unnamed: 0,Grant_Recipient
18,Alameda Contra Costa Transit District
19,Anaheim Transportation Network
0,Antelope Valley Transit Authority
20,Bay Area Rapid Transit District
1,Capitol Corridor Joint Powers Authority
14,City of Fresno
30,City of Inglewood
21,City of Los Angeles
37,City of Sacramento
22,City of Santa Monica


#### Merge the two agency lists together

In [37]:
# Outer-join the two agency lists on exact name equality. indicator=True adds
# a `_merge` column recording whether each name was found on the left (Black
# Cat), the right (TIRCP), or both sides.
merge1 = blackcat_agencies.merge(
    tircp_agencies,
    how="outer",
    left_on="organization_name",
    right_on="Grant_Recipient",
    indicator=True,
)

In [38]:
# How many names fall on each side of the join.
merge1["_merge"].value_counts()

left_only     201
right_only     29
both           10
Name: _merge, dtype: int64

In [39]:
# Total number of rows in the outer join.
merge1.shape[0]

240

In [40]:
# Give the joined columns names that say which grant program they came from.
column_labels = {
    "organization_name": "BlackCat_Orgs",
    "Grant_Recipient": "TIRCP_Orgs",
    "_merge": "BC_TIRCP_merge",
}
merge1 = merge1.rename(columns=column_labels)

In [41]:
def progress(df):
    """Translate one row's merge indicator into a readable label.

    Parameters
    ----------
    df : mapping or pandas.Series
        Despite the name, a single row (this function is applied with
        ``axis=1``); it must provide the key ``'BC_TIRCP_merge'``.

    Returns
    -------
    str
        ``'Black Cat Only'`` for ``'left_only'``, ``'TIRCP_Only'`` for
        ``'right_only'``, and ``'Both in TIRCP and BlackCat'`` for any
        other value.
    """
    merge_flag = df['BC_TIRCP_merge']
    if merge_flag == 'left_only':
        return 'Black Cat Only'
    if merge_flag == 'right_only':
        return 'TIRCP_Only'
    # Every remaining value, including 'both', falls through to this label.
    return "Both in TIRCP and BlackCat"

In [42]:
# Replace the raw merge indicator with its readable label, row by row.
# Running this a second time relabels every row as "Both in TIRCP and
# BlackCat", because the already-translated values no longer match
# 'left_only' or 'right_only'.
merge_labels = merge1.apply(progress, axis=1)
merge1['BC_TIRCP_merge'] = merge_labels

In [43]:
# Show the labelled overlap table ordered alphabetically by Black Cat name.
merge1_sorted = merge1.sort_values(by='BlackCat_Orgs')
display(merge1_sorted)

Unnamed: 0,BlackCat_Orgs,TIRCP_Orgs,BC_TIRCP_merge
125,ARC Bakersfield,,Black Cat Only
80,ARC Imperial Valley,,Black Cat Only
124,Able Industries,,Black Cat Only
187,Alegria Community Living,,Black Cat Only
76,Alpine County Community Development,,Black Cat Only
77,Amador Transit,,Black Cat Only
79,"Angel View, Inc.",,Black Cat Only
210,Antelope Valley Transit Authority,Antelope Valley Transit Authority,Both in TIRCP and BlackCat
81,Area 1 Agency on Aging,,Black Cat Only
82,"Asian Community Center of Sacramento Valley, Inc. DBA ACC Senior Services",,Black Cat Only


In [44]:
# Persist the labelled overlap table to GCS as parquet.
OVERLAP_PARQUET = 'gs://calitp-analytics-data/data-analyses/grant_misc/BlackCat_TIRCP.parquet'
merge1.to_parquet(OVERLAP_PARQUET)