In [1]:
import pandas as pd
from siuba import *
import numpy as np

import altair as alt

from shared_utils import altair_utils
from shared_utils import geography_utils
from shared_utils import calitp_color_palette as cp
from shared_utils import styleguide

from calitp import to_snakecase

from IPython.display import display, Markdown



In [2]:
df=pd.read_csv('gs://calitp-analytics-data/data-analyses/5310/5310_org_info_manualfill.csv')

In [3]:
bc = to_snakecase(
    pd.read_excel(
        "gs://calitp-analytics-data/data-analyses/grants/Grant+Projects_3_17_2022.xlsx"
    )
)
bc = bc>>filter(_.funding_program.str.contains('5310'))
bc["organization_name"] = (
    bc["organization_name"]
    .str.replace(",", "")
    .str.replace(".", "")
    .str.replace("/", " ")
    #.str.replace("\\", "")
)



In [4]:
bc.head(5)

Unnamed: 0,grant_fiscal_year,funding_program,grant_number,project_year,organization_name,upin,description,ali,contract_number,allocationamount,encumbered_amount,expendedamount,activebalance,closedoutbalance,project_status,project_closed_by,project_closed_date,project_closed_time
125,2017,5310 Exp,CA-2017-169 | 0017000186-E,2017,Amador Transit,BCG0001035,Operating Assistance,300901,64AO18-00677,30144.0,30144.0,30144.0,0.0,0,Open,,,
126,2017,5310 Exp,CA-2017-169 | 0017000186-E,2017,Angel View Inc,BCG0000324,Operating Assistance,300901,64AO18-00770,125000.0,125000.0,125000.0,0.0,0,Open,,,
127,2017,5310 Exp,CA-2017-169 | 0017000186-E,2017,ARC Imperial Valley,BCG0000988,Operating Assistance,300901,64AO18-00630,193211.0,193211.0,193211.0,0.0,0,Open,,,
128,2017,5310 Exp,CA-2017-169 | 0017000186-E,2017,ARC Imperial Valley,BCG0002757,Operating Assistance,300901,64AO18-00630,40000.0,40000.0,40000.0,0.0,0,Open,,,
129,2017,5310 Exp,CA-2017-169 | 0017000186-E,2017,Area 1 Agency on Aging,BCG0000392,Operating Assistance,300901,64AO18-00631,146534.0,146534.0,146534.0,0.0,0,Open,,,


In [5]:
p = pd.merge(bc, df, how='left', on='organization_name')

In [6]:
p['description'] = p['description'].str.strip()

In [7]:
(
    p
    >> group_by(_.organization_name, _.funding_program)
    >> summarize(n=_.description.nunique())
    >> arrange(-_.n)
    >> filter(_.n > 1)
)

Unnamed: 0,organization_name,funding_program,n
39,Community Bridges Liftline,5310 Trad,6
181,United Cerebral Palsy Association of Greater S...,5310 Trad,6
139,Porterville Sheltered Workshop,5310 Trad,5
11,Asian Community Center of Sacramento Valley In...,5310 Trad,4
21,Catholic Charities of the Diocese of Stockton,5310 Trad,4
...,...,...,...
173,The Center for Independent Living,5310 Trad,2
185,Valley Resource Center,5310 Trad,2
187,Victor Valley Community Services Council,5310 Trad,2
188,Victor Valley Transit Authority,5310 Trad,2


In [8]:
p>>count(_.grant_fiscal_year)

Unnamed: 0,grant_fiscal_year,n
0,2017,513
1,2019,602


In [9]:
p>>count(_.project_year)

Unnamed: 0,project_year,n
0,2017,507
1,2018,4
2,2019,604


In [10]:
p>>filter(_.project_year==2018)

Unnamed: 0,grant_fiscal_year,funding_program,grant_number,project_year,organization_name,upin,description,ali,contract_number,allocationamount,...,itp_id,name_NTD_Airtable,doing_business_as,reporter_acronym,organization_type,reporter_type,city,county,mobility_services_operated,merge_status
72,2017,5310 Exp,CA-2017-169 | 0017000186-E,2018,County of Sonoma Human Services Department Adu...,BCG0000526,Operating Assistance,300901,64AO18-00794,266975.0,...,,,,,Independent Agency,,Sonoma,Sonoma,,No Match
73,2017,5310 Exp,CA-2017-169 | 0017000186-E,2018,Desert Access and Mobility Inc,BCG0000752,Operating Assistance,300901,64AO16-00777,125000.0,...,,Desert Access and Mobility Inc,,,Non-Profit Organization,,Palm Springs,Riverside,,Name Match; Airtable
510,2017,5310 Trad,CA-2017-169 | 0017000186-T,2018,County of Sonoma Human Services Department Adu...,BCG0000525,Mobility Management,117L00,64AM18-00764,157532.0,...,,,,,Independent Agency,,Sonoma,Sonoma,,No Match
511,2017,5310 Trad,CA-2017-169 | 0017000186-T,2018,County of Sonoma Human Services Department Adu...,BCG0000598,Purchase Computer Software,114208,64AM18-00764,10000.0,...,,,,,Independent Agency,,Sonoma,Sonoma,,No Match


In [11]:
p>>count(_.grant_number)


Unnamed: 0,grant_number,n
0,CA-2017-169 | 0017000186-E,75
1,CA-2017-169 | 0017000186-T,438
2,CA-2020-244 | 0020000273-E,76
3,CA-2020-244 | 0020000273-T,526


In [12]:
(p>>group_by(_.organization_name, _.grant_fiscal_year)>>count(_.description))

Unnamed: 0,organization_name,grant_fiscal_year,description,n
0,ARC Bakersfield,2017,Purchase Replacement < 30 Ft Bus,16
1,ARC Bakersfield,2017,Purchase Replacement Van,4
2,ARC Imperial Valley,2017,Operating Assistance,2
3,ARC Imperial Valley,2019,Buy Van For Svc Expansion,4
4,ARC Imperial Valley,2019,Operating Assistance,2
...,...,...,...,...
353,Wilshire Community Services,2019,Operating Assistance,1
354,Work Training Center,2017,Purchase Radios,1
355,Work Training Center,2017,Purchase Replacement < 30 Ft Bus,6
356,Yolo County Transportation District,2017,Mobility Management,2


In [13]:
p>>group_by(_.organization_name)>>count(_.description)

Unnamed: 0,organization_name,description,n
0,ARC Bakersfield,Purchase Replacement < 30 Ft Bus,16
1,ARC Bakersfield,Purchase Replacement Van,4
2,ARC Imperial Valley,Buy Van For Svc Expansion,4
3,ARC Imperial Valley,Operating Assistance,4
4,Able Industries,Buy <30-Ft Bus For Expansion,6
...,...,...,...
280,Wilshire Community Services,Operating Assistance,2
281,Work Training Center,Purchase Radios,1
282,Work Training Center,Purchase Replacement < 30 Ft Bus,6
283,Yolo County Transportation District,Mobility Management,2


In [14]:
p.description.value_counts()

Purchase Replacement < 30 Ft Bus                                                                464
Buy <30-Ft Bus For Expansion                                                                    138
Purchase Replacement Van                                                                        122
Operating Assistance                                                                            113
Buy Van For Svc Expansion                                                                        93
Mobility Management                                                                              72
Purchase Computer Hardware                                                                       16
Purchase Radios                                                                                  16
Surveillance/Security                                                                            10
Support Equip/Facilities-Equipment                                                                8


In [15]:
(
    p
    >> group_by(_.organization_name, _.organization_type)
    >> summarize(nunique_desc=_.description.nunique())
    >> arrange(-_.nunique_desc)
)

Unnamed: 0,organization_name,organization_type,nunique_desc
28,Community Bridges Liftline,Non-Profit Organization,7
35,County of Sonoma Human Services Department Adu...,Independent Agency,7
136,United Cerebral Palsy Association of Greater S...,Non-Profit Organization,7
85,Mobility Matters,Non-Profit Organization,6
7,Asian Community Center of Sacramento Valley In...,Non-Profit Organization,5
...,...,...,...
135,United Cerebral Palsy Association Inc of Stani...,Non-Profit Organization,1
137,United Cerebral Palsy of San Luis Obispo County,Non-Profit Organization,1
138,Valley Achievement Center,Non-Profit Organization,1
143,West Valley Community Services,Non-Profit Organization,1


In [16]:
p_agg = (p.groupby(['organization_name'])
      .agg({'description': lambda x: x.tolist()})
      .rename({'description' : 'descriptions'}, axis=1)
      .reset_index())

In [17]:
p_agg

Unnamed: 0,organization_name,descriptions
0,ARC Bakersfield,"[Purchase Replacement < 30 Ft Bus, Purchase Re..."
1,ARC Imperial Valley,"[Operating Assistance, Operating Assistance, O..."
2,Able Industries,"[Buy <30-Ft Bus For Expansion, Buy <30-Ft Bus ..."
3,Alegria Community Living,"[Purchase Replacement Van, Purchase Replacemen..."
4,Amador Transit,"[Operating Assistance, Mobility Management, Mo..."
...,...,...
142,Vivalon Inc,"[Operating Assistance, Purchase Replacement < ..."
143,West Valley Community Services,"[Operating Assistance, Operating Assistance, O..."
144,Wilshire Community Services,"[Operating Assistance, Operating Assistance]"
145,Work Training Center,"[Purchase Replacement < 30 Ft Bus, Purchase Re..."


In [18]:
description = p>>group_by(_.organization_name)>>count(_.description)

In [19]:
description

Unnamed: 0,organization_name,description,n
0,ARC Bakersfield,Purchase Replacement < 30 Ft Bus,16
1,ARC Bakersfield,Purchase Replacement Van,4
2,ARC Imperial Valley,Buy Van For Svc Expansion,4
3,ARC Imperial Valley,Operating Assistance,4
4,Able Industries,Buy <30-Ft Bus For Expansion,6
...,...,...,...
280,Wilshire Community Services,Operating Assistance,2
281,Work Training Center,Purchase Radios,1
282,Work Training Center,Purchase Replacement < 30 Ft Bus,6
283,Yolo County Transportation District,Mobility Management,2


In [20]:
description_agg = (description.groupby(['organization_name'])
      .agg({'description': lambda x: x.tolist()})
      .rename({'description' : 'descriptions'}, axis=1)
      .reset_index())

In [21]:
description_agg

Unnamed: 0,organization_name,descriptions
0,ARC Bakersfield,"[Purchase Replacement < 30 Ft Bus, Purchase Re..."
1,ARC Imperial Valley,"[Buy Van For Svc Expansion, Operating Assistance]"
2,Able Industries,[Buy <30-Ft Bus For Expansion]
3,Alegria Community Living,[Purchase Replacement Van]
4,Amador Transit,"[Mobility Management, Operating Assistance]"
...,...,...
142,Vivalon Inc,"[Operating Assistance, Purchase Replacement < ..."
143,West Valley Community Services,[Operating Assistance]
144,Wilshire Community Services,[Operating Assistance]
145,Work Training Center,"[Purchase Radios, Purchase Replacement < 30 Ft..."


In [22]:
description_agg>>filter(_.organization_name =='Amador Transit')

Unnamed: 0,organization_name,descriptions
4,Amador Transit,"[Mobility Management, Operating Assistance]"


## Specific Project Types
finding project types for 
* wheelchair accessible vehicles
* on-demand scheduling
* dispatching software platforms

In [23]:
p.description.value_counts()

Purchase Replacement < 30 Ft Bus                                                                464
Buy <30-Ft Bus For Expansion                                                                    138
Purchase Replacement Van                                                                        122
Operating Assistance                                                                            113
Buy Van For Svc Expansion                                                                        93
Mobility Management                                                                              72
Purchase Computer Hardware                                                                       16
Purchase Radios                                                                                  16
Surveillance/Security                                                                            10
Support Equip/Facilities-Equipment                                                                8


In [24]:
p>>filter(_.description.str.contains("Bus"))>>select(_.organization_name,
                                                     _.project_year,
                                                     _.description
                                                    )

Unnamed: 0,organization_name,project_year,description
75,Able Industries,2017,Buy <30-Ft Bus For Expansion
76,Able Industries,2017,Buy <30-Ft Bus For Expansion
77,Able Industries,2017,Buy <30-Ft Bus For Expansion
78,Able Industries,2017,Buy <30-Ft Bus For Expansion
79,Able Industries,2017,Buy <30-Ft Bus For Expansion
...,...,...,...
1110,Vivalon Inc,2019,Purchase Replacement < 30 Ft Bus
1111,Vivalon Inc,2019,Purchase Replacement < 30 Ft Bus
1112,Vivalon Inc,2019,Purchase Replacement < 30 Ft Bus
1113,Vivalon Inc,2019,Purchase Replacement < 30 Ft Bus


In [25]:
p>>filter(_.description.str.contains("Bus"))>>count(_.description)>>arrange(-_.n)

Unnamed: 0,description,n
3,Purchase Replacement < 30 Ft Bus,464
2,Buy <30-Ft Bus For Expansion,138
0,Buy 30-Ft Bus For Expansion,2
1,Buy 35-Ft Bus For Expansion,1
4,Purchase Replacement < 30 Ft Bus for JDA,1


In [26]:
display(Markdown(
    f"There are {len(p>>filter(_.description.str.contains('Bus'))>>count(_.organization_name))} "
    f"organizations with Bus purchases as project types"))

There are 75 organizations with Bus purchases as project types

In [27]:
p>>filter(_.description.str.contains("Van"))>>select(_.organization_name,
                                                     _.project_year,
                                                     _.description
                                                    )>>count(_.description)

Unnamed: 0,description,n
0,Buy Van For Svc Expansion,93
1,Operating Assistance - Operating Accessible Vans,1
2,Purchase Replacement Van,122
3,Purchase Replacement Van 1,1
4,Purchase Replacement Van 2,1


In [28]:
(p[(p.description.str.contains("Van")==True) & (p.description.str.contains("Assistance")==False)])>>select(
    _.organization_name,
    _.project_year,
    _.description
)

Unnamed: 0,organization_name,project_year,description
34,Friends of Children with Special Needs,2017,Buy Van For Svc Expansion
35,Friends of Children with Special Needs,2017,Buy Van For Svc Expansion
93,ARC Bakersfield,2017,Purchase Replacement Van
94,ARC Bakersfield,2017,Purchase Replacement Van
103,ARC Bakersfield,2017,Purchase Replacement Van
...,...,...,...
1039,United Cerebral Palsy Association of Greater S...,2019,Buy Van For Svc Expansion
1089,Valley Resource Center,2019,Purchase Replacement Van
1090,Valley Resource Center,2019,Purchase Replacement Van
1095,Valley Resource Center,2019,Purchase Replacement Van


In [29]:
p[(p.description.str.contains("Van")==True) & (p.description.str.contains("Assistance")==False)]>>count(_.description)

Unnamed: 0,description,n
0,Buy Van For Svc Expansion,93
1,Purchase Replacement Van,122
2,Purchase Replacement Van 1,1
3,Purchase Replacement Van 2,1


In [30]:
len(p[(p.description.str.contains("Van")==True) & (p.description.str.contains("Assistance")==False)]>>count(_.organization_name))

38

In [31]:
display(Markdown(
    f"There are " 
    f"{len(p[(p.description.str.contains('Van')==True) & (p.description.str.contains('Assistance')==False)]>>count(_.organization_name))} "
    f"organizations with Van purchases as project types"))

There are 38 organizations with Van purchases as project types

In [32]:
p>>filter(_.description.str.contains("On-demand"))>>select(_.organization_name,
                                                     _.project_year,
                                                     _.description
                                                    )

Unnamed: 0,organization_name,project_year,description


In [33]:
display(Markdown(
    f"There are " 
    f"{len(p>>filter(_.description.str.contains('On-demand'))>>select(_.organization_name,_.project_year, _.description))} "
    f"organizations with On-Demand purchases as project types"))

There are 0 organizations with On-Demand purchases as project types

In [34]:
p>>filter(_.description.str.contains("Operating Assistance"))>>select(_.organization_name,
                                                     _.project_year,
                                                     _.description
                                                    )

Unnamed: 0,organization_name,project_year,description
0,Amador Transit,2017,Operating Assistance
1,Angel View Inc,2017,Operating Assistance
2,ARC Imperial Valley,2017,Operating Assistance
3,ARC Imperial Valley,2017,Operating Assistance
4,Area 1 Agency on Aging,2017,Operating Assistance
...,...,...,...
584,United Cerebral Palsy Association of Greater S...,2019,Operating Assistance
585,Valley Resource Center,2019,Operating Assistance
586,West Valley Community Services,2019,Operating Assistance
587,West Valley Community Services,2019,Operating Assistance


In [35]:
p>>filter(_.description.str.contains("Operating Assistance"))>>select(_.organization_name,
                                                     _.project_year,
                                                     _.description
                                                    )>>count(_.organization_name)

Unnamed: 0,organization_name,n
0,ARC Imperial Valley,4
1,Amador Transit,1
2,Angel View Inc,3
3,Area 1 Agency on Aging,1
4,Asian Community Center of Sacramento Valley In...,6
...,...,...
60,Valley Resource Center,2
61,Victor Valley Community Services Council,1
62,Vivalon Inc,1
63,West Valley Community Services,3


In [36]:
display(Markdown(
    f"There are " 
    f"{len((p>>filter(_.description.str.contains('Operating Assistance'))>>select(_.organization_name,_.project_year,_.description))>>count(_.organization_name))} "
    f"organizations with Operating Assistance purchases as project types"))
       

There are 65 organizations with Operating Assistance purchases as project types

In [37]:
p>>filter(_.description.str.contains("Software"))>>select(_.organization_name,
                                                     _.project_year,
                                                     _.description
                                                    )

Unnamed: 0,organization_name,project_year,description
117,Asian Community Center of Sacramento Valley In...,2017,Purchase Computer Software
118,Asian Community Center of Sacramento Valley In...,2017,Purchase Computer Software
508,Yolo County Transportation District,2017,Purchase Computer Software
509,Yolo County Transportation District,2017,Purchase Computer Software
511,County of Sonoma Human Services Department Adu...,2018,Purchase Computer Software
631,Catholic Charities of the Diocese of Stockton,2019,Purchase Computer Software
632,Catholic Charities of the Diocese of Stockton,2019,Purchase Computer Software


In [38]:
p>>filter(_.description.str.contains("Hardware"))>>select(_.organization_name,
                                                     _.project_year,
                                                     _.description
                                                    )

Unnamed: 0,organization_name,project_year,description
111,Asian Community Center of Sacramento Valley In...,2017,Purchase Computer Hardware
166,Community Bridges Liftline,2017,Purchase Computer Hardware
231,Foothill AIDS Project,2017,Purchase Computer Hardware
421,San Joaquin Regional Transit District,2017,Purchase Computer Hardware
477,United Cerebral Palsy Association of Greater S...,2017,Purchase Computer Hardware
478,United Cerebral Palsy Association of Greater S...,2017,Purchase Computer Hardware
629,Catholic Charities of the Diocese of Stockton,2019,Purchase Computer Hardware
630,Catholic Charities of the Diocese of Stockton,2019,Purchase Computer Hardware
653,Community Bridges Liftline,2019,Purchase Computer Hardware
654,Community Bridges Liftline,2019,Purchase Computer Hardware


In [39]:
p.query('description.str.contains("Hardware") or description.str.contains("Software")')>>count(_.description)

Unnamed: 0,description,n
0,Purchase Computer Hardware,16
1,Purchase Computer Software,7


In [40]:
(p.query('description.str.contains("Hardware") or description.str.contains("Software")')>>count(_.organization_name))

Unnamed: 0,organization_name,n
0,Asian Community Center of Sacramento Valley In...,3
1,Catholic Charities of the Diocese of Stockton,4
2,Community Bridges Liftline,3
3,County of Sonoma Human Services Department Adu...,1
4,Foothill AIDS Project,1
5,Milestones of Development INC,2
6,Porterville Sheltered Workshop,2
7,San Joaquin Regional Transit District,1
8,Sunline Transit Agency,2
9,United Cerebral Palsy Association of Greater S...,2


In [41]:
display(Markdown(
    f"There are " 
    f"{len((p[(p['description'].str.contains('Hardware')) | (p['description'].str.contains('Software'))])>>count(_.organization_name))} "
    f"organizations with Hardware or Software purchases as project types"))
       

There are 11 organizations with Hardware or Software purchases as project types

# Description by Year

In [42]:
description_year = p>>group_by(_.organization_name, _.project_year)>>count(_.description)

In [43]:
description_year

Unnamed: 0,organization_name,project_year,description,n
0,ARC Bakersfield,2017,Purchase Replacement < 30 Ft Bus,16
1,ARC Bakersfield,2017,Purchase Replacement Van,4
2,ARC Imperial Valley,2017,Operating Assistance,2
3,ARC Imperial Valley,2019,Buy Van For Svc Expansion,4
4,ARC Imperial Valley,2019,Operating Assistance,2
...,...,...,...,...
354,Wilshire Community Services,2019,Operating Assistance,1
355,Work Training Center,2017,Purchase Radios,1
356,Work Training Center,2017,Purchase Replacement < 30 Ft Bus,6
357,Yolo County Transportation District,2017,Mobility Management,2


In [44]:
p_agg2 = (description_year.groupby(['organization_name', 'project_year'])
      .agg({'description': lambda x: x.tolist()})
      .rename({'description' : 'descriptions'},axis=1)
      .reset_index())

In [45]:
p_agg2

Unnamed: 0,organization_name,project_year,descriptions
0,ARC Bakersfield,2017,"[Purchase Replacement < 30 Ft Bus, Purchase Re..."
1,ARC Imperial Valley,2017,[Operating Assistance]
2,ARC Imperial Valley,2019,"[Buy Van For Svc Expansion, Operating Assistance]"
3,Able Industries,2017,[Buy <30-Ft Bus For Expansion]
4,Alegria Community Living,2019,[Purchase Replacement Van]
...,...,...,...
221,West Valley Community Services,2019,[Operating Assistance]
222,Wilshire Community Services,2017,[Operating Assistance]
223,Wilshire Community Services,2019,[Operating Assistance]
224,Work Training Center,2017,"[Purchase Radios, Purchase Replacement < 30 Ft..."


In [46]:
p_agg2>>filter(_.organization_name =='Amador Transit')

Unnamed: 0,organization_name,project_year,descriptions
5,Amador Transit,2017,"[Mobility Management, Operating Assistance]"
6,Amador Transit,2019,[Mobility Management]


In [47]:
year = (p.groupby(['organization_name','project_year'])
        .agg({'description': lambda x: x.tolist()})
        .unstack('project_year', fill_value=0))

In [48]:
year

Unnamed: 0_level_0,description,description,description
project_year,2017,2018,2019
organization_name,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
ARC Bakersfield,"[Purchase Replacement < 30 Ft Bus, Purchase Re...",0,0
ARC Imperial Valley,"[Operating Assistance, Operating Assistance]",0,"[Operating Assistance, Operating Assistance, B..."
Able Industries,"[Buy <30-Ft Bus For Expansion, Buy <30-Ft Bus ...",0,0
Alegria Community Living,0,0,"[Purchase Replacement Van, Purchase Replacemen..."
Amador Transit,"[Operating Assistance, Mobility Management]",0,"[Mobility Management, Mobility Management]"
...,...,...,...
Vivalon Inc,"[Operating Assistance, Purchase Replacement < ...",0,"[Purchase Replacement < 30 Ft Bus, Purchase Re..."
West Valley Community Services,0,0,"[Operating Assistance, Operating Assistance, O..."
Wilshire Community Services,[Operating Assistance],0,[Operating Assistance]
Work Training Center,"[Purchase Replacement < 30 Ft Bus, Purchase Re...",0,0


In [49]:
# get rid of the duplicates by year

In [50]:
year2 = (description_year.groupby(['organization_name','project_year'])
        .agg({'description': lambda x: x.tolist()})
        .unstack('project_year', fill_value=0))

In [51]:
year2

Unnamed: 0_level_0,description,description,description
project_year,2017,2018,2019
organization_name,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
ARC Bakersfield,"[Purchase Replacement < 30 Ft Bus, Purchase Re...",0,0
ARC Imperial Valley,[Operating Assistance],0,"[Buy Van For Svc Expansion, Operating Assistance]"
Able Industries,[Buy <30-Ft Bus For Expansion],0,0
Alegria Community Living,0,0,[Purchase Replacement Van]
Amador Transit,"[Mobility Management, Operating Assistance]",0,[Mobility Management]
...,...,...,...
Vivalon Inc,"[Operating Assistance, Purchase Replacement < ...",0,[Purchase Replacement < 30 Ft Bus]
West Valley Community Services,0,0,[Operating Assistance]
Wilshire Community Services,[Operating Assistance],0,[Operating Assistance]
Work Training Center,"[Purchase Radios, Purchase Replacement < 30 Ft...",0,0


In [52]:
# testing for single organization: Amador Transit
(p>>filter(_.organization_name =='Amador Transit'))>>select(_.project_year, _.description)

Unnamed: 0,project_year,description
0,2017,Operating Assistance
81,2017,Mobility Management
599,2019,Mobility Management
600,2019,Mobility Management


In [53]:
# great, exporting
#year.to_csv('by_years.csv')

In [54]:
p_agg2

Unnamed: 0,organization_name,project_year,descriptions
0,ARC Bakersfield,2017,"[Purchase Replacement < 30 Ft Bus, Purchase Re..."
1,ARC Imperial Valley,2017,[Operating Assistance]
2,ARC Imperial Valley,2019,"[Buy Van For Svc Expansion, Operating Assistance]"
3,Able Industries,2017,[Buy <30-Ft Bus For Expansion]
4,Alegria Community Living,2019,[Purchase Replacement Van]
...,...,...,...
221,West Valley Community Services,2019,[Operating Assistance]
222,Wilshire Community Services,2017,[Operating Assistance]
223,Wilshire Community Services,2019,[Operating Assistance]
224,Work Training Center,2017,"[Purchase Radios, Purchase Replacement < 30 Ft..."


## Function for desription year

In [63]:
description_year.sample(2)

Unnamed: 0,organization_name,project_year,description,n
355,Work Training Center,2017,Purchase Radios,1
79,Consolidated Tribal Health Project Inc,2017,Operating Assistance,1


In [97]:
description_year['description'] = description_year['description'].astype(str)

In [98]:
description_year.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 359 entries, 0 to 358
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   organization_name  359 non-null    object
 1   project_year       359 non-null    int64 
 2   description        359 non-null    object
 3   n                  359 non-null    int64 
 4   project_type       359 non-null    object
dtypes: int64(2), object(3)
memory usage: 14.1+ KB


In [99]:
# def get_project_type(df)
#     Vehicle_Purchase = ['Bus','Van']
#     Operating_Assistance = ['Operating Assistance']
#     Mobility_Management = ['Mobility Management']
#     Hardware_Software_Purchase = ['Hardware', 'Software']
    
#     def project_types_def(row):
#         if (row.description in 
#             return "Vehicle Purchase"
#         elif (description.str.contains('Operating Assistance')):
#             return "Operating Assistance"
#         elif (description.str.contains('Mobility Management')):
#             return "Mobility Management"
#         elif (description.str.contains('Hardware')) | (description.str.contains('Software')):
#             return "Hardware/Software Purchase"
#         else:
#             return "Other"
#     description_year["project_type"] = description_year.apply(lambda x: project_types_def(x), axis=1)

In [100]:
# def project_types_def(row):
#         if (description.str.contains('Bus')) | (description.str.contains('Van')):
#             return "Vehicle Purchase"
#         elif (description.str.contains('Operating Assistance')):
#             return "Operating Assistance"
#         elif (description.str.contains('Mobility Management')):
#             return "Mobility Management"
#         elif (description.str.contains('Hardware')) | (description.str.contains('Software')):
#             return "Hardware/Software Purchase"
#         else:
#             return "Other"
# description_year["project_type"] = description_year.apply(lambda x: project_types_def(x), axis=1)

### Using Boolean

In [106]:
VEHICLE_PURCHASE = ['bus','van']
OPERATING_ASSISTANCE = ['operating assistance']
MOBILITY_MANAGEMENT = ['mobility management']
HARDWARE_SOFTWARE_PURCHASE = ['hardware', 'software']


def categorize_project_descriptions(row):
    
    project_description = row.description.lower()
    

    vehicle_purchase = 0
    operating_assistance = 0
    mobility_management = 0
    hardware_software_purchase = 0
        
    if any(word in description for word in VEHICLE_PURCHASE):
        vehicle_purchase = 1
        
    if any(word in description for word in OPERATING_ASSISTANCE):
        operating_assistance = 1
        
    if any(word in description for word in MOBILITY_MANAGEMENT):
        mobility_management = 1
        
    if any(word in description for word in HARDWARE_SOFTWARE_PURCHASE):
        hardware_software_purchase = 1
    
    return pd.Series(
        [vehicle_purchase, operating_assistance, mobility_management, hardware_software_purchase], 
        index=['vehicle_purchase', 'operating_assistance', 'mobility_management', 'hardware_software_purchase',]
    )
   

In [107]:
project_categories = description_year.apply(categorize_project_descriptions, axis=1)
add_cat = pd.concat([description_year, project_categories], axis=1)
work_cols = list(project_categories.columns)
add_cat = add_cat.assign(
    project_categories = add_cat[work_cols].sum(axis=1)
)

In [108]:
project_categories2.sample(5)

Unnamed: 0,organization_name,project_year,description,n,vehicle_purchase,operating_assistance,mobility_management,hardware_software_purchase,project_categories
223,North Valley Services,2017,Purchase Replacement < 30 Ft Bus,3,0,0,0,0,0
282,SAHA,2019,Buy <30-Ft Bus For Expansion,2,0,0,0,0,0
241,On Lok Senior Health Services,2017,Purchase Replacement Van,1,0,0,0,0,0
8,Amador Transit,2017,Operating Assistance,1,0,0,0,0,0
344,Victor Valley Community Services Council,2017,Operating Assistance,1,0,0,0,0,0


In [116]:
project_categories2.project_categories.value_counts()


0    359
Name: project_categories, dtype: int64

### trying another way

In [118]:
description_year.description.value_counts()

Operating Assistance                                                                            87
Purchase Replacement < 30 Ft Bus                                                                81
Mobility Management                                                                             39
Purchase Replacement Van                                                                        30
Buy <30-Ft Bus For Expansion                                                                    28
Buy Van For Svc Expansion                                                                       22
Purchase Computer Hardware                                                                      10
Purchase Radios                                                                                 10
Surveillance/Security                                                                            5
Purchase Computer Software                                                                       4
Support Eq

In [119]:
def project_types_def(row):
        if (row.description== ('Purchase Replacement < 30 Ft Bus')):
            return "Vehicle Purchase"
        elif (row.description== ('Buy <30-Ft Bus For Expansion')):
            return "Vehicle Purchase"
        elif (row.description== ('Purchase Replacement Van')):
            return "Vehicle Purchase"
        elif (row.description==('Operating Assistance')):
            return "Operating Assistance"
        elif (row.description==('Mobility Management')):
            return "Mobility Management"
        elif (row.description == ('Purchase Computer Hardware')) | (row.description==('Purchase Computer Software')):
            return "Hardware/Software Purchase"
        else:
            return "Other"
description_year["project_type"] = description_year.apply(lambda x: project_types_def(x), axis=1)

In [120]:
description_year

Unnamed: 0,organization_name,project_year,description,n,project_type
0,ARC Bakersfield,2017,Purchase Replacement < 30 Ft Bus,16,Vehicle Purchase
1,ARC Bakersfield,2017,Purchase Replacement Van,4,Vehicle Purchase
2,ARC Imperial Valley,2017,Operating Assistance,2,Operating Assistance
3,ARC Imperial Valley,2019,Buy Van For Svc Expansion,4,Other
4,ARC Imperial Valley,2019,Operating Assistance,2,Operating Assistance
...,...,...,...,...,...
354,Wilshire Community Services,2019,Operating Assistance,1,Operating Assistance
355,Work Training Center,2017,Purchase Radios,1,Other
356,Work Training Center,2017,Purchase Replacement < 30 Ft Bus,6,Vehicle Purchase
357,Yolo County Transportation District,2017,Mobility Management,2,Mobility Management


In [121]:
description_year.project_type.value_counts()

Vehicle Purchase              139
Operating Assistance           87
Other                          80
Mobility Management            39
Hardware/Software Purchase     14
Name: project_type, dtype: int64

### Searching using lambda

In [112]:
p_agg2[p_agg2['descriptions'].apply(lambda x: 'Buy Van' in x)]

Unnamed: 0,organization_name,project_year,descriptions


In [113]:
p_agg2[p_agg2['descriptions'].apply(lambda x: 'Operating Assistance' in x)]


Unnamed: 0,organization_name,project_year,descriptions
1,ARC Imperial Valley,2017,[Operating Assistance]
2,ARC Imperial Valley,2019,"[Buy Van For Svc Expansion, Operating Assistance]"
5,Amador Transit,2017,"[Mobility Management, Operating Assistance]"
7,Angel View Inc,2017,"[Buy <30-Ft Bus For Expansion, Operating Assis..."
8,Angel View Inc,2019,"[Operating Assistance, Purchase Replacement < ..."
...,...,...,...
215,Victor Valley Community Services Council,2017,"[Buy 35-Ft Bus For Expansion, Operating Assist..."
219,Vivalon Inc,2017,"[Operating Assistance, Purchase Replacement < ..."
221,West Valley Community Services,2019,[Operating Assistance]
222,Wilshire Community Services,2017,[Operating Assistance]
