# Project Types of Interest

## Looking into Organizations with the following project types:
*  purchasing wheelchair accessible vehicle
* on-demand scheduling 
* dispatching software platforms

In [2]:
import warnings
warnings.filterwarnings('ignore')

In [3]:
import pandas as pd
from siuba import *
import numpy as np

import altair as alt

from shared_utils import altair_utils
from shared_utils import geography_utils
from shared_utils import calitp_color_palette as cp
from shared_utils import styleguide

from dla_utils import _dla_utils

from calitp import to_snakecase

from IPython.display import display, Markdown, HTML

In [4]:
df = pd.read_csv('5310_info_pt.csv')

In [5]:
grouped =pd.read_csv('gs://calitp-analytics-data/data-analyses/5310/5310_org_info_manualfill.csv')

In [6]:
def bar_chart(df, x_col, y_col, color_col, chart_title=""):

    bar = (
        alt.Chart(df)
        .mark_bar()
        .encode(
            x=alt.X(x_col, title=_dla_utils.labeling(x_col), sort=("-y")),
            y=alt.Y(y_col, title=_dla_utils.labeling(y_col)),
            color=alt.Color(
                color_col,
                scale=alt.Scale(range=altair_utils.CALITP_CATEGORY_BRIGHT_COLORS),
                legend=alt.Legend(title=(_dla_utils.labeling(color_col)), symbolLimit=10)
            )
        ).properties(title=chart_title))
    
    chart = styleguide.preset_chart_config(bar)
    
    return chart

In [7]:
org_counts= grouped>>count(_.organization_type)
bar_chart(org_counts,
    "organization_type",
    "n",
    "organization_type",
    "5310 Organization Types"
)

In [8]:
single_county = df[~df["county"].str.contains(",", na=False)]
bar_chart((single_county>>count(_.county)),
    "county",
    "n",
    "county",
    "Counties with 5310 Organizations (Single County)"
)

In [9]:
county_explode = df>>select(_.organization_name, _.ntd_id, _.itp_id, _.organization_type, _.city, _.county, _.merge_status)
county_explode.explode('county')

county_explode2 = county_explode.assign(county_split = df['county'].str.split(', ')).drop("county", axis=1)\
    .rename(columns = {"county_split": "county"})
county_explode2 = county_explode2.explode('county')

bar_chart((county_explode2>>count(_.county)),
    "county",
    "n",
    "county",
    "Counties with 5310 Organizations Operating"
)

## Project Types

In [10]:
#df.sample()

In [11]:
ptype_counts = (df
 >>group_by(_.organization_name)
 >>summarize(n_vp = _.vehicle_purchase.sum(), 
            n_oa = _.operating_assistance.sum(),
            n_mm = _.mobility_management.sum(),
            n_hsp = _.hardware_software_purchase.sum(),
            n_c = _.communications.sum(),
            n_fe = _.facilities_eqp.sum(),
             n_s = _.surveillance.sum(),
             n_sub = _.subsidy.sum()
            )
)

In [12]:
#ptype_counts.sample()

In [13]:
# columns = ['n_vp','n_oa','n_mm','n_hsp','n_c','n_fe','n_s','n_sub']

# for column in columns: 
#     ptype_counts.column[ptype_counts[column]> 0] = 1

In [14]:
#WORKED for getting yes/no present
## from https://stackoverflow.com/questions/61996932/replacing-values-greater-1-in-a-large-pandas-dataframe

#ptype_counts.set_index('organization_name',inplace=True)
#ptype_counts[ptype_counts >= 1] = 'Yes'
#ptype_counts.reset_index()

In [15]:
#ptype_counts = ptype_counts.replace(0, 'No')

In [16]:
#ptype_counts.to_csv('single_counts_2.csv')

In [17]:
# # def project_present(df):
# #     columns = ['n_vp','n_oa','n_mm','n_hsp','n_c','n_fe','n_s','n_sub']
# #     if df.column != 0:
# #         return 1
# #     else:
# #         return 0

# # for column in columns:
# #     df[f"{column}_2"] = df.apply(lambda x: project_present(x), axis=1)
    
    
    
# columns = ['n_vp','n_oa','n_mm','n_hsp','n_c','n_fe','n_s','n_sub']

# for column in columns:
#     ptype_counts[f"{column}_2"] = ptype_counts[column].any(1).astype(int)


In [18]:
ptype_counts.set_index('organization_name',inplace=True)
ptype_counts[ptype_counts >= 1] = 1
ptype_counts = ptype_counts.reset_index()

In [19]:
ptype_counts = ptype_counts.rename(
    columns={
        "organization_name": "Organization Name",
        "n_vp": "Vehicle Purchase",
        "n_oa": "Operating Assistance",
        "n_mm": "Mobility Management",
        "n_hsp": "Hardware/Software Purchase",
        "n_c": "Communications",
        "n_fe": "Facilities Equipment",
        "n_s": "Surveillance",
        "n_sub": "Subsidies",
    }
)

In [20]:
sum_ptype = ((ptype_counts)
               .agg({'Vehicle Purchase':'sum', 'Operating Assistance':'sum',
                      'Mobility Management':'sum','Hardware/Software Purchase':'sum','Communications':'sum',
                     'Facilities Equipment':'sum', 'Surveillance':'sum', 'Subsidies':'sum'}).reset_index())
sum_ptype = sum_ptype.rename(columns = {"index":"Project Types",0:"Count"})
bar_chart((sum_ptype),
    "Project Types",
    "Count",
    "Project Types",
    "Number of Organizations using Project Types"
)

In [22]:
columns = [
    "Vehicle Purchase",
    "Operating Assistance",
    "Mobility Management",
    "Hardware/Software Purchase",
    "Communications",
    "Facilities Equipment",
    "Surveillance",
    "Subsidies",
]
for column in columns:
    org_counts = (ptype_counts 
                  >> filter(_[column] == 1)
                  >> count(_['Organization Name'])
                  >> select(_['Organization Name']))
    display(HTML(f"<strong>Organization with {column} Project Types Present</strong>"))
    display(_dla_utils.pretify_tables(org_counts))
    #display(HTML(org_counts.to_html(index=False)))

AttributeError: module 'dla_utils._dla_utils' has no attribute 'pretify_tables'

### Purchasing Wheelchair Accessible Vehicles

In [None]:
# no project descirption matches for `wheelchair`, `lifts`, `ramps`, and `securement` 

In [None]:
# display(
#     HTML(
#         (f"There are {len(df>>filter(_.description.str.contains('wheelchair')))} project descriptions that mention Wheelchair")
#     ))
    

In [None]:
display(
    HTML(f"Of the {len(df>>filter(_.vehicle_purchase==1)>>count(_.organization_name))} agencies using 5310 funds "
      f"for vehicle purchases, "
      f"{len(df>>filter(_.vehicle_purchase==1)>>filter(_.description.str.contains('Bus'))>>count(_.organization_name))} "
      f"are for Bus Purchases, "
      f"and {len(df>>filter(_.vehicle_purchase==1)>>filter(_.description.str.contains('Van'))>>count(_.organization_name))} "
      f"are for Van Purchases"
         f"<br> There is {len(df>>filter(_.description.str.contains('wheelchair')))} mention of wheelchair "
         f"accessible vehicles, ramps or other accessiblity features"
        ))

**Organizations using funds for Vehicle Purchase**

In [None]:
vehicle_purchase= (df>>filter(_.vehicle_purchase==1)>>count(_.organization_name)>>arrange(-_.n)).head(10)
vehicle_purchase = vehicle_purchase.rename(columns={"organization_name":"Organization Name",
                                "n":"Counts"})
display(HTML(vehicle_purchase.to_html(index=False)))

### On-demand Scheduling

**Organizations with Operating Assistance Project types**

In [None]:
display(HTML(f"There is {len(df>>filter(_.description.str.contains('demand')))} mention of Demand related project types"))
display(HTML(f"There are {len((df>>filter(_.operating_assistance==1)>>count(_.organization_name)>>arrange(-_.n)))} "
             f"mentions of Operating Assistance"))

In [None]:
operating_assistance= (df>>filter(_.operating_assistance==1)>>count(_.organization_name)>>arrange(-_.n)).head(10)
operating_assistance = operating_assistance.rename(columns={"organization_name":"Organization Name",
                                "n":"Counts"})
display(HTML(operating_assistance.to_html(index=False)))

### Dispatching Software Platforms

In [None]:
display(
    HTML(f"Out of the {len(df>>filter(_.hardware_software_purchase==1)>>count(_.organization_name))} "
         f"with harware or software purchases, "
         f"{len(df>>filter(_.hardware_software_purchase==1)>>filter(_.description.str.contains('Software'))>>count(_.organization_name))}" 
             f"organization used 5310 funds for software purposes"))
             

In [None]:
software = (df>>filter(_.hardware_software_purchase==1)>>filter(_.description.str.contains('Software'))>>count(_.organization_name))
software = software.rename(columns={"organization_name":"Organization Name",
                                "n":"Counts"})
display(HTML(software.to_html(index=False)))


## Project Type Funding Analysis

### By Organization & Year

**Sum of Funds Allocated to Organizations by Year (Top 20)**

In [None]:
top20_sum = (
    df
    >> group_by(_.organization_name, _.project_year)
    >> summarize(sum_allocated_by_year=_.allocationamount.sum())
    >> arrange(-_.sum_allocated_by_year)
).head(20)
top20_sum = top20_sum.rename(columns={"organization_name":"Organization Name",
                                      "project_year":"Project Year",
                                      "sum_allocated_by_year":"Sum Allocated by Year"}) 
top20_sum = (top20_sum.style.hide(axis='index').format(formatter={("Sum Allocated by Year"): "${:,.2f}"}))

display(top20_sum)
#display(HTML(top20_sum.to_html(index=False)))

### By Organization & Project Types

**Vehicle Purchases**

In [None]:
vp = (df
 >>filter(_.vehicle_purchase==1)
 >> group_by(_.organization_name, _.project_year)
 >> summarize(sum_allocated = _.allocationamount.sum())
  >> arrange(-_.sum_allocated)
).head(20)

vp = (vp.style.hide(axis='index').format(formatter={("sum_allocated"): "${:,.2f}"}))
display(vp)

**Operating Assistance**

In [None]:
oa= (df
 >>filter(_.operating_assistance==1)
 >> group_by(_.organization_name, _.project_year)
 >> summarize(sum_allocated = _.allocationamount.sum())
 >> arrange(-_.sum_allocated)
).head(20)

oa = (oa.style.hide(axis='index').format(formatter={("sum_allocated"): "${:,.2f}"}))
display(oa)

**Software Purchase**

In [None]:
sp = (df
 >>filter(_.hardware_software_purchase==1)
 >>filter(_.description.str.contains('Software'))
 >> group_by(_.organization_name, _.project_year)
 >> summarize(sum_allocated = _.allocationamount.sum())
  >> arrange(-_.sum_allocated)
).head(20)

sp = (sp.style.hide(axis='index').format(formatter={("sum_allocated"): "${:,.2f}"}))
display(sp)


In [None]:
hsp = (df
 >>filter(_.hardware_software_purchase==1)
 >> group_by(_.organization_name, _.project_year)
 >> summarize(sum_allocated = _.allocationamount.sum())
       >> arrange(-_.sum_allocated)
).head(20)

hsp = (hsp.style.hide(axis='index').format(formatter={("sum_allocated"): "${:,.2f}"}))
display(hsp)


### By County

In [None]:
county = (df
 >> group_by(_.county)
 >> summarize(sum_allocated_by_county = _.allocationamount.sum())
)
county = (county.style.hide(axis='index').format(formatter={("sum_allocated_by_county"): "${:,.2f}"}))
display(county)