## GGRF Analysis
* Goal: Narrow down projects that need intervention with allocating funds from GGRF? 

In [115]:
import A1_data_prep
import A2_tableau
import numpy as np
import pandas as pd
from babel.numbers import format_currency
from calitp.sql import to_snakecase

pd.options.display.max_columns = 100
pd.options.display.float_format = "{:.2f}".format
pd.set_option("display.max_rows", None)
pd.set_option("display.max_colwidth", None)

In [116]:
# Allocation subset
alloc_subset = ['allocation_project_id','allocation_phase', 'allocation_sb1_funding',
                'allocation_sb1_funding']

In [117]:
alloc = A1_data_prep.clean_allocation()[alloc_subset]

In [118]:
def load_expenditures():
    # Load in original sheet
    df = pd.read_excel(f"{A1_data_prep.GCS_FILE_PATH}3010_3020 Expenditure for fund 0046.xls", sheet_name="Download")

    # First 10 columns or so are not data, drop them
    df = (df.iloc[9:].reset_index(drop=True))
          
    # The first row contains column names - update it to the column
    df.columns = df.iloc[0]
          
    # Drop the first row as they are now column names
    df = df.drop(df.index[0]).reset_index(drop=True)
    
    df = to_snakecase(df)
    
    df = (df.loc[df['fy'] > '2017']).reset_index()
    return df

In [128]:
expenditure = load_expenditures()

In [129]:
expenditure['project'] = expenditure['project'].astype(str).str[2:]

In [130]:
def load_project_status():
    df = pd.read_excel(f"{A1_data_prep.GCS_FILE_PATH}Project status for fund 0046 as of 12-5-22.xls", sheet_name="Download")

    df = df.iloc[6:].reset_index(drop=True)
    
    df.columns = df.iloc[0]
    
    # Drop the first row as they are now column names
    df = df.drop(df.index[0]).reset_index(drop=True)
    
    # Coerce certain columns to numeric
    df[["Billed", "Reimbursements"]] = df[["Billed", "Reimbursements"]].apply(
        pd.to_numeric, errors="coerce"
    )
    # Group project id so the same ones will be on one line
    df = df.groupby(['Project']).agg({'Billed':'sum', 'Reimbursements':'sum'}).reset_index()
    
    df = to_snakecase(df)
    
    return df

In [131]:
project_status = load_project_status()

In [132]:
m1 = pd.merge(
    alloc, expenditure, how="left", left_on = "allocation_project_id", right_on = "project", indicator=True
)

In [135]:
alloc.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 370 entries, 0 to 372
Data columns (total 4 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   allocation_project_id   370 non-null    object 
 1   allocation_phase        370 non-null    object 
 2   allocation_sb1_funding  370 non-null    float64
 3   allocation_sb1_funding  370 non-null    float64
dtypes: float64(2), object(2)
memory usage: 14.5+ KB


In [136]:
expenditure.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1250 entries, 0 to 1249
Data columns (total 16 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   index               1250 non-null   int64 
 1   fy                  1250 non-null   object
 2   fund                1250 non-null   object
 3   appr_catg           1250 non-null   object
 4   appr_unit           1250 non-null   object
 5   pec                 1250 non-null   object
 6   project             1250 non-null   object
 7   project_name        1237 non-null   object
 8   project_short_name  1237 non-null   object
 9   phase               1237 non-null   object
 10  old_ea              1250 non-null   object
 11  capital_exp         1250 non-null   object
 12  local_exp           1250 non-null   object
 13  support_exp         1250 non-null   object
 14  other_exp           1250 non-null   object
 15  tot_exp             1250 non-null   object
dtypes: int64(1), object(15)
