In [1]:
import pandas as pd
import numpy as np
np.set_printoptions(suppress=True, precision=2)
import matplotlib.pyplot as plt
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets

%matplotlib inline

In [2]:
# List the working directory; the output shows DV_data.csv, the file loaded in the next cell.
ls

DV_data.csv                DV_program_analysis.ipynb  README.md


In [3]:
# Load the program listing and order it by city. Chaining returns the sorted
# frame directly instead of sorting the loaded frame in place.
data = pd.read_csv('DV_data.csv').sort_values('City')

In [4]:
# Preview the first five rows to check how the columns were parsed.
data.head(n=5)

Unnamed: 0,City,Agency,Language,Website,fees listed on website,Address,Lat_Long,Registration_min,Registration_max,Other_costs,Class_min,Class_max,Veterans_only,Restrictions,Data_issue,Notes
0,Arcadia,4 Real Solutions Human Service Agency,English,no,,"150 N Santa Anita Ave, Arcadia, CA 91006","34.141674, -118.031171",,,,,,False,,Left VM 8/11,
1,Arcadia,4 Real Solutions Human Service Agency,Spanish,no,,"150 N Santa Anita Ave, Arcadia, CA 91006","34.141674, -118.031171",,,,,,False,,Left VM 8/11,
2,Arcadia,4 Real Solutions Human Service Agency,Chinese,no,,"150 N Santa Anita Ave, Arcadia, CA 91006","34.141674, -118.031171",,,,,,False,,Left VM 8/11,
3,Arcadia,Win-Win Relationship Learning Center,English,no,no,"66 W Duarte Rd, Arcadia, CA 91007","34.128777, -118.034497",40.0,40.0,0.0,18.0,20.0,False,,,
4,Arcadia,Win-Win Relationship Learning Center,Chinese,no,no,"66 W Duarte Rd, Arcadia, CA 91007","34.128777, -118.034497",40.0,40.0,0.0,20.0,30.0,False,,,


In [5]:
# Split the "lat, long" text once and convert both halves to floats.
# The vectorised `.str` methods propagate missing values, so a row without a
# Lat_Long entry gets NaN coordinates instead of raising TypeError when a
# per-row lambda tries to index a float NaN. Malformed text still raises.
lat_long = data['Lat_Long'].str.split(',', expand=True)
data['Lat'] = pd.to_numeric(lat_long[0].str.strip()).astype(float)
data['Long'] = pd.to_numeric(lat_long[1].str.strip()).astype(float)

In [6]:
# Cost columns can contain "?"-prefixed placeholders where a fee is unknown.
# Turn those (and blanks) into NaN and store every cost column as float.
#
# The placeholder test goes through astype(str) rather than using `.str` on
# the column directly: `.str` raises AttributeError on a column that is already
# numeric, which made this cell fail when re-run. With astype(str) the
# conversion is a no-op for numeric columns, so the cell is idempotent.
#
# Other_costs and Class_max are included as well, because the total-cost
# calculation does arithmetic on them.
cost_columns = ['Registration_min', 'Registration_max', 'Other_costs', 'Class_min', 'Class_max']
for column in cost_columns:
    is_unknown = data[column].astype(str).str.startswith("?")
    data[column] = data[column].mask(is_unknown).astype(float)


In [18]:
# Keep only programs with a known minimum per-class fee, then collect the
# distinct languages offered among them (in order of first appearance).
no_nulls = data.loc[data['Class_min'].notna()]

languages = no_nulls['Language'].drop_duplicates().tolist()
print(languages)

['English', 'Chinese', 'Spanish', 'Armenian', 'Mandarin', 'Cantonese', 'Korean', 'Farsi', 'Portuguese', 'ASL', 'Russian']


In [12]:
num_sessions = 52

# Programs flagged Data_issue == '10-for-3' offer their minimum class fee only
# for the first three months (12 sessions); later sessions cost Class_max.
# Capping at num_sessions keeps the full-fee session count from going negative
# if a program shorter than 12 sessions is ever modelled.
reduced_fee_sessions = min(12, num_sessions)
full_fee_sessions = num_sessions - reduced_fee_sessions

limited_discount = data['Data_issue'] == '10-for-3'
min_class_fees = np.where(
    limited_discount,
    reduced_fee_sessions*data['Class_min'] + full_fee_sessions*data['Class_max'],
    num_sessions*data['Class_min'],
)

# Cheapest possible total: lowest registration fee, other costs, and the
# lowest class fees the program makes available.
data['min_total_cost'] = data['Registration_min'].astype(float) + data['Other_costs'] + min_class_fees
# Regular total: highest registration fee, other costs, and the full class fee
# for every session.
data['regular_total_cost'] = data['Registration_max'] + data['Other_costs'] + num_sessions*data['Class_max']

In [25]:
def get_per_city_min(language):
    """Print the lowest `min_total_cost` per city for programs taught in `language`.

    Reads the notebook-level `data` frame. Veterans-only programs are left
    out, and rows missing a city or a cost are dropped before grouping.
    The table is printed; nothing is returned.
    """
    wanted = np.logical_and(data['Language'] == language, ~data['Veterans_only'])
    costs = data.loc[wanted, ['City', 'min_total_cost']].dropna()
    cheapest_by_city = costs.groupby('City').min()
    print("Absolute minimum cost per city, no veterans programs")
    print(cheapest_by_city)

# Build a widget over `languages`; each selection calls get_per_city_min, which prints its table.
interact(get_per_city_min, language=languages)

A Jupyter Widget

<function __main__.get_per_city_min>

### Minimum cost of a 52-week program per city, assuming the client pays the full cost of the program

In [26]:
def get_per_city_reg(language):
    """Print, for each city, the cheapest `regular_total_cost` among `language` programs.

    Reads the notebook-level `data` frame. Veterans-only programs are
    excluded, and rows lacking a city or a regular total are dropped before
    grouping. The table is printed; nothing is returned.
    """
    wanted = np.logical_and(data['Language'] == language, ~data['Veterans_only'])
    costs = data.loc[wanted, ['City', 'regular_total_cost']].dropna()
    cheapest_regular = costs.groupby('City').min()
    print("Minimum cost per city, assuming full program cost, no veterans programs")
    print(cheapest_regular)

# Build a widget over `languages`; each selection calls get_per_city_reg, which prints its table.
interact(get_per_city_reg, language=languages)

A Jupyter Widget

<function __main__.get_per_city_reg>

### Average cost of a 52-week program per city. This ignores veterans programs.

In [31]:
def get_per_city_reg(language):
    """Print the mean `regular_total_cost` per city for `language` programs, rounded to 2 decimals.

    Reads the notebook-level `data` frame. Veterans-only programs are
    excluded, and rows lacking a city or a regular total are dropped before
    averaging. The table is printed; nothing is returned.

    NOTE(review): this reuses the name `get_per_city_reg` from the earlier
    minimum-cost cell and shadows that function once this cell has run; a
    distinct name such as `get_per_city_avg` would be clearer.
    """
    wanted = np.logical_and(data['Language'] == language, ~data['Veterans_only'])
    costs = data.loc[wanted, ['City', 'regular_total_cost']].dropna()
    mean_by_city = costs.groupby('City').mean().round(2)
    print(mean_by_city)

# Build a widget over `languages`; each selection calls the per-city average function defined in this cell.
interact(get_per_city_reg, language=languages)

A Jupyter Widget

<function __main__.get_per_city_reg>

## Veterans programs

All veterans programs are free but open to veterans only. There are only 3 veterans programs in the LA area.

In [30]:
# Veterans-only programs: select the rows and the three identifying columns
# in a single .loc lookup, then display the result.
vets = data.loc[data['Veterans_only'], ['City', 'Agency', 'Address']]
vets

Unnamed: 0,City,Agency,Address
48,East Los Angeles,East LA Vet Center,"5400 E Olympic Blvd, Commerce, CA 90022"
64,Gardena,Dept of Veterans Affairs,"1045 W Redondo Beach Blvd, Gardena, CA 90247"
212,Sherman Oaks,Dept of Veteran Affairs,"16111 Plummer St, North Hills, CA 91343"
