In [1]:
import pandas as pd

## Creating demographics .csv file so that the data can be easily added to city/county aggregations

### City-level

In [2]:
taxpop = pd.read_csv('data/tax_pop_city.csv').iloc[:,1:]
ces = pd.read_csv('data/ces_dac_city_proportion_median.csv')
income = pd.read_csv('data/census_median_income_by_city.csv')

In [3]:
taxpop.shape

(482, 7)

In [4]:
ces.shape

(789, 4)

In [5]:
income.shape

(789, 2)

^ The tax and population data contain only 482 cities because that's how many there are in California. However, our CPUC data has 537 cities, so we'll have to figure out what to do with the missing values... we might have to look the numbers up manually :(

Because of that, I'll do a left merge onto `ces` so that all 789 cities are kept.

In [6]:
ces = ces.sort_values('city').reset_index(drop=True)

income = income.sort_values('city').reset_index(drop=True)

In [7]:
taxpop = taxpop.iloc[:, [0,4,5,6]]

In [8]:
taxpop.head()

Unnamed: 0,City,Total Tax Revenue,Average Tax Revenue,Population
0,Adelanto,23628867.0,7876289.0,34049.0
1,Agoura Hills,58614832.0,19538280.0,20222.0
2,Alameda,282357554.0,94119180.0,77624.0
3,Albany,57342690.0,19114230.0,19696.0
4,Alhambra,154981924.0,51660640.0,83750.0


### making ventura consistent

In [9]:
taxpop[taxpop.City.str.contains('entura')]

Unnamed: 0,City,Total Tax Revenue,Average Tax Revenue,Population
364,San Buenaventura,278789652.0,92929884.0,


In [10]:
# The tax data lists Ventura under its official name, "San Buenaventura", and
# has no population for it. Select the row by name rather than by a hard-coded
# row label so the fix keeps hitting the right city if the CSV order changes.
is_ventura = taxpop['City'] == 'San Buenaventura'

# Population value filled in by hand because it is missing in the source file.
taxpop.loc[is_ventura, 'Population'] = 109106

# Rename last: the mask above was computed from the old name.
taxpop.loc[is_ventura, 'City'] = 'VENTURA'

In [11]:
taxpop[taxpop.City.str.contains('VENTURA')]

Unnamed: 0,City,Total Tax Revenue,Average Tax Revenue,Population
364,VENTURA,278789652.0,92929884.0,109106.0


In [12]:
ces.city = ces.city.str.upper()
taxpop.City = taxpop.City.str.upper()
income.city = income.city.str.upper()

In [13]:
demo = ces.merge(taxpop, left_on= 'city', right_on = 'City', how ='left' )

In [14]:
demo.drop(columns='City', inplace=True)

In [15]:
demo.head()

Unnamed: 0,city,dac_proportion,ces_score_median,ces_percentile_median,Total Tax Revenue,Average Tax Revenue,Population
0,ACAMPO,0.0,21.531463,40.282507,,,
1,ACTON,0.0,14.513222,24.063564,,,
2,ADELANTO,0.5,35.908799,68.451255,23628867.0,7876289.0,34049.0
3,AGOURA HILLS,0.0,8.967396,10.827343,58614832.0,19538280.0,20222.0
4,ALAMEDA,0.0,20.596928,39.796948,282357554.0,94119180.0,77624.0


In [16]:
dem = demo.merge(income, left_on='city', right_on='city', how='left')

In [17]:
dem[dem['Population'].isnull()]

Unnamed: 0,city,dac_proportion,ces_score_median,ces_percentile_median,Total Tax Revenue,Average Tax Revenue,Population,median_household_income_usd
0,ACAMPO,0.0,21.531463,40.282507,,,,83913.0
1,ACTON,0.0,14.513222,24.063564,,,,105263.0
5,ALAMO,0.0,2.175088,0.416194,,,,219750.0
9,ALPINE,0.0,11.327197,15.897339,,,,100290.0
10,ALTA,0.0,11.769621,16.937823,,,,75563.0
...,...,...,...,...,...,...,...,...
778,WOODLAND HILLS,0.0,19.115863,36.101652,,,,106111.0
779,WRIGHTWOOD,0.0,14.743829,24.504982,,,,64841.0
781,YOSEMITE NTL PARK,0.0,20.147691,38.743852,,,,55994.0
782,YOSEMITE NATIONAL PARK,0.0,,,,,,47829.0


In [18]:
dem.replace('YOSEMITE NTL PARK', 'YOSEMITE NATIONAL PARK', inplace=True)

In [19]:
dem.tail(10)

Unnamed: 0,city,dac_proportion,ces_score_median,ces_percentile_median,Total Tax Revenue,Average Tax Revenue,Population,median_household_income_usd
779,WRIGHTWOOD,0.0,14.743829,24.504982,,,,64841.0
780,YORBA LINDA,0.0,14.379302,23.432968,108191037.0,36063680.0,67644.0,137520.5
781,YOSEMITE NATIONAL PARK,0.0,20.147691,38.743852,,,,55994.0
782,YOSEMITE NATIONAL PARK,0.0,,,,,,47829.0
783,YOUNTVILLE,0.0,10.848488,14.907302,32212771.0,10737590.0,2966.0,69300.0
784,YREKA,0.0,20.946433,40.623029,18300411.0,6100137.0,7518.0,47258.0
785,YUBA CITY,0.0625,25.733873,51.235969,95292052.0,31764020.0,67010.0,60333.5
786,YUCAIPA,0.0,20.904076,40.358179,57240312.0,19080100.0,53921.0,65956.0
787,YUCCA VALLEY,0.0,18.196599,33.711691,42210569.0,14070190.0,21777.0,36509.0
788,ZAMORA,0.0,29.949206,59.339135,,,,50129.0


In [20]:
# After the rename above there are two YOSEMITE NATIONAL PARK rows; the one
# without CES scores is the redundant one. Select it by content instead of the
# hard-coded label 782 so re-running the cell is a no-op rather than a KeyError.
yosemite_without_ces = dem[
    (dem['city'] == 'YOSEMITE NATIONAL PARK') & dem['ces_score_median'].isna()
].index
dem.drop(index=yosemite_without_ces, inplace=True)

In [21]:
dem[dem.city == 'VENTURA']

Unnamed: 0,city,dac_proportion,ces_score_median,ces_percentile_median,Total Tax Revenue,Average Tax Revenue,Population,median_household_income_usd
738,VENTURA,0.08,19.78211,37.810569,278789652.0,92929884.0,109106.0,78115.0


### getting better population column

In [22]:
pop = pd.read_csv('data/total_population_by_city_2019.csv')

In [23]:
pop.city = pop.city.str.upper()
pop.shape

(789, 2)

In [24]:
dem.shape

(788, 8)

In [25]:
popi = dem.merge(pop, left_on='city', right_on='city', how='left')

In [26]:
popi[popi.total_population.isna()]

Unnamed: 0,city,dac_proportion,ces_score_median,ces_percentile_median,Total Tax Revenue,Average Tax Revenue,Population,median_household_income_usd,total_population


In [27]:
popi[popi.city.str.contains('ENTURA')]

Unnamed: 0,city,dac_proportion,ces_score_median,ces_percentile_median,Total Tax Revenue,Average Tax Revenue,Population,median_household_income_usd,total_population
738,VENTURA,0.08,19.78211,37.810569,278789652.0,92929884.0,109106.0,78115.0,109566


In [28]:
popi.drop(columns='Population', inplace=True)

In [29]:
popi.head()

Unnamed: 0,city,dac_proportion,ces_score_median,ces_percentile_median,Total Tax Revenue,Average Tax Revenue,median_household_income_usd,total_population
0,ACAMPO,0.0,21.531463,40.282507,,,83913.0,9814
1,ACTON,0.0,14.513222,24.063564,,,105263.0,7865
2,ADELANTO,0.5,35.908799,68.451255,23628867.0,7876289.0,45695.5,35179
3,AGOURA HILLS,0.0,8.967396,10.827343,58614832.0,19538280.0,128054.5,40671
4,ALAMEDA,0.0,20.596928,39.796948,282357554.0,94119180.0,101246.5,78522


In [30]:
popi.to_csv('data/city_demographics.csv', index=False)

-------
------
Merging the above dataframe with cpuc data

In [31]:
cityag = pd.read_csv('data/city_ag_2.csv').iloc[:,1:]

In [32]:
cityag.shape

(537, 10)

In [33]:
cityag.columns

Index(['SiteCity', 'TotalFirstYearGrosskWh', 'TotalFirstYearGrossTherm',
       'TotalLifecycleGrosskWh', 'TotalLifecycleGrossTherm', 'Budget',
       'Total Claims', 'Total Programs', '% OBF Budget', '% Resource Budget'],
      dtype='object')

In [34]:
cities = cityag.merge(popi, left_on='SiteCity',right_on='city',how='left')

In [35]:
cities.columns

Index(['SiteCity', 'TotalFirstYearGrosskWh', 'TotalFirstYearGrossTherm',
       'TotalLifecycleGrosskWh', 'TotalLifecycleGrossTherm', 'Budget',
       'Total Claims', 'Total Programs', '% OBF Budget', '% Resource Budget',
       'city', 'dac_proportion', 'ces_score_median', 'ces_percentile_median',
       'Total Tax Revenue', 'Average Tax Revenue',
       'median_household_income_usd', 'total_population'],
      dtype='object')

In [36]:
nanindex = cities[cities['Total Tax Revenue'].isna()].index

cities.drop(index=nanindex).to_csv('data/cities_incomplete.csv')

In [37]:
cities.to_csv('data/cities.csv', index=False)

## checking for validity of cities df

In [38]:
cities[cities.city != cities.SiteCity]

Unnamed: 0,SiteCity,TotalFirstYearGrosskWh,TotalFirstYearGrossTherm,TotalLifecycleGrosskWh,TotalLifecycleGrossTherm,Budget,Total Claims,Total Programs,% OBF Budget,% Resource Budget,city,dac_proportion,ces_score_median,ces_percentile_median,Total Tax Revenue,Average Tax Revenue,median_household_income_usd,total_population
18,ARMONA,1900.193904,-6.590763,9500.96952,-32.953813,7837.695729,2,1,0.000000,100.0,,,,,,,,
20,AROMAS,61619.141000,-311.125000,622569.07211,-3174.629260,40748.695406,7,1,0.000000,100.0,,,,,,,,
27,AVILA BEACH,15711.720320,-13.543690,149714.36966,-133.403994,7001.948021,34,2,0.000000,100.0,,,,,,,,
29,BAKERSIFELD,4681.400000,0.000000,56176.80000,0.000000,1062.968615,75,1,97.810788,100.0,,,,,,,,
30,BALBOA,7175.000000,0.000000,86100.00000,0.000000,4081.858160,3,1,100.000000,100.0,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
515,WEITCHPEC,15176.474100,-84.706204,75882.37050,-423.531018,9106.357239,5,1,0.000000,100.0,,,,,,,,
516,WEOTT,7155.238500,-107.056800,78508.11240,-1150.352640,7059.032136,2,1,0.000000,100.0,,,,,,,,
521,WESTLEY,50884.020000,-139.680000,650928.24000,-2234.880000,33596.997501,3,1,0.000000,100.0,,,,,,,,
527,WILLOW CREEK,15709.891800,-133.465498,120488.72660,-1106.991571,20594.447160,7,1,0.000000,100.0,,,,,,,,


In [39]:
cities.style.apply(lambda x: ['background: lightgreen' if x.city != x.SiteCity else '' for i in x], 
               axis=1)

Unnamed: 0,SiteCity,TotalFirstYearGrosskWh,TotalFirstYearGrossTherm,TotalLifecycleGrosskWh,TotalLifecycleGrossTherm,Budget,Total Claims,Total Programs,% OBF Budget,% Resource Budget,city,dac_proportion,ces_score_median,ces_percentile_median,Total Tax Revenue,Average Tax Revenue,median_household_income_usd,total_population
0,ACAMPO,48664.72674,-148.89033,583976.72088,-1786.68396,17684.568509,6,1,0.0,100.0,ACAMPO,0.0,21.531463,40.282507,,,83913.0,9814.0
1,ADELANTO,13576.844232,-30.037266,67884.22116,-150.18633,27320.063231,4,1,0.0,100.0,ADELANTO,0.5,35.908799,68.451255,23628867.0,7876289.0,45695.5,35179.0
2,ALAMO,73049.3926,118.64524,684999.97629,-1279.097025,48785.542683,15,1,88.044896,100.0,ALAMO,0.0,2.175088,0.416194,,,219750.0,9373.0
3,ALBANY,153765.922,-785.076334,842200.6074,-4288.07028,60926.193528,35,2,0.0,100.0,ALBANY,0.0,8.128606,8.740068,57342690.0,19114230.0,102361.0,16742.0
4,ALHAMBRA,21630.949392,-32.172462,120233.350224,-178.879938,48465.447601,7,1,0.0,100.0,ALHAMBRA,0.15,33.702151,66.143272,154981924.0,51660641.333333,59206.0,84647.0
5,ALTA,185.0,0.0,2220.0,0.0,12.839932,1,1,0.0,100.0,ALTA,0.0,11.769621,16.937823,,,75563.0,2932.0
6,ALTADENA,21139.6,0.0,253675.2,0.0,14367.503894,15,1,0.0,100.0,ALTADENA,0.0,17.00765,30.36953,,,101875.0,37326.0
7,AMERICAN CANYON,246321.56896,-390.57183,2569735.066951,-3193.530636,113629.615697,737,2,0.0,100.0,AMERICAN CANYON,0.0,14.583999,24.013116,68336979.0,22778993.0,104042.0,13352.0
8,ANAHEIM,2843.648112,-1.635315,14302.66896,-8.226646,4105.068431,3,1,0.0,100.0,ANAHEIM,0.366667,34.963112,68.356665,1004225482.0,334741827.333333,66446.0,347532.0
9,ANDERSON,153146.064,-137.865002,1627538.6715,-1050.015402,162295.572507,18,2,14.336812,100.0,ANDERSON,0.0,19.653113,37.425905,22567917.0,7522639.0,47839.0,23258.0


In [40]:
cityag[cityag.SiteCity.str.startswith('B')]

Unnamed: 0,SiteCity,TotalFirstYearGrosskWh,TotalFirstYearGrossTherm,TotalLifecycleGrosskWh,TotalLifecycleGrossTherm,Budget,Total Claims,Total Programs,% OBF Budget,% Resource Budget
28,BAKERSFIELD,15071710.0,-47685.548785,154592600.0,-501341.34947,5958406.0,18480,2,7.438344,100.0
29,BAKERSIFELD,4681.4,0.0,56176.8,0.0,1062.969,75,1,97.810788,100.0
30,BALBOA,7175.0,0.0,86100.0,0.0,4081.858,3,1,100.0,100.0
31,BALDWIN PARK,681793.9,-23.228872,8072174.0,-116.144359,422768.9,10,1,4.412624,100.0
32,BANTA,51475.5,0.0,368614.8,0.0,14839.8,1,1,0.0,100.0
33,BARSTOW,193153.7,-61.42185,889673.2,-307.10925,172944.0,12,1,11.9436,100.0
34,BAY POINT,49620.72,-622.20315,574755.0,-7096.6802,45875.81,65,1,0.0,100.0
35,BAYSIDE,6071.043,-122.335286,78559.35,-1647.14246,7783.888,41,1,0.0,100.0
36,BELMONT,151204.1,0.0,1625604.0,0.0,116113.2,10,2,0.0,100.0
37,BELVEDERE,7369.28,-44.849,51787.84,-333.2452,5473.69,7,1,0.0,100.0


In [41]:
# Correct the misspelled city name by value rather than by row label 29, so
# the fix still applies if the aggregation file is regenerated in another order.
cityag['SiteCity'] = cityag['SiteCity'].replace('BAKERSIFELD', 'BAKERSFIELD')

In [42]:
cityag.head()

Unnamed: 0,SiteCity,TotalFirstYearGrosskWh,TotalFirstYearGrossTherm,TotalLifecycleGrosskWh,TotalLifecycleGrossTherm,Budget,Total Claims,Total Programs,% OBF Budget,% Resource Budget
0,ACAMPO,48664.72674,-148.89033,583976.72088,-1786.68396,17684.568509,6,1,0.0,100.0
1,ADELANTO,13576.844232,-30.037266,67884.22116,-150.18633,27320.063231,4,1,0.0,100.0
2,ALAMO,73049.3926,118.64524,684999.97629,-1279.097025,48785.542683,15,1,88.044896,100.0
3,ALBANY,153765.922,-785.076334,842200.6074,-4288.07028,60926.193528,35,2,0.0,100.0
4,ALHAMBRA,21630.949392,-32.172462,120233.350224,-178.879938,48465.447601,7,1,0.0,100.0


### Fixing Bakersfield - this has to be done from here because the % OBF and % Resource columns are calculated in the aggregation function

In [43]:
cityag[cityag.SiteCity=='BAKERSFIELD']

Unnamed: 0,SiteCity,TotalFirstYearGrosskWh,TotalFirstYearGrossTherm,TotalLifecycleGrosskWh,TotalLifecycleGrossTherm,Budget,Total Claims,Total Programs,% OBF Budget,% Resource Budget
28,BAKERSFIELD,15071710.0,-47685.548785,154592600.0,-501341.34947,5958406.0,18480,2,7.438344,100.0
29,BAKERSFIELD,4681.4,0.0,56176.8,0.0,1062.969,75,1,97.810788,100.0


In [44]:
cpuc = pd.read_csv('data/cpuc_with_budget_and_resource.csv')

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


In [45]:
df = cpuc.replace(to_replace='BAKERSIFELD', value='BAKERSFIELD')

In [46]:
df[df.SiteCity=='BAKERSFIELD'].Budget.sum()

11351822.495322328

In [47]:
# Percentage of Bakersfield's total budget that is OBF-flagged.
# Select Budget before summing and compute the grouped totals once, instead of
# summing every column of the claims table twice.
bakersfield_budget_by_obf = df[df.SiteCity == 'BAKERSFIELD'].groupby('OBF_Flag')['Budget'].sum()
bakersfield_budget_by_obf.loc[True] / bakersfield_budget_by_obf.sum() * 100

8.746926425386812

In [48]:
df[df.SiteCity=='BAKERSFIELD'].groupby('OBF_Flag').sum().Budget.sum()

11351822.495321361

In [70]:
df[df.SiteCity=='BAKERSFIELD'].groupby('OBF_Flag').sum().TotalFirstYearGrosskWh.sum()

26813323.39281007

In [49]:
dff = cityag.copy()

In [50]:
dff.iloc[28:30]

Unnamed: 0,SiteCity,TotalFirstYearGrosskWh,TotalFirstYearGrossTherm,TotalLifecycleGrosskWh,TotalLifecycleGrossTherm,Budget,Total Claims,Total Programs,% OBF Budget,% Resource Budget
28,BAKERSFIELD,15071710.0,-47685.548785,154592600.0,-501341.34947,5958406.0,18480,2,7.438344,100.0
29,BAKERSFIELD,4681.4,0.0,56176.8,0.0,1062.969,75,1,97.810788,100.0


In [51]:
dff.iloc[28:30].sum()

SiteCity                    BAKERSFIELDBAKERSFIELD
TotalFirstYearGrosskWh                 1.50764e+07
TotalFirstYearGrossTherm                  -47685.5
TotalLifecycleGrosskWh                 1.54649e+08
TotalLifecycleGrossTherm                   -501341
Budget                                 5.95947e+06
Total Claims                                 18555
Total Programs                                   3
% OBF Budget                               105.249
% Resource Budget                              200
dtype: object

In [52]:
dff[dff.SiteCity == 'BAKERSFIELD']

Unnamed: 0,SiteCity,TotalFirstYearGrosskWh,TotalFirstYearGrossTherm,TotalLifecycleGrosskWh,TotalLifecycleGrossTherm,Budget,Total Claims,Total Programs,% OBF Budget,% Resource Budget
28,BAKERSFIELD,15071710.0,-47685.548785,154592600.0,-501341.34947,5958406.0,18480,2,7.438344,100.0
29,BAKERSFIELD,4681.4,0.0,56176.8,0.0,1062.969,75,1,97.810788,100.0


In [53]:
5.95947e+06/11351822.495321361

0.5249791390286614

## The budgets above do not match, which means something else is going on with the numbers. Something might have happened in the aggregation, so we just have to do it again.

# **IT'S BECAUSE OF LOCAL GOVERNMENT PROGRAMS AGAIN** 

In [54]:
df.SiteCity = df.SiteCity.str.upper()

df.drop_duplicates(inplace=True)

def no_dupes(df):
    """Drop repeated claims, keeping one row per duplicated ``ClaimID``.

    For every ``ClaimID`` that occurs more than once, the row with the
    smallest index label is kept and the remaining rows are dropped. Rows
    whose ``ClaimID`` is missing are never treated as duplicates.

    Parameters
    ----------
    df : pandas.DataFrame
        Claims table with a ``ClaimID`` column. It is not modified.

    Returns
    -------
    pandas.DataFrame
        ``df`` without the redundant rows, in the original row order. The
        index is reset and the previous labels are kept in a leading
        ``index`` column.
    """
    # Sort by label so that "first occurrence" means "smallest index label".
    claim_ids = df['ClaimID'].sort_index(kind='mergesort')
    has_id = claim_ids.notna()

    # Every occurrence after the first one of a repeated claim is redundant.
    redundant = has_id & claim_ids.duplicated(keep='first')
    # The retained row of each repeated claim.
    retained = has_id & claim_ids.duplicated(keep=False) & ~redundant

    # A label that is retained for some claim is never dropped (only relevant
    # when the index itself contains repeated labels).
    badi = claim_ids.index[redundant].difference(claim_ids.index[retained])

    return df.drop(index=badi).reset_index()

# Numeric claim columns that are summed per city/county by aggregate_me.
column_list = [
    'TotalFirstYearGrosskWh',
    'TotalFirstYearGrossTherm',
    'TotalLifecycleGrosskWh',
    'TotalLifecycleGrossTherm',
    'Budget',
]


def _flag_budget(df, by, flag, value):
    """Sum ``Budget`` per ``by`` group over the rows where ``df[flag] == value``.

    Groups that only contain other (non-missing) flag values get 0; rows with
    a missing flag are ignored entirely.
    """
    known = df[df[flag].notna()]
    budget = known['Budget'].where(known[flag] == value, 0)
    return budget.groupby(known[by]).sum()


def aggregate_me(df, by, lg, column_list):
    """Aggregate claim-level data to one row per value of ``by``.

    Parameters
    ----------
    df : pandas.DataFrame
        Claim-level table. Must contain ``by``, ``PrgID``, ``Budget``,
        ``OBF_Flag``, ``Resource_Flag``, every column in ``column_list`` and,
        when ``lg`` is true, ``Grouping``. The frame is not modified.
    by : str
        Name of the string column to group on (e.g. ``'SiteCity'``); its
        values are upper-cased before grouping.
    lg : bool
        If true, only local-government claims are aggregated.
    column_list : list of str
        Numeric columns to sum per group.

    Returns
    -------
    pandas.DataFrame
        One row per group with ``by``, the summed ``column_list`` columns,
        ``Total Claims`` (row count), ``Total Programs`` (distinct ``PrgID``
        values, a missing ID counting as one program), ``% OBF Budget`` and
        ``% Resource Budget``.
    """
    # Work on a private copy of just the needed columns so the caller's frame
    # is neither upper-cased nor given a 'Total Claims' column as a side effect.
    needed = [by, 'PrgID', 'OBF_Flag', 'Resource_Flag', 'Budget'] + list(column_list)
    if lg:
        needed.append('Grouping')
    df = df[list(dict.fromkeys(needed))].copy()
    df[by] = df[by].str.upper()

    # filtering for local government
    if lg:
        lglist = ['Local Government (Counties)',
                  'Local Government ()',
                  'Local Government (Cities)',
                  'Local Government ']
        df = df[df['Grouping'].isin(lglist)]

    grouped = df.groupby(by)

    # aggregate!
    agg = grouped[list(column_list)].sum()
    agg['Total Claims'] = grouped.size()
    agg['Total Programs'] = grouped['PrgID'].nunique(dropna=False)

    # Resource/OBF budget as percentage of total budget. Group on `by` (not a
    # fixed 'SiteCity') so the percentages line up with the rows of `agg`.
    obf_budget = _flag_budget(df, by, 'OBF_Flag', True)
    resource_budget = _flag_budget(df, by, 'Resource_Flag', 1)
    agg['% OBF Budget'] = (obf_budget / agg['Budget']) * 100
    agg['% Resource Budget'] = (resource_budget / agg['Budget']) * 100

    return agg.reset_index()

## Tried redoing the aggregation, but `no_dupes` took very long to run, so maybe the 'BAKERSIFELD' entries are just duplicates?

# Number of times each claim ID occurs in df. The columns are named explicitly
# ('index' = claim ID, 'ClaimID' = count) because the names that
# value_counts().to_frame().reset_index() produces differ between pandas versions.
repeats = df['ClaimID'].value_counts().rename_axis('index').reset_index(name='ClaimID')

# Claim IDs that occur more than once.
all_dupes = repeats[repeats['ClaimID'] > 1]

dupes = all_dupes['index'].to_list()

d = df[df['ClaimID'].isin(dupes)]

d[d.SiteCity == 'BAKERSFIELD'].head()

# Same duplicate-claim lookup as for df, on the raw cpuc table (which still
# contains the misspelled city name). The columns are named explicitly
# ('index' = claim ID, 'ClaimID' = count) so the code does not depend on the
# pandas version.
repeats1 = cpuc['ClaimID'].value_counts().rename_axis('index').reset_index(name='ClaimID')

# Claim IDs that occur more than once.
all_dupes1 = repeats1[repeats1['ClaimID'] > 1]

dupes1 = all_dupes1['index'].to_list()

dc = cpuc[cpuc['ClaimID'].isin(dupes1)]

dc[dc.SiteCity == 'BAKERSIFELD'].head()

cpuc[cpuc.SiteCity=='BAKERSIFELD']

## OK, from this we can say that the 'BAKERSIFELD' entries are not duplicates of the other Bakersfield claims :(



In [75]:
lg = cpuc[cpuc['Grouping'].isin(['Local Government (Counties)',
         'Local Government ()',
         'Local Government (Cities)',
         'Local Government '])]

In [81]:
b = lg[lg.SiteCity.isin(['BAKERSFIELD', 'BAKERSIFELD'])]

In [83]:
b.groupby('OBF_Flag').sum().Budget.sum()

6153282.741686904

In [96]:
b.drop_duplicates()

Unnamed: 0.1,Unnamed: 0,ClaimID,PrgID,Sector_x,SiteCity,SiteZipCode,SiteID,NAICSCode,BldgHVAC,BldgLoc,...,Custom_Flag,Upstream_Flag_y,Midstream_Flag,Downstream_Flag,DirectInstall,Audit_Flag,Financing,ParentProgram,Exclude_From_Budget,Exclude_From_CE
1173,1173,PGE-2017-Q2-14479,PGE211011,Com,BAKERSFIELD,93309,PGE-993733,621210,cWtd,CZ13,...,True,False,False,True,Partial,True,,Government Partnership Programs,False,False
1190,1190,PGE-2017-Q4-26357,PGE211011,Com,BAKERSFIELD,93308,PGE-1121840,447110,cDXGF,CZ13,...,True,False,False,True,Partial,True,,Government Partnership Programs,False,False
1191,1191,PGE-2017-Q2-42913,PGE211011,Com,BAKERSFIELD,93301,PGE-978312,811490,cWtd,CZ13,...,True,False,False,True,Partial,True,,Government Partnership Programs,False,False
1310,1310,PGE-2017-Q2-14185,PGE211011,Com,BAKERSFIELD,93313,PGE-1012102,531000,cWtd,CZ13,...,True,False,False,True,Partial,True,,Government Partnership Programs,False,False
1966,1966,PGE-2017-Q4-117168,PGE211011,Com,BAKERSFIELD,93301,PGE-1101209,453998,Any,CZ13,...,True,False,False,True,Partial,True,,Government Partnership Programs,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
323225,323225,PGE-2019-Q3-50882,PGE211011,Public,BAKERSFIELD,93305,PGE-1609454,333111,Any,CZ13,...,True,False,False,True,Partial,True,,Government Partnership Programs,False,False
323226,323226,PGE-2019-Q3-50883,PGE211011,Public,BAKERSFIELD,93308,PGE-1605919,238910,Any,CZ13,...,True,False,False,True,Partial,True,,Government Partnership Programs,False,False
323228,323228,PGE-2019-Q3-50885,PGE211011,Public,BAKERSFIELD,93304,PGE-1609310,442299,Any,CZ13,...,True,False,False,True,Partial,True,,Government Partnership Programs,False,False
323229,323229,PGE-2019-Q3-50886,PGE211011,Public,BAKERSFIELD,93308,PGE-1596107,441320,Any,CZ13,...,True,False,False,True,Partial,True,,Government Partnership Programs,False,False


In [97]:
b.PrgID.value_counts()

PGE211011     18604
PGE2110051       39
Name: PrgID, dtype: int64

In [90]:
goodb = b[column_list].sum()
goodb.head()

TotalFirstYearGrosskWh      1.555928e+07
TotalFirstYearGrossTherm   -5.031314e+04
TotalLifecycleGrosskWh      1.596638e+08
TotalLifecycleGrossTherm   -5.303483e+05
Budget                      6.153283e+06
dtype: float64

In [95]:
# Percentage of the Bakersfield local-government budget that is OBF-flagged.
# Sum only the Budget column, once, rather than every column twice.
b_budget_by_obf = b.groupby('OBF_Flag')['Budget'].sum()
obf = b_budget_by_obf.loc[True] / b_budget_by_obf.sum() * 100
obf

7.219449909310274

In [91]:
cities.columns

Index(['SiteCity', 'TotalFirstYearGrosskWh', 'TotalFirstYearGrossTherm',
       'TotalLifecycleGrosskWh', 'TotalLifecycleGrossTherm', 'Budget',
       'Total Claims', 'Total Programs', '% OBF Budget', '% Resource Budget',
       'city', 'dac_proportion', 'ces_score_median', 'ces_percentile_median',
       'Total Tax Revenue', 'Average Tax Revenue',
       'median_household_income_usd', 'total_population'],
      dtype='object')

In [98]:
goodb['Total Claims'] = len(b)

goodb['Total Programs'] = 2

goodb['% OBF Budget'] = obf

goodb['% Resource Budget'] = 100

In [102]:
goodb

TotalFirstYearGrosskWh      1.555928e+07
TotalFirstYearGrossTherm   -5.031314e+04
TotalLifecycleGrosskWh      1.596638e+08
TotalLifecycleGrossTherm   -5.303483e+05
Budget                      6.153283e+06
Total Claims                1.864300e+04
Total Programs              2.000000e+00
% OBF Budget                7.219450e+00
% Resource Budget           1.000000e+02
dtype: float64

In [118]:
# Corrected Bakersfield row: the re-aggregated claim totals followed by the
# demographic columns already merged into `cities`.
# The city name is set here (on a copy) so this cell no longer depends on the
# later `goodb['SiteCity'] = ...` cell having been executed first.
bakersfield_totals = goodb.copy()
bakersfield_totals['SiteCity'] = 'BAKERSFIELD'

bakersfield_demographics = cities.loc[
    cities.SiteCity == 'BAKERSFIELD', 'city':'total_population'
].squeeze()

# pd.concat replaces Series.append, which newer pandas versions no longer provide.
s = pd.concat([bakersfield_totals, bakersfield_demographics])

In [115]:
goodb['SiteCity'] = 'BAKERSFIELD'

In [126]:
s

TotalFirstYearGrosskWh         1.55593e+07
TotalFirstYearGrossTherm          -50313.1
TotalLifecycleGrosskWh         1.59664e+08
TotalLifecycleGrossTherm           -530348
Budget                         6.15328e+06
Total Claims                         18643
Total Programs                           2
% OBF Budget                       7.21945
% Resource Budget                      100
SiteCity                       BAKERSFIELD
city                           BAKERSFIELD
dac_proportion                    0.494505
ces_score_median                   38.5997
ces_percentile_median              73.9185
Total Tax Revenue              5.47374e+08
Average Tax Revenue            1.82458e+08
median_household_income_usd          53187
total_population                    557492
dtype: object

In [120]:
city = cities.drop(index=29)

In [121]:
# NOTE(review): this adds a new *column* labelled 28 (it shows up as the last,
# all-NaN column in the table displayed further down) instead of replacing
# row 28. The row itself is set by the `city.iloc[28, :] = s` cell that
# follows, and the stray column is removed by a later `city.drop(columns=28)`.
city[28] = s

In [124]:
city.iloc[28,:] = s

In [125]:
city[city.SiteCity.str.startswith('B')]

Unnamed: 0,SiteCity,TotalFirstYearGrosskWh,TotalFirstYearGrossTherm,TotalLifecycleGrosskWh,TotalLifecycleGrossTherm,Budget,Total Claims,Total Programs,% OBF Budget,% Resource Budget,city,dac_proportion,ces_score_median,ces_percentile_median,Total Tax Revenue,Average Tax Revenue,median_household_income_usd,total_population,28
28,BAKERSFIELD,15559280.0,-50313.143735,159663800.0,-530348.286997,6153283.0,18643,2,7.21945,100.0,BAKERSFIELD,0.494505,38.59974,73.918527,547373792.0,182457900.0,53187.0,557492.0,
30,BALBOA,7175.0,0.0,86100.0,0.0,4081.858,3,1,100.0,100.0,,,,,,,,,
31,BALDWIN PARK,681793.9,-23.228872,8072174.0,-116.144359,422768.9,10,1,4.412624,100.0,BALDWIN PARK,0.941176,53.149054,91.663514,87887248.0,29295750.0,66316.0,75905.0,
32,BANTA,51475.5,0.0,368614.8,0.0,14839.8,1,1,0.0,100.0,,,,,,,,,
33,BARSTOW,193153.7,-61.42185,889673.2,-307.10925,172944.0,12,1,11.9436,100.0,BARSTOW,0.666667,46.052962,84.127885,67996702.0,22665570.0,41909.5,31403.0,
34,BAY POINT,49620.72,-622.20315,574755.0,-7096.6802,45875.81,65,1,0.0,100.0,,,,,,,,,
35,BAYSIDE,6071.043,-122.335286,78559.35,-1647.14246,7783.888,41,1,0.0,100.0,BAYSIDE,0.0,12.013815,17.593644,,,61554.0,5229.0,
36,BELMONT,151204.1,0.0,1625604.0,0.0,116113.2,10,2,0.0,100.0,BELMONT,0.0,8.604714,10.001261,83965540.0,27988510.0,143295.0,27359.0,
37,BELVEDERE,7369.28,-44.849,51787.84,-333.2452,5473.69,7,1,0.0,100.0,,,,,,,,,
38,BEN LOMOND,17841.03,-166.905,124730.5,-851.28066,8335.55,3,1,0.0,100.0,BEN LOMOND,0.0,7.476653,7.712196,,,93123.0,8366.0,


In [127]:
city.drop(columns=28, inplace=True)

In [128]:
city.head()

Unnamed: 0,SiteCity,TotalFirstYearGrosskWh,TotalFirstYearGrossTherm,TotalLifecycleGrosskWh,TotalLifecycleGrossTherm,Budget,Total Claims,Total Programs,% OBF Budget,% Resource Budget,city,dac_proportion,ces_score_median,ces_percentile_median,Total Tax Revenue,Average Tax Revenue,median_household_income_usd,total_population
0,ACAMPO,48664.72674,-148.89033,583976.72088,-1786.68396,17684.568509,6,1,0.0,100.0,ACAMPO,0.0,21.531463,40.282507,,,83913.0,9814.0
1,ADELANTO,13576.844232,-30.037266,67884.22116,-150.18633,27320.063231,4,1,0.0,100.0,ADELANTO,0.5,35.908799,68.451255,23628867.0,7876289.0,45695.5,35179.0
2,ALAMO,73049.3926,118.64524,684999.97629,-1279.097025,48785.542683,15,1,88.044896,100.0,ALAMO,0.0,2.175088,0.416194,,,219750.0,9373.0
3,ALBANY,153765.922,-785.076334,842200.6074,-4288.07028,60926.193528,35,2,0.0,100.0,ALBANY,0.0,8.128606,8.740068,57342690.0,19114230.0,102361.0,16742.0
4,ALHAMBRA,21630.949392,-32.172462,120233.350224,-178.879938,48465.447601,7,1,0.0,100.0,ALHAMBRA,0.15,33.702151,66.143272,154981924.0,51660640.0,59206.0,84647.0


In [129]:
city[city['SiteCity']=='VENTURA']

Unnamed: 0,SiteCity,TotalFirstYearGrosskWh,TotalFirstYearGrossTherm,TotalLifecycleGrosskWh,TotalLifecycleGrossTherm,Budget,Total Claims,Total Programs,% OBF Budget,% Resource Budget,city,dac_proportion,ces_score_median,ces_percentile_median,Total Tax Revenue,Average Tax Revenue,median_household_income_usd,total_population
508,VENTURA,906599.361168,36753.986211,13021450.0,588244.331056,823356.768848,9,1,0.0,100.0,VENTURA,0.08,19.78211,37.810569,278789652.0,92929884.0,78115.0,109566.0


In [137]:
# Renumber the rows (row 29 was dropped above) and write the corrected city
# table without the index column.
city.reset_index(drop=True).to_csv('data/cities_1.csv', index=False)

In [136]:
city[city.SiteCity != city.city]

Unnamed: 0,SiteCity,TotalFirstYearGrosskWh,TotalFirstYearGrossTherm,TotalLifecycleGrosskWh,TotalLifecycleGrossTherm,Budget,Total Claims,Total Programs,% OBF Budget,% Resource Budget,city,dac_proportion,ces_score_median,ces_percentile_median,Total Tax Revenue,Average Tax Revenue,median_household_income_usd,total_population
18,ARMONA,1900.193904,-6.590763,9500.96952,-32.953813,7837.695729,2,1,0.0,100.0,,,,,,,,
20,AROMAS,61619.141000,-311.125000,622569.07211,-3174.629260,40748.695406,7,1,0.0,100.0,,,,,,,,
27,AVILA BEACH,15711.720320,-13.543690,149714.36966,-133.403994,7001.948021,34,2,0.0,100.0,,,,,,,,
30,BALBOA,7175.000000,0.000000,86100.00000,0.000000,4081.858160,3,1,100.0,100.0,,,,,,,,
32,BANTA,51475.500000,0.000000,368614.80000,0.000000,14839.803831,1,1,0.0,100.0,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
515,WEITCHPEC,15176.474100,-84.706204,75882.37050,-423.531018,9106.357239,5,1,0.0,100.0,,,,,,,,
516,WEOTT,7155.238500,-107.056800,78508.11240,-1150.352640,7059.032136,2,1,0.0,100.0,,,,,,,,
521,WESTLEY,50884.020000,-139.680000,650928.24000,-2234.880000,33596.997501,3,1,0.0,100.0,,,,,,,,
527,WILLOW CREEK,15709.891800,-133.465498,120488.72660,-1106.991571,20594.447160,7,1,0.0,100.0,,,,,,,,


In [143]:
ces[ces.city=='SPRING VALLEY']

Unnamed: 0,city,dac_proportion,ces_score_median,ces_percentile_median
678,SPRING VALLEY,0.0,20.637078,39.929373


In [145]:
nocounty = ['VALENCIA',
 'CITY OF INDUSTRY',
 'MC FARLAND',
 'PLUMAS LAKE',
 'SPRING VALLEY',
 'MARIPOSA',
 'SAN JUAN CAPO',
 'WINCHESTER',
 'RAMONA',
 'FALLBROOK',
 'RIVERDALE',
 'NEWBURY PARK',
 'ALTADENA',
 'SN BERNRDNO',
 'LE GRAND',
 'SAN YSIDRO',
 'LA JOLLA',
 'CARUTHERS',
 'OLIVEHURST',
 'PLANADA',
 'O NEALS',
 'LA CANADA',
 'MOJAVE',
 'CUTTEN',
 'MIRA LOMA',
 'LAKESIDE',
 'OAKHURST',
 'SAUGUS',
 'CASTRO VALLEY',
 'NEWCASTLE',
 'RCH CUCAMONGA',
 'TRABUCO CANYON',
 'STEVENSON RANCH',
 'LA CRESCENTA',
 'ALPINE',
 'SUN CITY',
 'HUNTINGTON PK',
 'VALLEY CENTER',
 'HUNTINGTN BCH',
 'ROWLAND HEIGHTS',
 'SAN PEDRO',
 'BONITA',
 'ALTA LOMA',
 'UNIVERSAL CITY',
 'RODEO',
 'RESEDA',
 'FIVE POINTS',
 'OCEANO',
 'WINTON',
 'BLOOMINGTON',
 'CATHEDRAL CTY',
 'NIPOMO',
 'MIDWAY CITY',
 'JAMUL',
 'LOST HILLS',
 'SYLMAR',
 'HACIENDA HEIGHTS',
 'NORTHRIDGE',
 'N HOLLYWOOD',
 'TEMPLETON',
 'BRIDGEPORT',
 'FRENCH CAMP',
 'MCKINLEYVILLE',
 'ALAMO',
 'OAK VIEW',
 'WILMINGTON',
 'EL DORADO HILLS',
 'FOUNTAIN VLY',
 'MARTELL',
 'EL SOBRANTE',
 'NORTH FORK',
 'DEL REY',
 'ROSAMOND',
 'COULTERVILLE',
 'SAINT HELENA',
 'DELHI',
 'SNELLING',
 'EL NIDO',
 'BONSALL',
 'SHINGLE SPRINGS',
 'CANOGA PARK',
 'DSRT HOT SPGS',
 'JULIAN',
 'LA GRANGE',
 'LAMONT',
 'ACTON',
 'LITTLEROCK',
 'LAKE ARROWHEAD',
 'KETTLEMAN CITY',
 'CARDIFF',
 'SAN MIGUEL',
 'FOOTHILL RANCH',
 'WOODLAND HLS',
 'BORREGO SPRINGS',
 'SHERMAN OAKS',
 'LOS OSOS',
 'CAMERON PARK',
 'RAISIN CITY',
 'WINNETKA',
 'AHWAHNEE',
 'BURREL',
 'HARBOR CITY',
 'VAN NUYS',
 'CAMBRIA',
 'CANYON COUNTRY',
 'CORONA DEL MAR',
 'LOS MOLINOS',
 'SANTA FE SPGS',
 'CARMEL',
 'APTOS',
 'PENN VALLEY',
 'LADERA RANCH',
 'STANFORD',
 'GRANITE BAY',
 'WESTLEY',
 'BAY POINT',
 'NEWHALL',
 'HELENDALE',
 'PHELAN',
 'BAKERSIFELD',
 'FRAZIER PARK',
 'BANTA',
 'HAWAIIAN GDNS',
 'W HOLLYWOOD',
 'CASTAIC',
 'GERBER',
 'VENICE',
 'GARBERVILLE',
 'SHINGLETOWN',
 'SAN LORENZO',
 'HOLLYWOOD',
 'OAK PARK',
 'PEBBLE BEACH',
 'HELM',
 'RUNNING SPRINGS',
 'CAMPO',
 'LEE VINING',
 'TECATE',
 'CANYON CNTRY',
 'MONTROSE',
 'CABAZON',
 'EDWARDS',
 'ROWLAND HGHTS',
 'CALIFORNIA CITY',
 'ROMOLAND',
 'PIXLEY',
 'OROSI',
 'SOQUEL',
 'ARLETA',
 'FELTON',
 'NEWPORT COAST',
 'SAN SIMEON',
 'BAYSIDE',
 'RANCHO SANTA FE',
 'SN LUIS OBISP',
 'MARINA DEL REY',
 'COARSEGOLD',
 'FAIR OAKS',
 'LEBEC',
 'IDYLLWILD',
 'FORESTVILLE',
 'STUDIO CITY',
 'LA',
 'SOUTH DOS PALOS',
 'NORTH HILLS',
 'RCHO STA MARG',
 'ORCUTT',
 'COTTONWOOD',
 'RNCHO PLS VRD',
 'ORANGEVALE',
 'BOULDER CREEK',
 'CITY INDUSTRY',
 'AVILA BEACH',
 'PACHECO',
 'MENTONE',
 'DESCANSO',
 'ALPAUGH',
 'CASTROVILLE',
 'GLEN ELLEN',
 'PANORAMA CITY',
 'LOLETA',
 'ISLA VISTA',
 'PLEASANT GROVE',
 'BYRON',
 'ANGELS CAMP',
 'WEST HILLS',
 'CHATSWORTH',
 'ENCINO',
 'CAPISTRANO BCH',
 'JOSHUA TREE',
 'MORONGO VALLEY',
 'ANTELOPE',
 'CARMICHAEL',
 'QUARTZ HILL',
 'GRANADA HILLS',
 'LLANO',
 'FRIANT',
 'BIG SUR',
 'KELSEYVILLE',
 'KENTFIELD',
 'WARNER SPRINGS',
 'SUNLAND',
 'HACIENDA HGTS',
 'EARLIMART',
 'YERMO',
 'QUINCY',
 'MARINA DL REY',
 'RANCHO DOMINGUEZ',
 'ARBUCKLE',
 'PACOIMA',
 'CAYUCOS',
 'THOUSAND PLMS',
 'MIDDLETOWN',
 'VALLEY SPRINGS',
 'PINE VALLEY',
 'KENWOOD',
 'JOLON',
 'QUAIL VALLEY',
 'BOULEVARD',
 'GEYSERVILLE',
 'BUTTONWILLOW',
 'PRINCETON',
 'LINDEN',
 'MANHATTAN BCH',
 'TIPTON',
 'INYOKERN',
 'LATON',
 'SUN VALLEY',
 'JUNE LAKE',
 'RICHVALE',
 'STEVINSON',
 'ETIWANDA',
 'TWAIN HARTE',
 'JAMESTOWN',
 'CAPO BEACH',
 'ARMONA',
 'PEARBLOSSOM',
 'SPRECKELS',
 'SUNSET BEACH',
 'MAXWELL',
 'WESTLAKE VLG',
 'FOOTHILL RNCH',
 'S EL MONTE',
 'TUJUNGA',
 'GUERNEVILLE',
 'CUTLER',
 'MISSION HILLS',
 'EAST RANCHO DOMINGUEZ',
 'PALO CEDRO',
 'CATHEYS VALLEY',
 'NORTH HOLLYWOOD',
 'NORTH PALM SPRINGS',
 'ANGWIN',
 'RUTHERFORD',
 'SAN MARTIN',
 'WEOTT',
 'AGOURA',
 'PAUMA VALLEY',
 'TUOLUMNE',
 'REDWAY',
 'GORMAN',
 'REDWOOD VALLEY',
 'DIAMOND SPRINGS',
 'VERNALIS',
 'CTY OF CMMRCE',
 'PINE GROVE',
 'TARZANA',
 'LA CANADA FLT',
 'GOSHEN',
 'TOMALES',
 'NUEVO',
 'KENSINGTON',
 'SOMIS',
 'TRAVIS AFB',
 'HOOPA',
 'RICHGROVE',
 'STEVENSON RNH',
 'JACUMBA',
 'MONTE RIO',
 'MENDOCINO',
 'MYERS FLAT',
 'TRAVER',
 'NO HIGHLANDS',
 'ACAMPO',
 'WESTCHESTER',
 'NICE',
 'GREENBRAE',
 'TRONA',
 'WEST SAN RAFAEL',
 'WHITEWATER',
 'RLLNG HLS EST',
 'FOREST FALLS',
 'PIONEER',
 'PALERMO',
 'SUTTER',
 'OAK HILLS',
 'SANTA NELLA',
 'THREE RIVERS',
 'DUNCANS MILLS',
 'CORONA DL MAR',
 'LAKE ISABELLA',
 'CAZADERO',
 'CROWS LANDING',
 'LUCERNE',
 'FORESTHILL',
 'GRATON',
 'PAYNES CREEK',
 'BERMUDA DUNES',
 'HOPLAND',
 'ESPARTO',
 'LAKE HUGHES',
 'UPPER LAKE',
 'AROMAS',
 'LUCERNE VALLEY',
 'CLEARLAKE OAKS',
 'SANTA YNEZ',
 'PENRYN',
 'YOSEMITE NATIONAL PARK',
 'CRESTLINE',
 'BURNEY',
 'LOCKEFORD',
 'POLLOCK PINES',
 'CRESSEY',
 'S PASADENA',
 'INVERNESS',
 'BODEGA BAY',
 'BIRDS LANDING',
 'SANTA YSABEL',
 'BAKER',
 'MAGALIA',
 'LA CANADA FLINTRIDGE',
 'ROBBINS',
 'STANDFORD',
 'PINEDALE',
 'COLOMA',
 'HOMELAND',
 'MEADOW VISTA',
 'LAGUNITAS',
 'N PALM SPGS',
 'WILLOW CREEK',
 'BOONVILLE',
 'LOWER LAKE',
 'NICASIO',
 'PATTON',
 'SCOTIA',
 'POINT REYES STATION',
 'WOODLAND HILLS',
 'SILVERADO',
 'SLOUGHHOUSE',
 'NORTH HIGHLANDS',
 'MONTECITO',
 'CARMEL VALLEY',
 'WRIGHTWOOD',
 'DUNNIGAN',
 'CHALFANT VALLEY',
 'MOSS LANDING',
 'GEORGETOWN',
 'WOODACRE',
 'BKD',
 'STRATFORD',
 'SAN GERONIMO',
 'TWENTYNIN PLM',
 'DISCOVERY BAY',
 'ROBINSON RANCH',
 'STRATHMORE',
 'PESCADERO',
 'GOLD RUN',
 'PHILO',
 'S SAN GABRIEL',
 'TRANQUILLITY',
 'OCCIDENTAL',
 'VALLEY VLG',
 'PHILLIPSVILLE',
 'BALBOA',
 'PACIFIC PLSDS',
 'COBB',
 'BENTON',
 'WILLIMAS',
 'CARLOTTA',
 'EL PORTAL',
 'MONARCH BEACH',
 'PIRU',
 'ELK CREEK',
 'FINLEY',
 'LITTLE RIVER',
 'SAN ANDREAS',
 'SHASTA LAKE CITY',
 'PENNGROVE',
 'JENNER',
 'LEONA VALLEY',
 'RIO OSO',
 'CAMINO',
 'PETROLIA',
 'FREEDOM',
 'KORBEL',
 'MUSCOY',
 'BEN LOMOND',
 'CASSEL',
 'DILLON BEACH',
 'OAKVILLE',
 'REDCREST',
 'FARMINGTON',
 'SQUAW VALLEY',
 'VINA',
 'IVANHOE',
 'INDEPENDENCE',
 'WHEELER RIDGE',
 'LAKEHEAD',
 'BRADLEY',
 'HAMILTON CITY',
 'ROHNERT',
 'DURHAM',
 'WEITCHPEC',
 'DOWNIEVILLE',
 'RED TOP',
 'RIO LINDA',
 'ORICK',
 'DUCOR',
 'FIELDBROOK',
 'KNIGHTS LANDING',
 'CAMP MEEKER',
 'SUMMIT',
 'AGUANGA',
 'SANTA MARGARITA',
 'MECCA',
 'SAMOA',
 'SOMERSET',
 'BUTTE CITY',
 'MOUNT LAGUNA',
 'SAN ARDO',
 'TRES PINOS',
 'FOREST KNOLLS',
 'BROWNS VALLEY',
 'COYOTE',
 'LUDLOW',
 'BRIDGEVILLE',
 'VALLEY VILLAGE',
 'ARNOLD',
 'FIELDS LANDING',
 'KNIGHTSEN',
 'MILIPITAS',
 'LAS FLORES',
 'MONOLITH',
 'SAN RAMOS',
 'FULTON',
 'FISH CAMP',
 'MURPHYS',
 'UNION CITYCA',
 'RANCHITA',
 'SAN LOUIS OBISPO',
 'MONTARA',
 'ORO GRANDE',
 'MIRANDA',
 'BERKLEY',
 'PRATHER',
 'PAICINES',
 'EAST LOS ANGELES',
 'POTRERO',
 'CHUALAR',
 'LA HONDA',
 'EAST NICOLAUS',
 'EL TORO',
 'BETHEL ISLAND',
 'HIDDEN VALLEY LAKE',
 'SODA SPRINGS',
 'JOHANNESBURG',
 'DESERT CENTER',
 'CLEMENTS',
 'LAKE TAHOE',
 'RCH MSN VIEJO',
 'GUALALA',
 'ELVERTA',
 'BEAR VALLEY',
 'VALLEY FORD',
 'LAKEVIEW',
 'HYDESVILLE',
 'COPPEROPOLIS',
 'LA SELVA BEACH',
 'POPLAR',
 'CROWLEY LAKE',
 'MOUNTAIN HOUSE',
 'CHERRY VALLEY',
 'CARMEL BY THE SEA',
 'OLD STATION',
 'RIPLEY',
 'LAKE SHERWOOD',
 'SHELL BEACH',
 'BROOKS',
 'VALLEY ACRES',
 'WEST POINT',
 'CARRISA PLAINS',
 'ELMIRA',
 'BIOLA',
 'LAYTONVILLE',
 'ROSSMOOR',
 'GROVELAND',
 'MULTI SITE',
 'NEWBERRY SPRINGS',
 'DIABLO',
 'HEBER',
 'FALL RIVER MILLS',
 'LOS OLIVOS',
 'CRESCENT MILLS',
 'TECOPA',
 'WESTWOOD',
 'MANCHESTER',
 'GRIMES',
 'YOLO',
 'CANYON',
 'BROOKDALE',
 'TWIN PEAKS',
 'ALVISO',
 'BURNT RANCH',
 'BELLA VISTA',
 'PRESIDIO',
 'ST  HELENA',
 'CALIF CITY',
 'DELEVAN',
 'LA HABRA HGTS',
 'YETTEM',
 'SAN FRANCISO',
 'MOUNT HERMON',
 'HARMONY',
 'EL GRANADA',
 'SALIDA',
 'SAN FRANSISCO',
 'CHALLENGE',
 'LK ARROWHEAD',
 'GREENWOOD',
 'SAN LUIS OBISPO',
 'DAVENPORT',
 'FERNBRIDGE',
 'MOFFETT FIELD',
 'SANTA MARGAR',
 'ORLEANS',
 'BENECIA',
 'SULTANA',
 'MILLVILLE',
 'PARKER DAM',
 'PLAYA DEL REY',
 'PORT COSTA',
 'GARDEN VALLEY',
 'HACIENDA HIGHTS',
 'RAYMOND',
 'CALPELLA',
 'MADISON',
 'LOS ANGLEES',
 'COTO DE CAZA',
 'TWENTY NINE PALMS',
 'MCCLELLAN',
 'POINT MUGU',
 'ALTA',
 'GLENN',
 'SKY FOREST',
 'CRESTON',
 'CLARKSBURG',
 'FOREST RANCH',
 'VERDUGO CITY',
 'RIO NIDO',
 'SHANDON',
 'SOMES BAR',
 'KNEELAND',
 'POTTER VALLEY',
 'HONEYDEW',
 'LOS ALAMOS',
 'MODJESKA',
 'TWENTYNINE',
 'MANILA',
 'MANHATTAN',
 'CAPAY']

In [148]:
# Show the CES rows whose city name is listed in `nocounty`.
ces.loc[ces.city.isin(nocounty)]

Unnamed: 0,city,dac_proportion,ces_score_median,ces_percentile_median
0,ACAMPO,0.0,21.531463,40.282507
1,ACTON,0.0,14.513222,24.063564
5,ALAMO,0.0,2.175088,0.416194
9,ALPINE,0.0,11.327197,15.897339
10,ALTA,0.0,11.769621,16.937823
...,...,...,...,...
771,WINNETKA,0.5,38.864448,74.397780
774,WINTON,1.0,42.836518,79.978560
778,WOODLAND HILLS,0.0,19.115863,36.101652
779,WRIGHTWOOD,0.0,14.743829,24.504982


_______
# **RUCA CODES for cities** 
#### RUCA codes are Lou's suggestion for measuring rurality at the city level. He doesn't actually like them, because Ojai != Mammoth Lakes in terms of rurality, yet they have similar RUCA scores.

In [20]:
# Load the RUCA 2010 (revised) file; the preview below shows one row per census tract.
r = pd.read_csv('data/ruca2010revised.csv')

In [21]:
r.head()

Unnamed: 0,State-County FIPS Code,State,County,census_tract,RUCA code,"Secondary RUCA Code, 2010 (see errata)","Tract Population, 2010","Land Area (square miles), 2010","Population Density (per square mile), 2010"
0,1001,AL,Autauga County,1001020100,1,1.0,1912,3.8,504.8
1,1001,AL,Autauga County,1001020200,1,1.0,2170,1.3,1682.5
2,1001,AL,Autauga County,1001020300,1,1.0,3373,2.1,1633.1
3,1001,AL,Autauga County,1001020400,1,1.0,4386,2.5,1779.8
4,1001,AL,Autauga County,1001020500,1,1.0,10766,4.4,2446.4


In [22]:
# Load the lookup that maps each census tract to a zip code, city and county.
cc = pd.read_csv('data/census_tract_city_county_mapping.csv')

In [37]:
# California tracts only, reduced to the three columns needed for the city join.
ruca = r.loc[r.State == 'CA', ['County', 'census_tract', 'RUCA code']]

ruca.head()

In [42]:
# Bring county names into the upper-case, suffix-free form used by the other
# tables (e.g. 'Alameda County' -> 'ALAMEDA').
# `assign` returns a new frame instead of writing through attribute access into
# `ruca`, which was obtained by filtering `r`; that kind of write is the pattern
# pandas reports as SettingWithCopyWarning. `regex=False` makes the suffix a
# literal match regardless of the pandas version's default.
ruca = ruca.assign(
    County=ruca.County.str.replace(' County', '', regex=False).str.upper()
)

ruca.head()

In [48]:
# Load the city-level table (program totals plus the demographic columns shown below).
cities = pd.read_csv('data/cities_1.csv')

In [52]:
# Keep only the rows that have a value in the `city` column.
city = cities[cities.city.notna()]

In [53]:
city.head()

Unnamed: 0,SiteCity,TotalFirstYearGrosskWh,TotalFirstYearGrossTherm,TotalLifecycleGrosskWh,TotalLifecycleGrossTherm,Budget,Total Claims,Total Programs,% OBF Budget,% Resource Budget,city,dac_proportion,ces_score_median,ces_percentile_median,Total Tax Revenue,Average Tax Revenue,median_household_income_usd,total_population
0,ACAMPO,48664.72674,-148.89033,583976.72088,-1786.68396,17684.568509,6,1,0.0,100.0,ACAMPO,0.0,21.531463,40.282507,,,83913.0,9814.0
1,ADELANTO,13576.844232,-30.037266,67884.22116,-150.18633,27320.063231,4,1,0.0,100.0,ADELANTO,0.5,35.908799,68.451255,23628867.0,7876289.0,45695.5,35179.0
2,ALAMO,73049.3926,118.64524,684999.97629,-1279.097025,48785.542683,15,1,88.044896,100.0,ALAMO,0.0,2.175088,0.416194,,,219750.0,9373.0
3,ALBANY,153765.922,-785.076334,842200.6074,-4288.07028,60926.193528,35,2,0.0,100.0,ALBANY,0.0,8.128606,8.740068,57342690.0,19114230.0,102361.0,16742.0
4,ALHAMBRA,21630.949392,-32.172462,120233.350224,-178.879938,48465.447601,7,1,0.0,100.0,ALHAMBRA,0.15,33.702151,66.143272,154981924.0,51660640.0,59206.0,84647.0


In [54]:
city[city.SiteCity != city.city]

Unnamed: 0,SiteCity,TotalFirstYearGrosskWh,TotalFirstYearGrossTherm,TotalLifecycleGrosskWh,TotalLifecycleGrossTherm,Budget,Total Claims,Total Programs,% OBF Budget,% Resource Budget,city,dac_proportion,ces_score_median,ces_percentile_median,Total Tax Revenue,Average Tax Revenue,median_household_income_usd,total_population


In [55]:
len(city)

417

In [272]:
len(cities)

536

In [273]:
536-417

119

In [57]:
cc.head()

Unnamed: 0,census_tract,zip_code,city,county
0,6019001100,93706,Fresno,Fresno
1,6071001600,91761,Ontario,San Bernardino
2,6019000200,93706,Fresno,Fresno
3,6077000801,95203,Stockton,San Joaquin
4,6019001500,93725,Fresno,Fresno


In [58]:
# Upper-case both name columns so they line up with the keys of the other tables.
cc['city'] = cc['city'].str.upper()
cc['county'] = cc['county'].str.upper()

cc.head()

In [60]:
ruca.head()

Unnamed: 0,County,census_tract,RUCA code
3560,ALAMEDA,6001400100,1
3561,ALAMEDA,6001400200,1
3562,ALAMEDA,6001400300,1
3563,ALAMEDA,6001400400,1
3564,ALAMEDA,6001400500,1


In [61]:
# Attach the RUCA columns to every tract in the city/county lookup (left join on tract id).
ccr = pd.merge(cc, ruca, how='left', on='census_tract')

In [62]:
ccr.head()

Unnamed: 0,census_tract,zip_code,city,county,County,RUCA code
0,6019001100,93706,FRESNO,FRESNO,FRESNO,1
1,6071001600,91761,ONTARIO,SAN BERNARDINO,SAN BERNARDINO,1
2,6019000200,93706,FRESNO,FRESNO,FRESNO,1
3,6077000801,95203,STOCKTON,SAN JOAQUIN,SAN JOAQUIN,1
4,6019001500,93725,FRESNO,FRESNO,FRESNO,1


In [63]:
# Keep only the RUCA file's `County` column; the lookup's own `county` column is removed.
# The original cell also evaluated three lookups whose results were never displayed
# (only a cell's last expression is shown) and stripped every space out of `county`
# immediately before dropping it. That write had no lasting effect and would have
# mangled multi-word names such as 'SAN BERNARDINO', so it is omitted here.
# NOTE(review): to inspect rows where the two county columns disagree, run
# `ccr[ccr.county != ccr.County]` in its own cell before this one.
ccr = ccr.drop(columns='county')

In [72]:
ccr.head()

Unnamed: 0,census_tract,zip_code,city,County,RUCA code
0,6019001100,93706,FRESNO,FRESNO,1
1,6071001600,91761,ONTARIO,SAN BERNARDINO,1
2,6019000200,93706,FRESNO,FRESNO,1
3,6077000801,95203,STOCKTON,SAN JOAQUIN,1
4,6019001500,93725,FRESNO,FRESNO,1


In [89]:
# Reduce to the three columns of interest and drop repeated (city, county, code) rows.
# Chaining the two steps yields a fresh frame; the original called
# drop_duplicates(inplace=True) on a column-sliced frame, which is the pattern
# pandas reports as SettingWithCopyWarning.
ccr = ccr[['city', 'County', 'RUCA code']].drop_duplicates()

# How many distinct (county, code) rows remain per city.
ccr.city.value_counts()

GRASS VALLEY      5
PETALUMA          4
LANCASTER         4
LODI              4
WATSONVILLE       4
                 ..
SOUTH EL MONTE    1
CAMPBELL          1
ESSEX             1
MIDDLETOWN        1
GARDEN GROVE      1
Name: city, Length: 789, dtype: int64

In [87]:
ccr[ccr.city=='GRASS VALLEY']

Unnamed: 0,city,County,RUCA code
2364,GRASS VALLEY,NEVADA,4
6233,GRASS VALLEY,NEVADA,5
6261,GRASS VALLEY,NEVADA,6
6856,GRASS VALLEY,NEVADA,10
7272,GRASS VALLEY,NEVADA,3


Since RUCA codes are assigned by census tract, some cities have multiple codes. Should we take the average of them?

In [88]:
(4+5+6+10+3)/5

5.6

In [112]:
# Mean RUCA code per city, rounded to the nearest integer.
# The numeric column is selected explicitly: `ccr` also carries the string column
# `County`, and recent pandas raises a TypeError when asked to average it
# (older releases silently dropped such columns).
# NOTE(review): `ccr` was de-duplicated before this point, so each distinct code
# counts once per city rather than once per tract, and tracts coded 99 are still
# part of the average — confirm that this is intended.
ruca = ccr.groupby('city')[['RUCA code']].mean().round().astype(int).reset_index()

In [103]:
# Distinct (city, county) pairs, ordered by city name.
cc = (
    ccr.loc[:, ['city', 'County']]
    .drop_duplicates()
    .sort_values(by='city')
)

In [106]:
cc.city.value_counts()>1

WESTLAKE VILLAGE     True
WATSONVILLE          True
FIREBAUGH            True
DIXON                True
SAN FRANCISCO        True
                    ...  
MONTEREY PARK       False
SOMERSET            False
CEDARVILLE          False
GEYSERVILLE         False
GARDEN GROVE        False
Name: city, Length: 789, dtype: bool

In [108]:
cc[cc.city=='SAN FRANCISCO']

Unnamed: 0,city,County
7961,SAN FRANCISCO,SAN MATEO
759,SAN FRANCISCO,SAN FRANCISCO


^^^ Still have to deal with cities that map to more than one county, but for now the RUCA code looks good.

In [111]:
# Write the city / county / RUCA rows to disk, ordered by city name, without the index.
ccr.sort_values(by='city').to_csv(
    'data/city_county_ruca.csv',
    index=False,
)

______

#### According to the documentation, a RUCA code of 99 means there is zero population there. That is obviously not the case for these cities in LA County, so I'm just going to give them a code of 1.

Also, what's up with those duplicates? :00000

In [115]:
ruca.head()

Unnamed: 0,city,RUCA code
0,ACAMPO,2
1,ACTON,2
2,ADELANTO,1
3,AGOURA HILLS,1
4,ALAMEDA,1


In [116]:
ruca.city.value_counts()

TEMECULA          1
CITRUS HEIGHTS    1
NICE              1
BELLFLOWER        1
SEAL BEACH        1
                 ..
FAIRFAX           1
NORTH HILLS       1
FRAZIER PARK      1
MONTEREY PARK     1
GARDEN GROVE      1
Name: city, Length: 789, dtype: int64

In [117]:
# Add each city's averaged RUCA code; cities without a match keep their row (left join).
cities_2 = pd.merge(city, ruca, how='left', on='city')

In [118]:
cities_2.head()

Unnamed: 0,SiteCity,TotalFirstYearGrosskWh,TotalFirstYearGrossTherm,TotalLifecycleGrosskWh,TotalLifecycleGrossTherm,Budget,Total Claims,Total Programs,% OBF Budget,% Resource Budget,city,dac_proportion,ces_score_median,ces_percentile_median,Total Tax Revenue,Average Tax Revenue,median_household_income_usd,total_population,RUCA code
0,ACAMPO,48664.72674,-148.89033,583976.72088,-1786.68396,17684.568509,6,1,0.0,100.0,ACAMPO,0.0,21.531463,40.282507,,,83913.0,9814.0,2
1,ADELANTO,13576.844232,-30.037266,67884.22116,-150.18633,27320.063231,4,1,0.0,100.0,ADELANTO,0.5,35.908799,68.451255,23628867.0,7876289.0,45695.5,35179.0,1
2,ALAMO,73049.3926,118.64524,684999.97629,-1279.097025,48785.542683,15,1,88.044896,100.0,ALAMO,0.0,2.175088,0.416194,,,219750.0,9373.0,1
3,ALBANY,153765.922,-785.076334,842200.6074,-4288.07028,60926.193528,35,2,0.0,100.0,ALBANY,0.0,8.128606,8.740068,57342690.0,19114230.0,102361.0,16742.0,1
4,ALHAMBRA,21630.949392,-32.172462,120233.350224,-178.879938,48465.447601,7,1,0.0,100.0,ALHAMBRA,0.15,33.702151,66.143272,154981924.0,51660640.0,59206.0,84647.0,1


In [119]:
cities_2.shape

(417, 19)

In [122]:
cities_2['RUCA code'].value_counts()

1     200
2      75
4      49
10     24
7      21
5      13
6      12
3      12
8       7
50      3
9       1
Name: RUCA code, dtype: int64

In [125]:
# Row labels of the cities whose averaged RUCA code came out as 50.
la = cities_2.index[cities_2['RUCA code'] == 50]

In [126]:
# Overwrite the averaged code of the rows collected in `la` with 1
# (see the markdown note on code 99 above for the reasoning).
cities_2.loc[la, 'RUCA code'] = 1

In [128]:
cities_2.loc[la, :]

Unnamed: 0,SiteCity,TotalFirstYearGrosskWh,TotalFirstYearGrossTherm,TotalLifecycleGrosskWh,TotalLifecycleGrossTherm,Budget,Total Claims,Total Programs,% OBF Budget,% Resource Budget,city,dac_proportion,ces_score_median,ces_percentile_median,Total Tax Revenue,Average Tax Revenue,median_household_income_usd,total_population,RUCA code
58,CARSON,931403.7,6040.052619,9843808.0,18058.363094,472207.0,109,2,35.830506,100.0,CARSON,0.777778,46.603986,84.846765,208784700.0,69594910.0,79687.5,83730.0,1
197,LONG BEACH,6799533.0,1307.656852,78451120.0,22194.600613,3395863.0,1029,2,90.307233,100.0,LONG BEACH,0.522124,42.915277,80.054231,1371076000.0,457025300.0,60616.0,476520.0,1
382,TORRANCE,189239.1,-118.233711,1710864.0,-984.729376,414119.0,39,2,0.0,100.0,TORRANCE,0.162162,25.326962,50.479253,512015400.0,170671800.0,89463.5,182791.0,1


In [129]:
cities_2['RUCA code'].value_counts()

1     203
2      75
4      49
10     24
7      21
5      13
6      12
3      12
8       7
9       1
Name: RUCA code, dtype: int64

In [144]:
# Label the per-city mean as `ruca_average`.
# The merge with `ruca` names this column 'RUCA code'. The original mapping listed
# only 'RUCA', which that merge does not produce, so on a top-to-bottom run the
# rename silently did nothing and the later merge with `ruca_median` ended up with
# two clashing 'RUCA code' columns.
# 'RUCA' is kept in the mapping for sessions where the column had already been
# shortened; rename() ignores keys that are not present.
cities_2.rename(columns={'RUCA code': 'ruca_average', 'RUCA': 'ruca_average'}, inplace=True)

In [137]:
cities_2.head()

Unnamed: 0,SiteCity,TotalFirstYearGrosskWh,TotalFirstYearGrossTherm,TotalLifecycleGrosskWh,TotalLifecycleGrossTherm,Budget,Total Claims,Total Programs,% OBF Budget,% Resource Budget,city,dac_proportion,ces_score_median,ces_percentile_median,Total Tax Revenue,Average Tax Revenue,median_household_income_usd,total_population,RUCA
0,ACAMPO,48664.72674,-148.89033,583976.72088,-1786.68396,17684.568509,6,1,0.0,100.0,ACAMPO,0.0,21.531463,40.282507,,,83913.0,9814.0,2
1,ADELANTO,13576.844232,-30.037266,67884.22116,-150.18633,27320.063231,4,1,0.0,100.0,ADELANTO,0.5,35.908799,68.451255,23628867.0,7876289.0,45695.5,35179.0,1
2,ALAMO,73049.3926,118.64524,684999.97629,-1279.097025,48785.542683,15,1,88.044896,100.0,ALAMO,0.0,2.175088,0.416194,,,219750.0,9373.0,1
3,ALBANY,153765.922,-785.076334,842200.6074,-4288.07028,60926.193528,35,2,0.0,100.0,ALBANY,0.0,8.128606,8.740068,57342690.0,19114230.0,102361.0,16742.0,1
4,ALHAMBRA,21630.949392,-32.172462,120233.350224,-178.879938,48465.447601,7,1,0.0,100.0,ALHAMBRA,0.15,33.702151,66.143272,154981924.0,51660640.0,59206.0,84647.0,1


In [138]:
# Median RUCA code per city, rounded to an integer, as an alternative to the mean.
# As with the mean, the numeric column is selected explicitly so that the string
# column `County` in `ccr` is never passed to the aggregation (recent pandas
# raises a TypeError for it instead of dropping it).
ruca_median = ccr.groupby('city')[['RUCA code']].median().round().astype(int).reset_index()
ruca_median.head()

Unnamed: 0,city,RUCA code
0,ACAMPO,2
1,ACTON,2
2,ADELANTO,1
3,AGOURA HILLS,1
4,ALAMEDA,1


In [139]:
# Add the per-city median code next to the average (left join on city name).
cities_2 = pd.merge(cities_2, ruca_median, how='left', on='city')

In [140]:
# The column that arrived with the median merge gets its final name.
cities_2 = cities_2.rename(columns={'RUCA code': 'ruca_median'})

In [256]:
cities_2.head()

Unnamed: 0,SiteCity,TotalFirstYearGrosskWh,TotalFirstYearGrossTherm,TotalLifecycleGrosskWh,TotalLifecycleGrossTherm,Budget,Total Claims,Total Programs,% OBF Budget,% Resource Budget,city,dac_proportion,ces_score_median,ces_percentile_median,Total Tax Revenue,Average Tax Revenue,median_household_income_usd,total_population,ruca_average,ruca_median
0,ACAMPO,48664.72674,-148.89033,583976.72088,-1786.68396,17684.568509,6,1,0.0,100.0,ACAMPO,0.0,21.531463,40.282507,,,83913.0,9814.0,2,2
1,ADELANTO,13576.844232,-30.037266,67884.22116,-150.18633,27320.063231,4,1,0.0,100.0,ADELANTO,0.5,35.908799,68.451255,23628867.0,7876289.0,45695.5,35179.0,1,1
2,ALAMO,73049.3926,118.64524,684999.97629,-1279.097025,48785.542683,15,1,88.044896,100.0,ALAMO,0.0,2.175088,0.416194,,,219750.0,9373.0,1,1
3,ALBANY,153765.922,-785.076334,842200.6074,-4288.07028,60926.193528,35,2,0.0,100.0,ALBANY,0.0,8.128606,8.740068,57342690.0,19114230.0,102361.0,16742.0,1,1
4,ALHAMBRA,21630.949392,-32.172462,120233.350224,-178.879938,48465.447601,7,1,0.0,100.0,ALHAMBRA,0.15,33.702151,66.143272,154981924.0,51660640.0,59206.0,84647.0,1,1


In [258]:
# Remove the average-revenue column and the `city` key (the `SiteCity` column stays).
cities_2 = cities_2.drop(columns=['Average Tax Revenue', 'city'])

In [259]:
cities_2.head()

Unnamed: 0,SiteCity,TotalFirstYearGrosskWh,TotalFirstYearGrossTherm,TotalLifecycleGrosskWh,TotalLifecycleGrossTherm,Budget,Total Claims,Total Programs,% OBF Budget,% Resource Budget,dac_proportion,ces_score_median,ces_percentile_median,Total Tax Revenue,median_household_income_usd,total_population,ruca_average,ruca_median
0,ACAMPO,48664.72674,-148.89033,583976.72088,-1786.68396,17684.568509,6,1,0.0,100.0,0.0,21.531463,40.282507,,83913.0,9814.0,2,2
1,ADELANTO,13576.844232,-30.037266,67884.22116,-150.18633,27320.063231,4,1,0.0,100.0,0.5,35.908799,68.451255,23628867.0,45695.5,35179.0,1,1
2,ALAMO,73049.3926,118.64524,684999.97629,-1279.097025,48785.542683,15,1,88.044896,100.0,0.0,2.175088,0.416194,,219750.0,9373.0,1,1
3,ALBANY,153765.922,-785.076334,842200.6074,-4288.07028,60926.193528,35,2,0.0,100.0,0.0,8.128606,8.740068,57342690.0,102361.0,16742.0,1,1
4,ALHAMBRA,21630.949392,-32.172462,120233.350224,-178.879938,48465.447601,7,1,0.0,100.0,0.15,33.702151,66.143272,154981924.0,59206.0,84647.0,1,1


In [282]:
# Persist the city table (row index omitted).
# NOTE(review): this cell ran as In [282], i.e. after the tax merge (In [268]) and
# the ruca_median correction (In [279]) that sit further down the notebook. Run top
# to bottom, the file written here would not contain those changes — confirm the
# intended cell order.
cities_2.to_csv('data/cities_2.csv', index=False)

## **TAX REVENUE COLUMN**

In [266]:
# Read only the city key and the revenue column from the predicted-tax-revenue file.
tax = pd.read_csv(
    'data/cities_predictedtaxrev.csv',
    usecols=['SiteCity', 'Total.Tax.Revenue'],
)

In [267]:
tax.head()

Unnamed: 0,SiteCity,Total.Tax.Revenue
0,ACAMPO,-123910700.0
1,ADELANTO,23628870.0
2,ALAMO,-126124700.0
3,ALBANY,57342690.0
4,ALHAMBRA,154981900.0


In [268]:
# Attach the revenue column from `tax` to each city (left join on SiteCity).
cities_2 = pd.merge(cities_2, tax, how='left', on='SiteCity')

In [271]:
cities_2.head()

Unnamed: 0,SiteCity,TotalFirstYearGrosskWh,TotalFirstYearGrossTherm,TotalLifecycleGrosskWh,TotalLifecycleGrossTherm,Budget,Total Claims,Total Programs,% OBF Budget,% Resource Budget,dac_proportion,ces_score_median,ces_percentile_median,Total Tax Revenue,median_household_income_usd,total_population,ruca_average,ruca_median,Total.Tax.Revenue
0,ACAMPO,48664.72674,-148.89033,583976.72088,-1786.68396,17684.568509,6,1,0.0,100.0,0.0,21.531463,40.282507,,83913.0,9814.0,2,2,-123910700.0
1,ADELANTO,13576.844232,-30.037266,67884.22116,-150.18633,27320.063231,4,1,0.0,100.0,0.5,35.908799,68.451255,23628867.0,45695.5,35179.0,1,1,23628870.0
2,ALAMO,73049.3926,118.64524,684999.97629,-1279.097025,48785.542683,15,1,88.044896,100.0,0.0,2.175088,0.416194,,219750.0,9373.0,1,1,-126124700.0
3,ALBANY,153765.922,-785.076334,842200.6074,-4288.07028,60926.193528,35,2,0.0,100.0,0.0,8.128606,8.740068,57342690.0,102361.0,16742.0,1,1,57342690.0
4,ALHAMBRA,21630.949392,-32.172462,120233.350224,-178.879938,48465.447601,7,1,0.0,100.0,0.15,33.702151,66.143272,154981924.0,59206.0,84647.0,1,1,154981900.0


In [283]:
len(cities_2[cities_2.ruca_average != cities_2.ruca_median])

10

In [279]:
# Give the median column the same correction that the averaged code received for
# Carson, Long Beach and Torrance.
# The rows are selected by city name rather than by the labels 382/58/197, which
# only stay valid while the row order of `cities_2` never changes (a merge that
# adds or duplicates rows would shift them).
cities_2.loc[cities_2.SiteCity.isin(['CARSON', 'LONG BEACH', 'TORRANCE']), 'ruca_median'] = 1

In [280]:
cities_2.loc[[382,58,197], 'ruca_median']

382    1
58     1
197    1
Name: ruca_median, dtype: int32

## **COUNTY DEMOGRAPHICS**

In [149]:
# County-level base table: only the five columns needed here are read.
county = pd.read_csv(
    'data/better_cpuc_compiled.csv',
    usecols=['IRR', 'CES Score Median', 'Mean Income', 'Population', 'County'],
)

In [150]:
county.head()

Unnamed: 0,County,Population,Mean Income,CES Score Median,IRR
0,Alameda,1656754,76626,21.532906,0.270123
1,Alpine,1039,78945,11.343357,0.656045
2,Amador,38429,51453,23.257549,0.502563
3,Butte,225817,62405,20.033261,0.434555
4,Calaveras,45514,70651,15.180852,0.509595


In [151]:
# Upper-case county names so they can serve as the join key.
county['County'] = county['County'].str.upper()

In [152]:
# Load the county revenue totals.
# NOTE(review): this rebinds `tax`, which until here held the city-level frame
# read from cities_predictedtaxrev.csv.
tax = pd.read_csv('data/CountyRevenues(2017-2019).csv')

In [154]:
# Same upper-casing as the county table, so the keys match in the merge.
tax['County'] = tax['County'].str.upper()

In [155]:
# Add the revenue column to every county; counties absent from `tax` keep their row.
county = pd.merge(county, tax, how='left', on='County')

In [156]:
county.head()

Unnamed: 0,County,Population,Mean Income,CES Score Median,IRR,TotalRevenue
0,ALAMEDA,1656754,76626,21.532906,0.270123,9617053146
1,ALPINE,1039,78945,11.343357,0.656045,60490797
2,AMADOR,38429,51453,23.257549,0.502563,243032260
3,BUTTE,225817,62405,20.033261,0.434555,1290504922
4,CALAVERAS,45514,70651,15.180852,0.509595,677652486


In [157]:
tax.shape

(57, 2)

In [158]:
county.shape

(58, 6)

In [159]:
county[county.TotalRevenue.isna()]

Unnamed: 0,County,Population,Mean Income,CES Score Median,IRR,TotalRevenue
37,SAN FRANCISCO,874961,64631,14.761339,0.079646,


In [160]:
# San Francisco is the one county left without a revenue figure after the merge,
# so its total is entered by hand.
# NOTE(review): the source of this figure is not recorded in the notebook.
sf = 1457014810

# Select the row by county name instead of the positional label 37, so the fix
# still lands on the right row if the row order of `county` ever changes.
county.loc[county.County == 'SAN FRANCISCO', 'TotalRevenue'] = sf

In [163]:
county[county.County == 'SAN FRANCISCO']

Unnamed: 0,County,Population,Mean Income,CES Score Median,IRR,TotalRevenue
37,SAN FRANCISCO,874961,64631,14.761339,0.079646,1457014810


In [164]:
county.head()

Unnamed: 0,County,Population,Mean Income,CES Score Median,IRR,TotalRevenue
0,ALAMEDA,1656754,76626,21.532906,0.270123,9617053146
1,ALPINE,1039,78945,11.343357,0.656045,60490797
2,AMADOR,38429,51453,23.257549,0.502563,243032260
3,BUTTE,225817,62405,20.033261,0.434555,1290504922
4,CALAVERAS,45514,70651,15.180852,0.509595,677652486


In [171]:
# Remove thousands separators from the revenue strings.
# Only string entries are touched: the `.str` accessor returns NaN for non-string
# elements, so the original call wiped out the hand-entered San Francisco total
# (an integer) that had just been filled in.
county['TotalRevenue'] = county['TotalRevenue'].map(
    lambda value: value.replace(',', '') if isinstance(value, str) else value
)

In [172]:
county.head()

Unnamed: 0,County,Population,Mean Income,CES Score Median,IRR,TotalRevenue
0,ALAMEDA,1656754,76626,21.532906,0.270123,9617053146
1,ALPINE,1039,78945,11.343357,0.656045,60490797
2,AMADOR,38429,51453,23.257549,0.502563,243032260
3,BUTTE,225817,62405,20.033261,0.434555,1290504922
4,CALAVERAS,45514,70651,15.180852,0.509595,677652486


Waiting on more ces demographics

In [218]:
# County-level CES/DAC summary and median household income.
# Both names previously referred to the city-level versions of these tables.
income = pd.read_csv('data/census_median_income_by_county.csv')

ces = pd.read_csv('data/ces_dac_county_proportion_median.csv')

In [219]:
ces.county.to_list()

['Alameda ',
 'Alpine ',
 'Amador ',
 'Butte ',
 'Calaveras ',
 'Colusa ',
 'Contra Costa',
 'Del Norte',
 'El Dorado',
 'Fresno ',
 'Glenn ',
 'Humboldt ',
 'Imperial ',
 'Inyo ',
 'Kern ',
 'Kings ',
 'Lake ',
 'Lassen ',
 'Los Angeles',
 'Madera ',
 'Marin ',
 'Mariposa ',
 'Mendocino ',
 'Merced ',
 'Modoc ',
 'Mono ',
 'Monterey ',
 'Napa ',
 'Nevada ',
 'Orange ',
 'Placer ',
 'Plumas ',
 'Riverside ',
 'Sacramento ',
 'San Benito',
 'San Bernardino',
 'San Diego',
 'San Francisco',
 'San Joaquin',
 'San Luis Obispo',
 'San Mateo',
 'Santa Barbara',
 'Santa Clara',
 'Santa Cruz',
 'Shasta ',
 'Sierra ',
 'Siskiyou ',
 'Solano ',
 'Sonoma ',
 'Stanislaus ',
 'Sutter ',
 'Tehama ',
 'Trinity ',
 'Tulare ',
 'Tuolumne ',
 'Ventura ',
 'Yolo ',
 'Yuba ']

In [226]:
# County names made of more than one word; in the CES file these appear without
# a trailing space, unlike the single-word names.
s = [
    'Contra Costa',
    'Del Norte',
    'El Dorado',
    'Los Angeles',
    'San Benito',
    'San Bernardino',
    'San Diego',
    'San Francisco',
    'San Joaquin',
    'San Luis Obispo',
    'San Mateo',
    'Santa Barbara',
    'Santa Clara',
    'Santa Cruz',
]

In [227]:
# Rows whose county name is one of the multi-word names in `s`.
spacey = ces.loc[ces.county.isin(s)]

In [228]:
spacey.head()

Unnamed: 0,county,dac_proportion,ces_score_median,ces_percentile_median
6,Contra Costa,0.120773,17.213198,30.861395
7,Del Norte,0.0,16.844851,29.97856
8,El Dorado,0.0,10.546223,14.390213
18,Los Angeles,0.443022,36.397588,70.620507
34,San Benito,0.0,31.288566,61.748014


In [229]:
# All remaining rows: county names that are not in `s`.
too_spacey = ces.loc[~ces.county.isin(s)]

In [230]:
too_spacey.head()

Unnamed: 0,county,dac_proportion,ces_score_median,ces_percentile_median
0,Alameda,0.105556,21.532906,42.098625
1,Alpine,0.0,11.343357,15.853197
2,Amador,0.0,23.257549,45.756085
3,Butte,0.039216,20.033261,38.365494
4,Calaveras,0.0,15.180852,25.551772


In [231]:
# Remove the spaces from the county names in `too_spacey` (the single-word names
# carry a trailing space in the CES file).
# `assign` returns a new frame, which avoids the SettingWithCopyWarning the
# original attribute write raised: `too_spacey` is a filtered slice of `ces`.
# `regex=False` keeps the space a literal match regardless of pandas version.
too_spacey = too_spacey.assign(
    county=too_spacey.county.str.replace(' ', '', regex=False)
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[name] = value


In [232]:
# The two halves of the CES table, to be stacked back together.
frames = [spacey, too_spacey]

In [233]:
# Stack the two halves row-wise; the original row labels are kept.
dac = pd.concat(frames)

In [236]:
# Put the counties back into alphabetical order.
dac = dac.sort_values(by='county')

In [237]:
dac.head()

Unnamed: 0,county,dac_proportion,ces_score_median,ces_percentile_median
0,Alameda,0.105556,21.532906,42.098625
1,Alpine,0.0,11.343357,15.853197
2,Amador,0.0,23.257549,45.756085
3,Butte,0.039216,20.033261,38.365494
4,Calaveras,0.0,15.180852,25.551772


In [239]:
# Add the columns of `income` to each county row (left join on county name).
dac = pd.merge(dac, income, how='left', on='county')

In [240]:
# Upper-case the key so it matches `county.County`.
dac['county'] = dac['county'].str.upper()

In [241]:
# Combine the county base table with the CES/DAC and income columns.
# The key columns differ only in capitalisation of their names, so both are given.
county_dems = pd.merge(
    county,
    dac,
    how='left',
    left_on='County',
    right_on='county',
)

In [242]:
county_dems.head()

Unnamed: 0,County,Population,Mean Income,CES Score Median,IRR,TotalRevenue,county,dac_proportion,ces_score_median,ces_percentile_median,median_household_income_usd
0,ALAMEDA,1656754,76626,21.532906,0.270123,9617053146,ALAMEDA,0.105556,21.532906,42.098625,99406
1,ALPINE,1039,78945,11.343357,0.656045,60490797,ALPINE,0.0,11.343357,15.853197,63750
2,AMADOR,38429,51453,23.257549,0.502563,243032260,AMADOR,0.0,23.257549,45.756085,62772
3,BUTTE,225817,62405,20.033261,0.434555,1290504922,BUTTE,0.039216,20.033261,38.365494,52537
4,CALAVERAS,45514,70651,15.180852,0.509595,677652486,CALAVERAS,0.0,15.180852,25.551772,63158


In [243]:
county_dems[county_dems.County != county_dems.county]

Unnamed: 0,County,Population,Mean Income,CES Score Median,IRR,TotalRevenue,county,dac_proportion,ces_score_median,ces_percentile_median,median_household_income_usd


In [246]:
# Re-apply the hand-entered San Francisco revenue total on the merged table.
# The row is selected by county name rather than by the label 37, which is only
# correct while the merge preserves the row order and count of `county`.
county_dems.loc[county_dems.County == 'SAN FRANCISCO', 'TotalRevenue'] = 1457014810

In [247]:
county_dems.loc[37, :]

County                         SAN FRANCISCO
Population                            874961
Mean Income                            64631
CES Score Median                     14.7613
IRR                                0.0796459
TotalRevenue                      1457014810
county                         SAN FRANCISCO
dac_proportion                     0.0615385
ces_score_median                     14.7613
ces_percentile_median                24.5365
median_household_income_usd           112449
Name: 37, dtype: object

In [252]:
# The lower-case `county` key repeats `County` after the merge; drop it.
county_dems = county_dems.drop(columns=['county'])

In [253]:
county_dems.head()

Unnamed: 0,County,Population,Mean Income,CES Score Median,IRR,TotalRevenue,dac_proportion,ces_score_median,ces_percentile_median,median_household_income_usd
0,ALAMEDA,1656754,76626,21.532906,0.270123,9617053146,0.105556,21.532906,42.098625,99406
1,ALPINE,1039,78945,11.343357,0.656045,60490797,0.0,11.343357,15.853197,63750
2,AMADOR,38429,51453,23.257549,0.502563,243032260,0.0,23.257549,45.756085,62772
3,BUTTE,225817,62405,20.033261,0.434555,1290504922,0.039216,20.033261,38.365494,52537
4,CALAVERAS,45514,70651,15.180852,0.509595,677652486,0.0,15.180852,25.551772,63158


In [255]:
# Write the county-level demographics table to disk (row index omitted).
county_dems.to_csv('data/county_demographics.csv', index=False)