# Working with CA Aggregated Data

Pre-processing input data for a smoother upload of the state data to the WaDE 2.0 database.
Uses geopandas to read in the shapefiles, converting the geometry to WKT for the ReportingUnit geometry.

#### Notes:
There were some duplicate entries noted in the input files (e.g. 2011).  Those duplicate rows will be removed.

In [1]:
# Needed libraries
import os
import numpy as np
import pandas as pd
from datetime import datetime
import geopandas as gpd # the library that lets us read in shapefiles
pd.options.display.max_columns = 999  # let Jupyter display up to 999 DataFrame columns instead of truncating

# Working directory: later cells open the input CSVs by bare file name, so switch into the raw-input folder first.
workingDir = "C:/Users/rjame/Documents/WSWC Documents/MappingStatesDataToWaDE2.0/California/AggregatedAmounts/RawInputData"
os.chdir(workingDir)

# Water Balance Data

In [2]:
# CSV input files: one per report year (2011-2015); the names differ only in the year.
(fileInput_2011,
 fileInput_2012,
 fileInput_2013,
 fileInput_2014,
 fileInput_2015) = (
    "CA-DWR-WaterBalance-Level2-DP-1000-%d-DAUCO_input.csv" % reportYear
    for reportYear in range(2011, 2016)
)

In [3]:
# Load the 2011 input file and peek at its first row.
df2011 = pd.read_csv(filepath_or_buffer=fileInput_2011)
df2011.head(n=1)

Unnamed: 0,CategoryA,CategoryC,DAU,DAU_NAME,HR_CODE,HR_NAME,PA,Longitude,Latitude,KAcreFt,Year,CategoryD,CategoryB
0,Agriculture,Applied Water,DAU04827,Pressure,3,Central Coast,301,-121.637711,36.63942,89.2,2011,AG1,1


In [4]:
# Load the 2012 input file and peek at its first row.
df2012 = pd.read_csv(filepath_or_buffer=fileInput_2012)
df2012.head(n=1)

Unnamed: 0,CategoryA,CategoryC,DAU,DAU_NAME,HR_CODE,HR_NAME,PA,Longitude,Latitude,KAcreFt,Year,CategoryD,CategoryB
0,Agriculture,Applied Water,DAU04827,Pressure,3,Central Coast,301,-121.637711,36.63942,92.7,2012,AG1,1


In [5]:
# Load the 2013 input file and peek at its first row.
df2013 = pd.read_csv(filepath_or_buffer=fileInput_2013)
df2013.head(n=1)

Unnamed: 0,CategoryA,CategoryC,DAU,DAU_NAME,HR_CODE,HR_NAME,PA,Longitude,Latitude,KAcreFt,Year,CategoryD,CategoryB
0,Agriculture,Applied Water,DAU04827,Pressure,3,Central Coast,301,-121.637711,36.63942,112.2,2013,AG1,1


In [6]:
# Load the 2014 input file and peek at its first row.
df2014 = pd.read_csv(filepath_or_buffer=fileInput_2014)
df2014.head(n=1)

Unnamed: 0,CategoryA,CategoryC,DAU,DAU_NAME,HR_CODE,HR_NAME,PA,Longitude,Latitude,KAcreFt,Year,CategoryD,CategoryB
0,Agriculture,Applied Water,DAU04827,Pressure,3,Central Coast,301,-121.637711,36.63942,100.5,2014,AG1,1


In [7]:
# Load the 2015 input file and peek at its first row.
df2015 = pd.read_csv(filepath_or_buffer=fileInput_2015)
df2015.head(n=1)

Unnamed: 0,CategoryA,CategoryC,DAU,DAU_NAME,HR_CODE,HR_NAME,PA,Longitude,Latitude,KAcreFt,Year,CategoryD,CategoryB
0,Agriculture,Applied Water,DAU04827,Pressure,3,Central Coast,301,-121.637711,36.63942,100.5,2015,AG1,1


In [8]:
# Stack the five yearly DAUCO dataframes and discard rows that are exact duplicates.
# Each file's own row labels are kept (no re-indexing), so labels may repeat across years.
frames = [df2011, df2012, df2013, df2014, df2015]
dfAll = pd.concat(objs=frames).drop_duplicates()
dfAll.head(n=3)

Unnamed: 0,CategoryA,CategoryC,DAU,DAU_NAME,HR_CODE,HR_NAME,PA,Longitude,Latitude,KAcreFt,Year,CategoryD,CategoryB
0,Agriculture,Applied Water,DAU04827,Pressure,3,Central Coast,301,-121.637711,36.63942,89.2,2011,AG1,1
1,Agriculture,Applied Water - Groundwater Recharge,DAU04827,Pressure,3,Central Coast,301,-121.637711,36.63942,0.0,2011,AG2,2
2,Agriculture,Conveyance Deep Percolation,DAU04827,Pressure,3,Central Coast,301,-121.637711,36.63942,0.0,2011,AG22,22


### Separate Water Balance out by Reporting Unit Type, Sum by Year.

In [9]:
# List the dtype of every column of the combined table; the option context lifts pandas' display limits for this print only.
with pd.option_context('display.max_columns', None, 'display.max_rows', None):
    print(dfAll.dtypes)

CategoryA     object
CategoryC     object
DAU           object
DAU_NAME      object
HR_CODE        int64
HR_NAME       object
PA             int64
Longitude    float64
Latitude     float64
KAcreFt      float64
Year           int64
CategoryD     object
CategoryB     object
dtype: object


In [10]:
# Preview: every "Applied Water Use" row of the combined table.
dfPAtemp = dfAll.loc[dfAll.CategoryC == "Applied Water Use"]
dfPAtemp

Unnamed: 0,CategoryA,CategoryC,DAU,DAU_NAME,HR_CODE,HR_NAME,PA,Longitude,Latitude,KAcreFt,Year,CategoryD,CategoryB
134,Agriculture,Applied Water Use,DAU04827,Pressure,3,Central Coast,301,-121.637711,36.639420,89.2,2011,AWUAG,Computed
142,Urban,Applied Water Use,DAU04827,Pressure,3,Central Coast,301,-121.637711,36.639420,11.6,2011,AWUURB,Computed
150,Managed Wetlands,Applied Water Use,DAU04827,Pressure,3,Central Coast,301,-121.637711,36.639420,0.4,2011,AWUMW,Computed
158,Instream Flow Requirements,Applied Water Use,DAU04827,Pressure,3,Central Coast,301,-121.637711,36.639420,0.0,2011,AWUIFR,Computed
162,Wild and Scenic River,Applied Water Use,DAU04827,Pressure,3,Central Coast,301,-121.637711,36.639420,0.0,2011,AWUWSR,Computed
...,...,...,...,...,...,...,...,...,...,...,...,...,...
82330,Urban,Applied Water Use,DAU18105,Ione-Jenny Lind,6,San Joaquin River,603,-120.794776,38.047984,1.4,2015,AWUURB,Computed
82338,Managed Wetlands,Applied Water Use,DAU18105,Ione-Jenny Lind,6,San Joaquin River,603,-120.794776,38.047984,0.0,2015,AWUMW,Computed
82346,Instream Flow Requirements,Applied Water Use,DAU18105,Ione-Jenny Lind,6,San Joaquin River,603,-120.794776,38.047984,0.0,2015,AWUIFR,Computed
82350,Wild and Scenic River,Applied Water Use,DAU18105,Ione-Jenny Lind,6,San Joaquin River,603,-120.794776,38.047984,0.0,2015,AWUWSR,Computed


In [11]:
# Reporting unit type "PA" (Planning Area), variable "Applied Water Use".
# Keep only the "Applied Water Use" rows, then total KAcreFt per PA / Year / CategoryA.
###########################################################################################

dfPAtemp1 = (
    dfAll[dfAll.CategoryC == "Applied Water Use"]
    .groupby(['PA', 'Year', 'CategoryA', 'CategoryC'])['KAcreFt']
    .sum()
    .reset_index()
)

columnsList = ['inVariable', 'inReportingUnitTypeCV', 'inReportingUnitName', 'inReportingUnitNativeID', 'inYear', 'inBenUse', 'inAmount']
dfPA1 = pd.DataFrame({
    'inVariable': dfPAtemp1['CategoryC'],
    'inReportingUnitTypeCV': "Planning Area",
    'inReportingUnitName': dfPAtemp1['PA'],  # the PA number doubles as name and native ID
    'inReportingUnitNativeID': dfPAtemp1['PA'],
    'inYear': dfPAtemp1['Year'],
    'inBenUse': dfPAtemp1['CategoryA'],
    'inAmount': dfPAtemp1['KAcreFt'] * 1000,  # presumably thousand acre-feet -> acre-feet; confirm units
    'inTimeframeStart': '01/01/' + dfPAtemp1['Year'].astype(str),  # calendar-year window
    'inTimeframeEnd': '12/31/' + dfPAtemp1['Year'].astype(str),
})
dfPA1.head(3)

Unnamed: 0,inVariable,inReportingUnitTypeCV,inReportingUnitName,inReportingUnitNativeID,inYear,inBenUse,inAmount,inTimeframeStart,inTimeframeEnd
0,Applied Water Use,Planning Area,101,101,2011,Agriculture,347200.0,01/01/2011,12/31/2011
1,Applied Water Use,Planning Area,101,101,2011,Instream Flow Requirements,0.0,01/01/2011,12/31/2011
2,Applied Water Use,Planning Area,101,101,2011,Managed Wetlands,171900.0,01/01/2011,12/31/2011


In [12]:
# Reporting unit type "PA" (Planning Area), variable "Depletion".
# Keep only the "Depletion" rows, then total KAcreFt per PA / Year / CategoryA.
###########################################################################################

dfPAtemp2 = (
    dfAll[dfAll.CategoryC == "Depletion"]
    .groupby(['PA', 'Year', 'CategoryA', 'CategoryC'])['KAcreFt']
    .sum()
    .reset_index()
)

columnsList = ['inVariable', 'inReportingUnitTypeCV', 'inReportingUnitName', 'inReportingUnitNativeID', 'inYear', 'inBenUse', 'inAmount']
dfPA2 = pd.DataFrame({
    'inVariable': dfPAtemp2['CategoryC'],
    'inReportingUnitTypeCV': "Planning Area",
    'inReportingUnitName': dfPAtemp2['PA'],  # the PA number doubles as name and native ID
    'inReportingUnitNativeID': dfPAtemp2['PA'],
    'inYear': dfPAtemp2['Year'],
    'inBenUse': dfPAtemp2['CategoryA'],
    'inAmount': dfPAtemp2['KAcreFt'] * 1000,  # presumably thousand acre-feet -> acre-feet; confirm units
    'inTimeframeStart': '01/01/' + dfPAtemp2['Year'].astype(str),  # calendar-year window
    'inTimeframeEnd': '12/31/' + dfPAtemp2['Year'].astype(str),
})
dfPA2.head(3)

Unnamed: 0,inVariable,inReportingUnitTypeCV,inReportingUnitName,inReportingUnitNativeID,inYear,inBenUse,inAmount,inTimeframeStart,inTimeframeEnd
0,Depletion,Planning Area,101,101,2011,Agriculture,320400.0,01/01/2011,12/31/2011
1,Depletion,Planning Area,101,101,2011,Instream Flow Requirements,0.0,01/01/2011,12/31/2011
2,Depletion,Planning Area,101,101,2011,Managed Wetlands,136700.0,01/01/2011,12/31/2011


In [13]:
# Reporting unit type "HR" (Hydrologic Region), variable "Applied Water Use".
# Keep only the "Applied Water Use" rows, then total KAcreFt per region / Year / CategoryA.
###########################################################################################

dfHRtemp1 = (
    dfAll[dfAll.CategoryC == "Applied Water Use"]
    .groupby(['HR_NAME', 'HR_CODE', 'Year', 'CategoryA', 'CategoryC'])['KAcreFt']
    .sum()
    .reset_index()
)

columnsList = ['inVariable', 'inReportingUnitTypeCV', 'inReportingUnitName', 'inReportingUnitNativeID', 'inYear', 'inBenUse', 'inAmount']
dfHR1 = pd.DataFrame({
    'inVariable': dfHRtemp1['CategoryC'],
    'inReportingUnitTypeCV': "Hydrologic Region",
    'inReportingUnitName': dfHRtemp1['HR_NAME'],
    'inReportingUnitNativeID': dfHRtemp1['HR_CODE'],
    'inYear': dfHRtemp1['Year'],
    'inBenUse': dfHRtemp1['CategoryA'],
    'inAmount': dfHRtemp1['KAcreFt'] * 1000,  # presumably thousand acre-feet -> acre-feet; confirm units
    'inTimeframeStart': '01/01/' + dfHRtemp1['Year'].astype(str),  # calendar-year window
    'inTimeframeEnd': '12/31/' + dfHRtemp1['Year'].astype(str),
})
dfHR1.head(3)

Unnamed: 0,inVariable,inReportingUnitTypeCV,inReportingUnitName,inReportingUnitNativeID,inYear,inBenUse,inAmount,inTimeframeStart,inTimeframeEnd
0,Applied Water Use,Hydrologic Region,Central Coast,3,2011,Agriculture,912300.0,01/01/2011,12/31/2011
1,Applied Water Use,Hydrologic Region,Central Coast,3,2011,Instream Flow Requirements,25400.0,01/01/2011,12/31/2011
2,Applied Water Use,Hydrologic Region,Central Coast,3,2011,Managed Wetlands,400.0,01/01/2011,12/31/2011


In [14]:
# Reporting unit type "HR" (Hydrologic Region), variable "Depletion".
# Keep only the "Depletion" rows, then total KAcreFt per region / Year / CategoryA.
###########################################################################################

dfHRtemp2 = (
    dfAll[dfAll.CategoryC == "Depletion"]
    .groupby(['HR_NAME', 'HR_CODE', 'Year', 'CategoryA', 'CategoryC'])['KAcreFt']
    .sum()
    .reset_index()
)

columnsList = ['inVariable', 'inReportingUnitTypeCV', 'inReportingUnitName', 'inReportingUnitNativeID', 'inYear', 'inBenUse', 'inAmount']
dfHR2 = pd.DataFrame({
    'inVariable': dfHRtemp2['CategoryC'],
    'inReportingUnitTypeCV': "Hydrologic Region",
    'inReportingUnitName': dfHRtemp2['HR_NAME'],
    'inReportingUnitNativeID': dfHRtemp2['HR_CODE'],
    'inYear': dfHRtemp2['Year'],
    'inBenUse': dfHRtemp2['CategoryA'],
    'inAmount': dfHRtemp2['KAcreFt'] * 1000,  # presumably thousand acre-feet -> acre-feet; confirm units
    'inTimeframeStart': '01/01/' + dfHRtemp2['Year'].astype(str),  # calendar-year window
    'inTimeframeEnd': '12/31/' + dfHRtemp2['Year'].astype(str),
})
dfHR2.head(3)

Unnamed: 0,inVariable,inReportingUnitTypeCV,inReportingUnitName,inReportingUnitNativeID,inYear,inBenUse,inAmount,inTimeframeStart,inTimeframeEnd
0,Depletion,Hydrologic Region,Central Coast,3,2011,Agriculture,813600.0,01/01/2011,12/31/2011
1,Depletion,Hydrologic Region,Central Coast,3,2011,Instream Flow Requirements,0.0,01/01/2011,12/31/2011
2,Depletion,Hydrologic Region,Central Coast,3,2011,Managed Wetlands,400.0,01/01/2011,12/31/2011


In [15]:
# Reporting unit type "DAU" (Detailed Analysis Units by County), variable "Applied Water Use".
# Keep only the "Applied Water Use" rows, then total KAcreFt per DAU / Year / CategoryA.
###########################################################################################

dfDAUCOtemp1 = (
    dfAll[dfAll.CategoryC == "Applied Water Use"]
    .groupby(['DAU', 'DAU_NAME', 'Year', 'CategoryA', 'CategoryC'])['KAcreFt']
    .sum()
    .reset_index()
)

columnsList = ['inVariable', 'inReportingUnitTypeCV', 'inReportingUnitName', 'inReportingUnitNativeID', 'inYear', 'inBenUse', 'inAmount']
dfDAUCO1 = pd.DataFrame({
    'inVariable': dfDAUCOtemp1['CategoryC'],
    'inReportingUnitTypeCV': "Detailed Analysis Units by County",
    'inReportingUnitName': dfDAUCOtemp1['DAU_NAME'],
    'inReportingUnitNativeID': dfDAUCOtemp1['DAU'],
    'inYear': dfDAUCOtemp1['Year'],
    'inBenUse': dfDAUCOtemp1['CategoryA'],
    'inAmount': dfDAUCOtemp1['KAcreFt'] * 1000,  # presumably thousand acre-feet -> acre-feet; confirm units
    'inTimeframeStart': '01/01/' + dfDAUCOtemp1['Year'].astype(str),  # calendar-year window
    'inTimeframeEnd': '12/31/' + dfDAUCOtemp1['Year'].astype(str),
})
dfDAUCO1.head(3)

Unnamed: 0,inVariable,inReportingUnitTypeCV,inReportingUnitName,inReportingUnitNativeID,inYear,inBenUse,inAmount,inTimeframeStart,inTimeframeEnd
0,Applied Water Use,Detailed Analysis Units by County,Lost River,DAU00125,2011,Agriculture,74800.0,01/01/2011,12/31/2011
1,Applied Water Use,Detailed Analysis Units by County,Lost River,DAU00125,2011,Instream Flow Requirements,0.0,01/01/2011,12/31/2011
2,Applied Water Use,Detailed Analysis Units by County,Lost River,DAU00125,2011,Managed Wetlands,79100.0,01/01/2011,12/31/2011


In [16]:
# Reporting unit type "DAU" (Detailed Analysis Units by County), variable "Depletion".
# Keep only the "Depletion" rows, then total KAcreFt per DAU / Year / CategoryA.
###########################################################################################

dfDAUCOtemp2 = (
    dfAll[dfAll.CategoryC == "Depletion"]
    .groupby(['DAU', 'DAU_NAME', 'Year', 'CategoryA', 'CategoryC'])['KAcreFt']
    .sum()
    .reset_index()
)

columnsList = ['inVariable', 'inReportingUnitTypeCV', 'inReportingUnitName', 'inReportingUnitNativeID', 'inYear', 'inBenUse', 'inAmount']
dfDAUCO2 = pd.DataFrame({
    'inVariable': dfDAUCOtemp2['CategoryC'],
    'inReportingUnitTypeCV': "Detailed Analysis Units by County",
    'inReportingUnitName': dfDAUCOtemp2['DAU_NAME'],
    'inReportingUnitNativeID': dfDAUCOtemp2['DAU'],
    'inYear': dfDAUCOtemp2['Year'],
    'inBenUse': dfDAUCOtemp2['CategoryA'],
    'inAmount': dfDAUCOtemp2['KAcreFt'] * 1000,  # presumably thousand acre-feet -> acre-feet; confirm units
    'inTimeframeStart': '01/01/' + dfDAUCOtemp2['Year'].astype(str),  # calendar-year window
    'inTimeframeEnd': '12/31/' + dfDAUCOtemp2['Year'].astype(str),
})
dfDAUCO2.head(3)

Unnamed: 0,inVariable,inReportingUnitTypeCV,inReportingUnitName,inReportingUnitNativeID,inYear,inBenUse,inAmount,inTimeframeStart,inTimeframeEnd
0,Depletion,Detailed Analysis Units by County,Lost River,DAU00125,2011,Agriculture,66700.0,01/01/2011,12/31/2011
1,Depletion,Detailed Analysis Units by County,Lost River,DAU00125,2011,Instream Flow Requirements,0.0,01/01/2011,12/31/2011
2,Depletion,Detailed Analysis Units by County,Lost River,DAU00125,2011,Managed Wetlands,77400.0,01/01/2011,12/31/2011


In [17]:
# Stack the six per-reporting-unit-type dataframes (PA, HR, DAU x two variables) into one table.
# Row labels are not renumbered, so each source frame's 0..n labels repeat in the result.
frames = [dfPA1, dfPA2, dfHR1, dfHR2, dfDAUCO1, dfDAUCO2]
dfAllbyType = pd.concat(objs=frames)
dfAllbyType

Unnamed: 0,inVariable,inReportingUnitTypeCV,inReportingUnitName,inReportingUnitNativeID,inYear,inBenUse,inAmount,inTimeframeStart,inTimeframeEnd
0,Applied Water Use,Planning Area,101,101,2011,Agriculture,347200.0,01/01/2011,12/31/2011
1,Applied Water Use,Planning Area,101,101,2011,Instream Flow Requirements,0.0,01/01/2011,12/31/2011
2,Applied Water Use,Planning Area,101,101,2011,Managed Wetlands,171900.0,01/01/2011,12/31/2011
3,Applied Water Use,Planning Area,101,101,2011,Required Delta Outflow,0.0,01/01/2011,12/31/2011
4,Applied Water Use,Planning Area,101,101,2011,Urban,9000.0,01/01/2011,12/31/2011
...,...,...,...,...,...,...,...,...,...
14539,Depletion,Detailed Analysis Units by County,Dry Creek,DAU40549,2015,Instream Flow Requirements,0.0,01/01/2015,12/31/2015
14540,Depletion,Detailed Analysis Units by County,Dry Creek,DAU40549,2015,Managed Wetlands,0.0,01/01/2015,12/31/2015
14541,Depletion,Detailed Analysis Units by County,Dry Creek,DAU40549,2015,Required Delta Outflow,0.0,01/01/2015,12/31/2015
14542,Depletion,Detailed Analysis Units by County,Dry Creek,DAU40549,2015,Urban,1000.0,01/01/2015,12/31/2015


# Shapefile Data

In [18]:
# Shapefile input
# Each shapefile sits in its own sub-folder of the raw-input directory. Build the paths from
# workingDir (set in the first code cell) instead of repeating the absolute directory three
# times, so relocating the data needs a single edit. The resulting path strings are unchanged.
WaterPlanAreaShape = gpd.read_file(workingDir + '/Water_Plan_Planning_Areas-shp/Water_Plan_Planning_Areas.shp')
HydrologicRegionsShape = gpd.read_file(workingDir + '/Hydrologic_Regions-shp/Hydrologic_Regions.shp')
DAUCOShape = gpd.read_file(workingDir + '/DAUCO-shp/WaDECADAU.shp')

In [19]:
# Water Plan Planning Area shapefile: keep the PA_NO attribute (renamed RU_ID) and the geometry.
dfWPAshapetemp = pd.DataFrame(data=WaterPlanAreaShape)

columnsList = ['RU_ID', 'geometry']
dfWPAshape = pd.DataFrame({
    'RU_ID': dfWPAshapetemp['PA_NO'],
    'geometry': dfWPAshapetemp['geometry'],
}).drop_duplicates()  # defaults: compare all columns, keep the first occurrence, keep row labels
dfWPAshape.head(3)

Unnamed: 0,RU_ID,geometry
0,1001,"POLYGON ((-115.16056 35.35811, -115.16038 35.3..."
1,1002,"POLYGON ((-116.62283 34.16694, -116.62253 34.1..."
2,1003,"POLYGON ((-114.80515 34.22629, -114.80435 34.2..."


In [20]:
# Hydrologic Region shapefile: copy the loaded table into a plain pandas DataFrame.
dfHRshapetemp = pd.DataFrame(data=HydrologicRegionsShape)

# Hydrologic region name -> region ID (as a string).
HydrologicRegionIDdict = {
    "North Coast": "1",
    "San Francisco Bay": "2",
    "Central Coast": "3",
    "South Coast": "4",
    "Sacramento River": "5",
    "San Joaquin River": "6",
    "Tulare Lake": "7",
    "North Lahontan": "8",
    "South Lahontan": "9",
    "Colorado River": "10",
}


def retrieveHRID(colrowValue):
    """Return the hydrologic-region ID string for a region name.

    Parameters
    ----------
    colrowValue : str or missing value
        Region name; surrounding whitespace is ignored.

    Returns
    -------
    str
        The ID from ``HydrologicRegionIDdict``, or ``''`` when the value is
        empty, null, or not a known region name.
    """
    if colrowValue == '' or pd.isnull(colrowValue):
        return ''
    # dict.get replaces the former bare `except:`, which hid every exception
    # (not just the unknown-name KeyError) behind an empty result.
    return HydrologicRegionIDdict.get(colrowValue.strip(), '')

# Translate each region name to its ID (used as RU_ID) and pair it with the region geometry.
columnsList = ['RU_ID', 'geometry']
dfHRshape = pd.DataFrame({
    'RU_ID': dfHRshapetemp['HR_NAME'].map(retrieveHRID),
    'geometry': dfHRshapetemp['geometry'],
}).drop_duplicates()  # defaults: compare all columns, keep the first occurrence, keep row labels
dfHRshape

Unnamed: 0,RU_ID,geometry
0,3,"POLYGON ((-122.11808 37.25528, -122.11795 37.2..."
1,10,"POLYGON ((-115.12583 35.39706, -115.09804 35.3..."
2,1,"POLYGON ((-122.30410 42.00836, -122.28218 42.0..."
3,8,"POLYGON ((-119.99946 41.99466, -119.99940 41.9..."
4,5,"POLYGON ((-120.20882 41.99296, -120.20892 41.9..."
5,2,"POLYGON ((-122.60736 38.65174, -122.60694 38.6..."
6,6,"POLYGON ((-120.53749 38.75047, -120.53636 38.7..."
7,4,"POLYGON ((-119.10918 34.82375, -119.10905 34.8..."
8,9,"POLYGON ((-118.88460 38.22193, -118.87599 38.2..."
9,7,"POLYGON ((-118.89596 37.20829, -118.89539 37.2..."


In [21]:
# DAUCO shapefile: keep its RU_ID attribute and the geometry.
dfDAUCOshapetemp = pd.DataFrame(data=DAUCOShape)

columnsList = ['RU_ID', 'geometry']
dfDAUCOshape = pd.DataFrame({
    'RU_ID': dfDAUCOshapetemp['RU_ID'],
    'geometry': dfDAUCOshapetemp['geometry'],
}).drop_duplicates()  # defaults: compare all columns, keep the first occurrence, keep row labels
dfDAUCOshape.head(3)

Unnamed: 0,RU_ID,geometry
0,DAU00125,"POLYGON ((-121.08710 41.99514, -120.70108 41.9..."
1,DAU00147,"POLYGON ((-121.88226 42.00329, -121.44784 41.9..."
2,DAU00247,"POLYGON ((-122.02221 42.00440, -121.94694 42.0..."


In [22]:
# Stack the three reporting-unit geometry tables (PA, HR, DAUCO) into one.
# NOTE(review): reset_index() without drop=True keeps each table's old row labels as an
# 'index' column, which is then written to the geometry CSV on export; confirm that is wanted.
frames = [dfWPAshape, dfHRshape, dfDAUCOshape]
dfAllShape = pd.concat(objs=frames).reset_index(drop=False)
dfAllShape

Unnamed: 0,index,RU_ID,geometry
0,0,1001,"POLYGON ((-115.16056 35.35811, -115.16038 35.3..."
1,1,1002,"POLYGON ((-116.62283 34.16694, -116.62253 34.1..."
2,2,1003,"POLYGON ((-114.80515 34.22629, -114.80435 34.2..."
3,3,1004,"POLYGON ((-115.12583 35.39706, -115.09804 35.3..."
4,4,1005,"POLYGON ((-116.56630 33.54937, -116.56550 33.5..."
...,...,...,...
584,515,DAU40417,"POLYGON ((-122.82745 38.85906, -122.83830 38.8..."
585,516,DAU40423,"POLYGON ((-122.85634 38.86348, -122.84992 38.8..."
586,517,DAU40449,"POLYGON ((-122.82249 38.85118, -122.81670 38.8..."
587,518,DAU40523,"POLYGON ((-123.10947 38.87033, -123.11125 38.8..."


### Inspect Output Data & Export

In [23]:
# List the dtype of every column of the aggregated output; the option context lifts pandas' display limits for this print only.
with pd.option_context('display.max_columns', None, 'display.max_rows', None):
    print(dfAllbyType.dtypes)

inVariable                  object
inReportingUnitTypeCV       object
inReportingUnitName         object
inReportingUnitNativeID     object
inYear                       int64
inBenUse                    object
inAmount                   float64
inTimeframeStart            object
inTimeframeEnd              object
dtype: object


In [24]:
# List the dtype of every column of the geometry output; the option context lifts pandas' display limits for this print only.
with pd.option_context('display.max_columns', None, 'display.max_rows', None):
    print(dfAllShape.dtypes)

index          int64
RU_ID         object
geometry    geometry
dtype: object


In [25]:
# Write both results to CSV without the row labels. The file names are relative, so they
# resolve against the current working directory.
dfAllbyType.to_csv(path_or_buf='P_caAggMaster.csv', index=False)  # aggregated amounts
dfAllShape.to_csv(path_or_buf='P_caGeometry.csv', index=False)  # reporting-unit geometry