# Pre-processing California for Site-Specific Division & Withdrawl Site data for WaDE upload
- Purpose:  To pre-process the data into one main file for simple DataFrame creation and extraction

In [1]:
# Needed Libraries / Modules

# ---- working with data ----
import os  # native operating system interaction
import numpy as np  # mathematical array manipulation
import pandas as pd  # data structure and data analysis
import geopandas as gpd  # geo-data structure and data analysis

# ---- visualization ----
import matplotlib.pyplot as plt  # plotting library
import seaborn as sns  # plotting library

# ---- API data retrieval ----
import requests  # http requests
import json  # JSON parse

# ---- Cleanup ----
import re  # string regular expression manipulation
from datetime import datetime  # date and time manipulation
pd.set_option('display.max_columns', 999)  # How to display all columns of a Pandas DataFrame in Jupyter Notebook
pd.set_option('display.float_format', lambda x: '%.5f' % x)  # suppress scientific notation in Pandas

In [2]:
# ---- working directory ----
workingDirString = "G:/Shared drives/WaDE Data/California/SS_DiversionsWithdrawalsWaterUse" # set working directory folder string here
os.chdir(workingDirString)
print(f'The working Directory is:', workingDirString)

The working Directory is: G:/Shared drives/WaDE Data/California/SS_DiversionsWithdrawalsWaterUse


## Data Input 1

In [3]:
# Input File - gage stations
fileInput = "RawInputData/water-rights-water-use-reported-2016-18.zip"
dfin1 = pd.read_csv(fileInput, encoding = "ISO-8859-1").replace(np.nan, "")

# WaDE UUID tracker for data assessment
if 'WaDEUUID' not in dfin1:
    dfin1['WaDEUUID'] = "in1" + dfin1.index.astype(str)
    dfin1.to_csv("RawInputData/water-rights-water-use-reported-2016-18.zip", compression=dict(method='zip', archive_name='water-rights-water-use-reported-2016-18.csv'), index=False)

print(len(dfin1))
dfin1.head()

  dfin1 = pd.read_csv(fileInput, encoding = "ISO-8859-1").replace(np.nan, "")


4889274


Unnamed: 0,ÃÂ¯ÃÂ»ÃÂ¿MONTH NAME,MONTH FORMATTED,WATER_RIGHT_ID,APPL_ID,YEAR,MONTH,AMOUNT,DIVERSION_TYPE,WR_WATER_RIGHT_ID,APPLICATION_NUMBER,CERTIFICATE_ID,PERMIT_ID,LICENSE_ID,WATER_RIGHT_TYPE,WATER_RIGHT_STATUS,PRIORITY_DATE,RECEIPT_DATE,REJECTION_DATE,APPLICATION_RECD_DATE,APPLICATION_ACCEPTANCE_DATE,PROJECT_TYPE,RECORD_SUMMARY,INCOMPLETE_STATEMENT,NUMBER_OF_PROTESTS,AGENT_NAME,AGENT_ENTITY_TYPE,APPLICATION_PRIMARY_OWNER,PRIMARY_OWNER_ENTITY_TYPE,SUB_TYPE,INI_REPORTED_DIV_AMOUNT,INI_REPORTED_DIV_UNIT,FACE_VALUE_AMOUNT,FACE_VALUE_UNITS,FEE_RECEIVED,APPL_FEE_AMOUNT,APPL_FEE_AMT_RECD,MAX_DD_APPL,MAX_DD_UNITS,MAX_DD_ANN,MAX_STORAGE,MAX_TAKEN_FROM_SOURCE,YEAR_DIVERSION_COMMENCED,MAX_BENEFICIALLY_USED,SUPPLEMENTAL_STATEMENT_CYCLE,TYPE_OF_DIVERSION_FACILITY,QUANTITY_OF_WATER_DIVERTED,QOW_DIVERTED_UNIT,QUANTITY_MEASUREMENT_YEAR,MAX_RATE_OF_DIVERSION,MAX_RATE_OF_DIV_UNIT,RECENT_WATER_USE_MIN,WATER_USE_MIN_UNIT,RECENT_WATER_USE_MAX,WATER_USE_MAX_UNIT,REQUEST_FOR_REVOCATION_RECD,STATE_WELL_NUMBER,DRILLED_WELL_YEAR,SURFACE_WATER_DIVERSIONS,DEPTH_OF_WELL,RELATIONSHIP_TYPE,PARTY_ID,EFFECTIVE_FROM_DATE,EFFECTIVE_TO_DATE,PRIMARY_OWNER_NAME,OFFICIAL_MAIL_RECEIVER,COUNT_NPO_OR_OTHER,EFFECTIVE_DATE,UPDATE_DATETIME,USE_CODE,USE_STATUS,NUMBER_OF_RESIDENCES,SEPERATELY_OWNED,USE_POPULATION,USE_POPULATION_PEOPLE,ESTIMATED_USE_PER_PERSON,USE_POPULATION_STOCK,TYPE_OF_STOCK,AREA_FOR_INCI_IRRIGATION,USE_NET_ACREAGE,USE_GROSS_ACREAGE,USE_DIRECT_DIV_ANNUAL_AMOUNT,USE_DIRECT_DIVERSION_RATE,USE_DIRECT_DIV_RATE_UNITS,POU_DEVELOPMENT_STATUS,DIRECT_DIV_SEASON_START,DIRECT_DIV_SEASON_END,USE_STORAGE_AMOUNT,STORAGE_SEASON_START,STORAGE_SEASON_END,SEASON_DIRECT_DIV_RATE,SEASON_STORAGE_AMOUNT,SEASON_DIRECT_DIV_AA,DIRECT_DIV_SEASON_STATUS,COLLECTION_SEASON_STATUS,USE_COUNT,POD_NUMBER,POD_ID,POD_STATUS,SOURCE_TYPE,POD_NAME,POD_TYPE,DIVERSION_WORKS_STATUS,STREAM_CLASSIFICATION,DIRECT_DIV_AMOUNT,DIRECT_DIVERSION_RATE,DIRECT_DIV_RATE_UNIT,STORAGE_AMOUNT,DIVERSION_RATE_TO_OFF_STREAM,OFF_STO_DIV_RATE_UNIT,POD_LAST_UPDATE_DATE,POD_COUNT,APPLICATION_ID,OBJECTID,POD_NUMBER_GIS,HAS_OPOD,APPL_POD,POD_ID_GIS,COUNTY,SP_ZONE,DIVERSION_SITE_NAME,NORTH_COORD,EAST_COORD,LATITUDE,LONGITUDE,QUARTER_QUARTER,QUARTER,SECTION_CLASSIFIER,SECTION_NUMBER,TOWNSHIP_NUMBER,TOWNSHIP_DIRECTION,RANGE_NUMBER,RANGE_DIRECTION,MERIDIAN,LOCATION_METHOD,SPECIAL_USE_AREA,SOURCE_NAME,TRIB_DESC,WATERSHED,HUC_12_NUMBER,HUC_12_NAME,HUC_8_NUMBER,HUC_8_NAME,QUAD_MAP_NAME,QUAD_MAP_NUMBER,PERMIT_ORIGINAL_ISSUE_DATE,COMPLETE_CONSTRUCTION_DATE,COMPLETE_APPLIC_WATER_DATE,LICENSE_ORIGINAL_ISSUE_DATE,WATER_RIGHT_DESCRIPTION,PROGRAM_UNIT,LICENSE_REQUEST_TYPE,LICENSE_REQUESTED_DATE,INSPECTION_DATE,REPORT_DATE,OFFER_SENT_DATE,ACCEPTED_OFFER_DATE,PETITION_ID,PETITION_TYPE,PETITION_STATUS_TYPE,DATE_RECEIVED,DATE_COMPLETED,PET_LAST_UPDATE_DATE,NUM_OF_PETITIONS,ENF_CASE_NUMBER,ENF_CASE_START_DATE,ENF_CASE_CLOSURE_DATE,ENF_CASE_COUNTY,NUMBER_OF_ENFORCEMENT_CASE,WaDEUUID
0,January,1/1/2016,331,A002270,2016,1,1047.0,DIRECT,331,A002270,11835.0,2631.0,11835.0,Appropriative,Licensed,,3/22/1921,,3/22/1921,3/22/1921,,Migrated data from old WRIMS system.,,0,BRIAN MUELLER,Individual,EL DORADO IRRIGATION DISTRICT,Corporation,,,,29845.9,Acre-feet per Year,5.0,5.0,5.0,63.8,Cubic Feet per Second,0.0,22000.0,29845.9,,0.0,,,,,,,,,,,,N,,,,,Primary Owner,401810.0,1/23/2004,,EL DORADO IRRIGATION DISTRICT,N,2,1/17/1986,10/5/2009 10:26:27 AM,Incidental Power,Added by change order,,,0.0,,,,,,0.0,0.0,0.0,,Cubic Feet per Second,,4/15,6/15,22000.0,11/15,6/1,63.8,22000.0,,,,7,1.0,40933.0,Active,,,"Point of Direct Diversion,Point of Storage - U...",,,0.0,27.1,Cubic Feet per Second,7000.0,,,9/28/2007 6:55:24 PM,2.0,A002270,74857.0,1.0,N,A002270_01,40933.0,El Dorado,2.0,,2025485.09699,6972060.8621,38.71507,-120.5616,NE,SW,,17.0,10.0,N,13.0,E,Mount Diablo,DD_NE,"DELTA WATERSHEDS,FULLY APPROPRIATED STREAM",SLY PARK CREEK,,MIDDLE SIERRA,180400130101.0,Sly Park Creek,18040013.0,Upper Cosumnes,SLY PARK,GG024,9/30/1926,7/1/1969,7/1/1979,1/17/1986,Migrated data from old WRIMS system.,,Permitee Request,,,,,,A002270P009092,Change,Completed,1/12/2010,,10/26/2010 1:13:45 PM,1.0,,,,,0,in10
1,January,1/1/2017,331,A002270,2017,1,0.0,DIRECT,331,A002270,11835.0,2631.0,11835.0,Appropriative,Licensed,,3/22/1921,,3/22/1921,3/22/1921,,Migrated data from old WRIMS system.,,0,BRIAN MUELLER,Individual,EL DORADO IRRIGATION DISTRICT,Corporation,,,,29845.9,Acre-feet per Year,5.0,5.0,5.0,63.8,Cubic Feet per Second,0.0,22000.0,29845.9,,0.0,,,,,,,,,,,,N,,,,,Primary Owner,401810.0,1/23/2004,,EL DORADO IRRIGATION DISTRICT,N,2,1/17/1986,10/5/2009 10:26:27 AM,Incidental Power,Added by change order,,,0.0,,,,,,0.0,0.0,0.0,,Cubic Feet per Second,,4/15,6/15,22000.0,11/15,6/1,63.8,22000.0,,,,7,1.0,40933.0,Active,,,"Point of Direct Diversion,Point of Storage - U...",,,0.0,27.1,Cubic Feet per Second,7000.0,,,9/28/2007 6:55:24 PM,2.0,A002270,74857.0,1.0,N,A002270_01,40933.0,El Dorado,2.0,,2025485.09699,6972060.8621,38.71507,-120.5616,NE,SW,,17.0,10.0,N,13.0,E,Mount Diablo,DD_NE,"DELTA WATERSHEDS,FULLY APPROPRIATED STREAM",SLY PARK CREEK,,MIDDLE SIERRA,180400130101.0,Sly Park Creek,18040013.0,Upper Cosumnes,SLY PARK,GG024,9/30/1926,7/1/1969,7/1/1979,1/17/1986,Migrated data from old WRIMS system.,,Permitee Request,,,,,,A002270P009092,Change,Completed,1/12/2010,,10/26/2010 1:13:45 PM,1.0,,,,,0,in11
2,January,1/1/2018,331,A002270,2018,1,0.0,DIRECT,331,A002270,11835.0,2631.0,11835.0,Appropriative,Licensed,,3/22/1921,,3/22/1921,3/22/1921,,Migrated data from old WRIMS system.,,0,BRIAN MUELLER,Individual,EL DORADO IRRIGATION DISTRICT,Corporation,,,,29845.9,Acre-feet per Year,5.0,5.0,5.0,63.8,Cubic Feet per Second,0.0,22000.0,29845.9,,0.0,,,,,,,,,,,,N,,,,,Primary Owner,401810.0,1/23/2004,,EL DORADO IRRIGATION DISTRICT,N,2,1/17/1986,10/5/2009 10:26:27 AM,Incidental Power,Added by change order,,,0.0,,,,,,0.0,0.0,0.0,,Cubic Feet per Second,,4/15,6/15,22000.0,11/15,6/1,63.8,22000.0,,,,7,1.0,40933.0,Active,,,"Point of Direct Diversion,Point of Storage - U...",,,0.0,27.1,Cubic Feet per Second,7000.0,,,9/28/2007 6:55:24 PM,2.0,A002270,74857.0,1.0,N,A002270_01,40933.0,El Dorado,2.0,,2025485.09699,6972060.8621,38.71507,-120.5616,NE,SW,,17.0,10.0,N,13.0,E,Mount Diablo,DD_NE,"DELTA WATERSHEDS,FULLY APPROPRIATED STREAM",SLY PARK CREEK,,MIDDLE SIERRA,180400130101.0,Sly Park Creek,18040013.0,Upper Cosumnes,SLY PARK,GG024,9/30/1926,7/1/1969,7/1/1979,1/17/1986,Migrated data from old WRIMS system.,,Permitee Request,,,,,,A002270P009092,Change,Completed,1/12/2010,,10/26/2010 1:13:45 PM,1.0,,,,,0,in12
3,January,1/1/2021,331,A002270,2021,1,0.0,DIRECT,331,A002270,11835.0,2631.0,11835.0,Appropriative,Licensed,,3/22/1921,,3/22/1921,3/22/1921,,Migrated data from old WRIMS system.,,0,BRIAN MUELLER,Individual,EL DORADO IRRIGATION DISTRICT,Corporation,,,,29845.9,Acre-feet per Year,5.0,5.0,5.0,63.8,Cubic Feet per Second,0.0,22000.0,29845.9,,0.0,,,,,,,,,,,,N,,,,,Primary Owner,401810.0,1/23/2004,,EL DORADO IRRIGATION DISTRICT,N,2,1/17/1986,10/5/2009 10:26:27 AM,Incidental Power,Added by change order,,,0.0,,,,,,0.0,0.0,0.0,,Cubic Feet per Second,,4/15,6/15,22000.0,11/15,6/1,63.8,22000.0,,,,7,1.0,40933.0,Active,,,"Point of Direct Diversion,Point of Storage - U...",,,0.0,27.1,Cubic Feet per Second,7000.0,,,9/28/2007 6:55:24 PM,2.0,A002270,74857.0,1.0,N,A002270_01,40933.0,El Dorado,2.0,,2025485.09699,6972060.8621,38.71507,-120.5616,NE,SW,,17.0,10.0,N,13.0,E,Mount Diablo,DD_NE,"DELTA WATERSHEDS,FULLY APPROPRIATED STREAM",SLY PARK CREEK,,MIDDLE SIERRA,180400130101.0,Sly Park Creek,18040013.0,Upper Cosumnes,SLY PARK,GG024,9/30/1926,7/1/1969,7/1/1979,1/17/1986,Migrated data from old WRIMS system.,,Permitee Request,,,,,,A002270P009092,Change,Completed,1/12/2010,,10/26/2010 1:13:45 PM,1.0,,,,,0,in13
4,January,1/1/2022,331,A002270,2022,1,0.0,DIRECT,331,A002270,11835.0,2631.0,11835.0,Appropriative,Licensed,,3/22/1921,,3/22/1921,3/22/1921,,Migrated data from old WRIMS system.,,0,BRIAN MUELLER,Individual,EL DORADO IRRIGATION DISTRICT,Corporation,,,,29845.9,Acre-feet per Year,5.0,5.0,5.0,63.8,Cubic Feet per Second,0.0,22000.0,29845.9,,0.0,,,,,,,,,,,,N,,,,,Primary Owner,401810.0,1/23/2004,,EL DORADO IRRIGATION DISTRICT,N,2,1/17/1986,10/5/2009 10:26:27 AM,Incidental Power,Added by change order,,,0.0,,,,,,0.0,0.0,0.0,,Cubic Feet per Second,,4/15,6/15,22000.0,11/15,6/1,63.8,22000.0,,,,7,1.0,40933.0,Active,,,"Point of Direct Diversion,Point of Storage - U...",,,0.0,27.1,Cubic Feet per Second,7000.0,,,9/28/2007 6:55:24 PM,2.0,A002270,74857.0,1.0,N,A002270_01,40933.0,El Dorado,2.0,,2025485.09699,6972060.8621,38.71507,-120.5616,NE,SW,,17.0,10.0,N,13.0,E,Mount Diablo,DD_NE,"DELTA WATERSHEDS,FULLY APPROPRIATED STREAM",SLY PARK CREEK,,MIDDLE SIERRA,180400130101.0,Sly Park Creek,18040013.0,Upper Cosumnes,SLY PARK,GG024,9/30/1926,7/1/1969,7/1/1979,1/17/1986,Migrated data from old WRIMS system.,,Permitee Request,,,,,,A002270P009092,Change,Completed,1/12/2010,,10/26/2010 1:13:45 PM,1.0,,,,,0,in14


In [4]:
# Clean data a little
dfin1 = dfin1.fillna("") # remove nan values
dfin1['USE_DIRECT_DIVERSION_RATE'] = pd.to_numeric(dfin1['USE_DIRECT_DIVERSION_RATE'], errors='coerce').fillna(0) # make sure this is numeric.

In [5]:
# convert units to WaDE Approproiate values (CFS or AF)
def convertAmountToUnitFunc(val, unit):
    outValue = None
    if unit == "Cubic Feet per Second":
        outValue = val
    if unit == "Gallons per Day":
        outValue = val / (646316.883)
    if unit == "Acre-feet per Year":
        outValue = val / (723.968)
    if unit == "Gallons per Minute":
        outValue = val / (448.83117)
    if unit == 'Acre-feet':
        outValue = val
    if unit == 'Gallons':
        outValue = val / (325850.943)
    return(outValue)

dfin1['in_Amount'] = dfin1.apply(lambda row: convertAmountToUnitFunc(row['USE_DIRECT_DIVERSION_RATE'], row['USE_DIRECT_DIV_RATE_UNITS']), axis=1)
dfin1['in_Amount'].unique()

array([0.00000000e+00,            nan, 1.00000000e+00, 6.65308321e-03,
       3.09445731e-04, 4.64168596e-03, 2.90105372e-03, 1.89000000e+02,
       2.00000000e+02, 3.01709587e-03, 7.73614326e-04, 2.32084298e-04,
       1.54722865e-04, 3.86807163e-05, 7.00000000e+00, 1.46986722e-02,
       1.20683835e-02, 2.47556584e-02, 8.00000000e+00, 1.15268535e-02,
       5.00000000e+00, 1.31514435e-02, 2.22800926e-03, 9.28337192e-04,
       3.09445731e-03, 1.58590937e-02, 1.50000000e+01, 5.00000000e+01,
       1.54722865e-03, 2.30000000e+01, 1.30000000e+02, 1.85667438e-03,
       1.39250579e-03, 7.73614326e-05, 4.50000000e+01, 3.30000000e+02,
       3.90000000e+01, 3.40390304e-03, 5.00000000e+03, 2.20000000e+01,
       2.80000000e+01, 3.00000000e+00, 1.85667438e-04, 3.86807163e-03,
       2.70000000e+01, 8.00000000e+01, 4.50000000e+02, 1.16000000e+02,
       1.09600000e+03, 3.69849536e-01, 4.00000000e+00, 1.60000000e+01,
       1.70195152e-02, 2.00000000e+00, 1.67100694e-02, 4.64168596e-04,
      

In [6]:
# create output POD dataframe
df = pd.DataFrame()

# Data Assessment UUID
df['WaDEUUID'] = dfin1['WaDEUUID']

# Method Info
df['in_MethodUUID'] = "CAssdw_M1"

# Variable Info
df['in_AggregationIntervalUnitCV'] = "Monthly"
df['in_VariableCV'] = "Water Use"

# Organization Info
df['in_OrganizationUUID'] = "CAssdw_O1"

# WaterSource Info
df['in_Geometry'] = ""
df['in_GNISFeatureNameCV'] = ""
df['in_WaterQualityIndicatorCV'] = ""
df['in_WaterSourceName'] = dfin1['SOURCE_NAME']
df['in_WaterSourceNativeID'] = "" # auto fill in below if not provdied by state
df['in_WaterSourceTypeCV'] = dfin1['SOURCE_TYPE']

# Site Info
df['in_CoordinateAccuracy'] = ""
df['in_CoordinateMethodCV'] = dfin1['LOCATION_METHOD']
df['in_County'] = dfin1['COUNTY']
df['in_EPSGCodeCV'] = 4326
df['in_Geometry'] = ""
df['in_GNISCodeCV'] = ""
df['in_HUC12'] = dfin1['HUC_12_NUMBER']
df['in_HUC8'] = dfin1['HUC_8_NUMBER']
df['in_Latitude'] = dfin1['LATITUDE']
df['in_Longitude'] = dfin1['LONGITUDE']
df['in_NHDNetworkStatusCV'] = ""
df['in_NHDProductCV'] = ""
df['in_PODorPOUSite'] = "POD" 
df['in_SiteName'] = dfin1['POD_NAME']
df['in_SiteNativeID'] = dfin1['POD_ID']
df['in_SitePoint'] = ""
df['in_SiteTypeCV'] = dfin1['POD_TYPE']
df['in_StateCV'] = "CA"
df['in_USGSSiteID'] = ""

# Site VariableAmounts Info
df['in_Amount'] = dfin1['in_Amount']
df['in_AllocationCropDutyAmount'] = ""
df['in_AssociatedNativeAllocationIDs'] = dfin1['APPLICATION_NUMBER']
df['in_BeneficialUseCategory'] = dfin1['USE_CODE']
df['in_CommunityWaterSupplySystem'] = ""
df['in_CropTypeCV'] = ""
df['in_CustomerTypeCV'] = ""
df['in_DataPublicationDate'] = ""
df['in_DataPublicationDOI'] = ""
df['in_Geometry'] = ""
#df['in_IrrigatedAcreage'] = dfin1['USE_NET_ACREAGE']
df['in_IrrigatedAcreage'] = ""
df['in_IrrigationMethodCV'] = ""
#df['in_PopulationServed'] = dfin1['USE_POPULATION']
df['in_PopulationServed'] = ""
df['in_PowerGeneratedGWh'] = ""
df['in_PowerType'] = ""
df['in_PrimaryUseCategory'] = dfin1['USE_CODE']
df['in_ReportYearCV'] = dfin1['YEAR']
df['in_SDWISIdentifier'] = ""
df['in_TimeframeEnd'] = dfin1['MONTH FORMATTED']
df['in_TimeframeStart'] = dfin1['MONTH FORMATTED']

outdf1 = df.copy()
outdf1 = outdf1.drop_duplicates().reset_index(drop=True)
print(len(outdf1))
outdf1.head()

4889274


Unnamed: 0,WaDEUUID,in_MethodUUID,in_AggregationIntervalUnitCV,in_VariableCV,in_OrganizationUUID,in_Geometry,in_GNISFeatureNameCV,in_WaterQualityIndicatorCV,in_WaterSourceName,in_WaterSourceNativeID,in_WaterSourceTypeCV,in_CoordinateAccuracy,in_CoordinateMethodCV,in_County,in_EPSGCodeCV,in_GNISCodeCV,in_HUC12,in_HUC8,in_Latitude,in_Longitude,in_NHDNetworkStatusCV,in_NHDProductCV,in_PODorPOUSite,in_SiteName,in_SiteNativeID,in_SitePoint,in_SiteTypeCV,in_StateCV,in_USGSSiteID,in_Amount,in_AllocationCropDutyAmount,in_AssociatedNativeAllocationIDs,in_BeneficialUseCategory,in_CommunityWaterSupplySystem,in_CropTypeCV,in_CustomerTypeCV,in_DataPublicationDate,in_DataPublicationDOI,in_IrrigatedAcreage,in_IrrigationMethodCV,in_PopulationServed,in_PowerGeneratedGWh,in_PowerType,in_PrimaryUseCategory,in_ReportYearCV,in_SDWISIdentifier,in_TimeframeEnd,in_TimeframeStart
0,in10,CAssdw_M1,Monthly,Water Use,CAssdw_O1,,,,SLY PARK CREEK,,,,DD_NE,El Dorado,4326,,180400130101.0,18040013.0,38.71507,-120.5616,,,POD,,40933.0,,"Point of Direct Diversion,Point of Storage - U...",CA,,0.0,,A002270,Incidental Power,,,,,,,,,,,Incidental Power,2016,,1/1/2016,1/1/2016
1,in11,CAssdw_M1,Monthly,Water Use,CAssdw_O1,,,,SLY PARK CREEK,,,,DD_NE,El Dorado,4326,,180400130101.0,18040013.0,38.71507,-120.5616,,,POD,,40933.0,,"Point of Direct Diversion,Point of Storage - U...",CA,,0.0,,A002270,Incidental Power,,,,,,,,,,,Incidental Power,2017,,1/1/2017,1/1/2017
2,in12,CAssdw_M1,Monthly,Water Use,CAssdw_O1,,,,SLY PARK CREEK,,,,DD_NE,El Dorado,4326,,180400130101.0,18040013.0,38.71507,-120.5616,,,POD,,40933.0,,"Point of Direct Diversion,Point of Storage - U...",CA,,0.0,,A002270,Incidental Power,,,,,,,,,,,Incidental Power,2018,,1/1/2018,1/1/2018
3,in13,CAssdw_M1,Monthly,Water Use,CAssdw_O1,,,,SLY PARK CREEK,,,,DD_NE,El Dorado,4326,,180400130101.0,18040013.0,38.71507,-120.5616,,,POD,,40933.0,,"Point of Direct Diversion,Point of Storage - U...",CA,,0.0,,A002270,Incidental Power,,,,,,,,,,,Incidental Power,2021,,1/1/2021,1/1/2021
4,in14,CAssdw_M1,Monthly,Water Use,CAssdw_O1,,,,SLY PARK CREEK,,,,DD_NE,El Dorado,4326,,180400130101.0,18040013.0,38.71507,-120.5616,,,POD,,40933.0,,"Point of Direct Diversion,Point of Storage - U...",CA,,0.0,,A002270,Incidental Power,,,,,,,,,,,Incidental Power,2022,,1/1/2022,1/1/2022


## Data Input 2
- site info
- timeseries info

In [7]:
# etc etc,
# outdf2

## Concatenate DataFrames together

In [8]:
# Concatenate dataframes
frames = [outdf1] # list all out dataframes here
outdf = pd.concat(frames)
outdf = outdf.drop_duplicates().reset_index(drop=True).replace(np.nan, "")
print(len(outdf))

4889274


## Clean Data / data types

In [9]:
outdf['in_WaterSourceTypeCV'].unique()

array(['', 'Surface', 'Subsurface'], dtype=object)

In [10]:
# updating in_WaterSourceTypeCV to be more machine readable / WaDE specific
# ----------------------------------------------------------------------------------------------------

def createWaterSourceTypeCV(inWST):
    inWST = str(inWST).strip()
    
    if inWST == "":
        outString = "Unspecified"
    elif inWST == "Subsurfacer":
        outString = "Groundwater"
    else:
        outString =  "Surface Water"
      
    return outString

outdf['in_WaterSourceTypeCV'] = outdf.apply(lambda row: createWaterSourceTypeCV(row['in_WaterSourceTypeCV']), axis=1)
outdf['in_WaterSourceTypeCV'].unique()

array(['Unspecified', 'Surface Water'], dtype=object)

In [11]:
# Fill empty ben use values

def fillEmptyBenUseFunc(val):
    val = str(val).strip()
    if val == "" or val == " " or val == "nan" or pd.isnull(val):
        outString = "Unspecified"
    else:
        outString = val
    return outString
    
outdf['in_BeneficialUseCategory'] = outdf.apply(lambda row: fillEmptyBenUseFunc(row['in_BeneficialUseCategory']), axis=1)
outdf['in_PrimaryUseCategory'] = outdf.apply(lambda row: fillEmptyBenUseFunc(row['in_PrimaryUseCategory']), axis=1)
outdf['in_BeneficialUseCategory'].unique()

array(['Incidental Power', 'Domestic', 'Irrigation',
       'Fish and Wildlife Preservation and Enhancement', 'Recreational',
       'Municipal', 'Power', 'Fire Protection', 'Industrial',
       'Aquaculture', 'Mining', 'Frost Protection', 'Stockwatering',
       'Milling', 'Heat Control', 'Other', 'Dust Control', 'Unspecified',
       'Snow Making', 'Aesthetic', 'Water Quality'], dtype=object)

In [12]:
# Clean name entries of spcial characters
def removeSpecialCharsFunc(Val):
    Val = str(Val)
    Val = re.sub("[$@&.;/\),(-]", "", Val).title().replace("  ", " ").strip()
    return Val

In [13]:
outdf['in_WaterSourceName'] = outdf.apply(lambda row: removeSpecialCharsFunc(row['in_WaterSourceName']), axis=1)
outdf['in_WaterSourceName'].unique()

array(['Sly Park Creek', 'Icy Spring No 2', 'Forsythe Creek', ...,
       'Boardman Lake', 'Stone Lakesnodgrass Slough', 'Mokeloumne River'],
      dtype=object)

In [14]:
outdf['in_SiteName'] = outdf.apply(lambda row: removeSpecialCharsFunc(row['in_SiteName']), axis=1)
outdf['in_SiteName'].unique()

array(['', 'Decree No 6616', 'Houk Pump', ..., "Mario'S Pump",
       'Spocjlei Pod', 'Thornton Almonds Llc Pump'], dtype=object)

In [15]:
outdf['in_County'] = outdf.apply(lambda row: removeSpecialCharsFunc(row['in_County']), axis=1)
outdf['in_County'].unique()

array(['El Dorado', 'Los Angeles', 'Mendocino', 'Sutter', 'Siskiyou',
       'Yuba', 'Stanislaus', 'Santa Barbara', 'Yolo', 'Nevada', 'Sonoma',
       'Sacramento', 'San Mateo', 'Mono', 'Butte', 'Lake', 'San Joaquin',
       'Trinity', 'Placer', 'Modoc', 'Merced', 'Contra Costa', 'Sierra',
       'Glenn', 'Amador', 'Plumas', 'Inyo', 'Santa Cruz', 'Shasta',
       'Madera', 'Santa Clara', 'Napa', 'San Bernardino', 'Colusa',
       'Monterey', 'San Diego', 'Del Norte', 'Riverside', 'Tulare',
       'Kings', 'San Benito', 'Alameda', 'Marin', 'Humboldt', 'Calaveras',
       'Lassen', 'Kern', 'Alpine', 'Tehama', 'Orange', 'Tuolumne',
       'Solano', 'Ventura', '', 'Fresno', 'San Luis Obispo', 'Mariposa',
       'Imperial', 'San Francisco'], dtype=object)

In [16]:
# Ensure Empty String / remove string value of "nan"

def ensureEmptyString(val):
    val = str(val).strip()
    if val == "" or val == " " or val == "nan" or pd.isnull(val):
        outString = ""
    else:
        outString = val
    return outString

In [17]:
outdf['in_WaterSourceName'] = outdf.apply(lambda row: ensureEmptyString(row['in_WaterSourceName']), axis=1)
outdf['in_WaterSourceName'].unique()

array(['Sly Park Creek', 'Icy Spring No 2', 'Forsythe Creek', ...,
       'Boardman Lake', 'Stone Lakesnodgrass Slough', 'Mokeloumne River'],
      dtype=object)

In [18]:
outdf['in_WaterSourceTypeCV'] = outdf.apply(lambda row: ensureEmptyString(row['in_WaterSourceTypeCV']), axis=1)
outdf['in_WaterSourceTypeCV'].unique()

array(['Unspecified', 'Surface Water'], dtype=object)

In [19]:
outdf['in_SiteTypeCV'] = outdf.apply(lambda row: ensureEmptyString(row['in_SiteTypeCV']), axis=1)
outdf['in_SiteTypeCV'].unique()

array(['Point of Direct Diversion,Point of Storage - Unspecified',
       'Point of Direct Diversion', 'Movable Point of Diversion',
       'Point of Storage - Unspecified,Point of Storage - Unspecified',
       'Point of Storage - Unspecified', 'Point of Onstream Storage',
       'Point of Direct Diversion,Point of Diversion to Offstream Storage',
       'Point of Diversion to Underground Storage',
       'Point of Direct Diversion,Point of Rediversion',
       'Point of Rediversion',
       'Point of Diversion to Underground Storage,Point of Storage - Unspecified,Point of Storage - Unspecified',
       'Point of Direct Diversion,Point of Direct Diversion',
       'Point of Direct Diversion,Point of Onstream Storage',
       'Point of Direct Diversion,Point of Onstream Storage,Point of Rediversion',
       'Point of Diversion to Underground Storage,Point of Onstream Storage',
       'Point of Diversion to Underground Storage,Point of Rediversion',
       'Point of Diversion to Undergr

In [20]:
outdf['in_SiteName'] = outdf.apply(lambda row: ensureEmptyString(row['in_SiteName']), axis=1)
outdf['in_SiteName'].unique()

array(['', 'Decree No 6616', 'Houk Pump', ..., "Mario'S Pump",
       'Spocjlei Pod', 'Thornton Almonds Llc Pump'], dtype=object)

In [21]:
outdf['in_County'] = outdf.apply(lambda row: ensureEmptyString(row['in_County']), axis=1)
outdf['in_County'].unique()

array(['El Dorado', 'Los Angeles', 'Mendocino', 'Sutter', 'Siskiyou',
       'Yuba', 'Stanislaus', 'Santa Barbara', 'Yolo', 'Nevada', 'Sonoma',
       'Sacramento', 'San Mateo', 'Mono', 'Butte', 'Lake', 'San Joaquin',
       'Trinity', 'Placer', 'Modoc', 'Merced', 'Contra Costa', 'Sierra',
       'Glenn', 'Amador', 'Plumas', 'Inyo', 'Santa Cruz', 'Shasta',
       'Madera', 'Santa Clara', 'Napa', 'San Bernardino', 'Colusa',
       'Monterey', 'San Diego', 'Del Norte', 'Riverside', 'Tulare',
       'Kings', 'San Benito', 'Alameda', 'Marin', 'Humboldt', 'Calaveras',
       'Lassen', 'Kern', 'Alpine', 'Tehama', 'Orange', 'Tuolumne',
       'Solano', 'Ventura', '', 'Fresno', 'San Luis Obispo', 'Mariposa',
       'Imperial', 'San Francisco'], dtype=object)

In [22]:
# Ensure Latitude entry is numireic, replace '0' values for removal
outdf['in_Latitude'] = pd.to_numeric(outdf['in_Latitude'], errors='coerce').replace(0,"").fillna("")
outdf['in_Latitude'].unique()

array([38.71507495, 34.57797187, 39.24992704, ..., 38.41246111,
       38.20644444, 40.14626712], dtype=object)

In [23]:
# Ensure Longitude entry is numireic, replace '0' values for removal
outdf['in_Longitude'] = pd.to_numeric(outdf['in_Longitude'], errors='coerce').replace(0,"").fillna("")
outdf['in_Longitude'].unique()

array([-120.56159884, -118.28920963, -123.20600424, ..., -121.50423333,
       -121.48065833, -123.87636654], dtype=object)

In [24]:
# Ensure Amount entry is either numireic or blank, no 0 entries
outdf['in_Amount'] = pd.to_numeric(outdf['in_Amount'], errors='coerce').round(2).replace(0,"").fillna("")
outdf['in_Amount'].unique()

array(['', 1.0, 0.01, 189.0, 200.0, 7.0, 0.02, 8.0, 5.0, 15.0, 50.0, 23.0,
       130.0, 45.0, 330.0, 39.0, 5000.0, 22.0, 28.0, 3.0, 27.0, 80.0,
       450.0, 116.0, 1096.0, 0.37, 4.0, 16.0, 2.0, 32.0, 13.0, 6.0, 100.0,
       20.0, 11.0, 198.0, 132.0, 34.0, 35.0, 10.0, 0.03, 700.0, 40.0,
       60.0, 0.07, 350.0, 12.0, 6.02, 0.1, 5.12, 0.33, 175.0, 0.04, 1.23,
       24.0, 0.45, 0.27, 30.0, 1.34, 0.08, 25.0, 300.0, 20000.0, 1.67,
       0.11, 0.17, 0.56, 0.87, 0.43, 1.11, 0.67, 7.58, 2.01, 55.7, 66.84,
       4.46, 25.66, 0.06, 0.25, 3.01, 2.23, 0.13, 61.27, 5.79, 2.67, 3.92,
       0.34, 41.0, 21.0, 277.0, 9.0, 18.0, 59.0, 1697.0, 85.0, 730.0,
       305.0, 70.0, 90.0, 13.62, 3.34, 8.91, 7.12, 0.89, 17.0, 650.0,
       120.0, 54.0, 150.0, 65.0, 225.0, 125.0, 103.0, 75.0, 360.0, 56.0,
       19.0, 26.0, 180.0, 31.0, 123.0, 500.0, 165.0, 155.0, 33.0, 42.0,
       1493.0, 37.0, 265.0, 835.0, 14.0, 400.0, 1483.0, 800.0, 67.0, 51.0,
       46.0, 1782.0, 380.0, 36.0, 250.0, 104.0, 425.0, 6

In [25]:
# # Ensure PopulationServed entry is numireic WITH 0 entries (no blank strings)
# outdf['in_PopulationServed'] = pd.to_numeric(outdf['in_PopulationServed'], errors='coerce').round().replace("",0).fillna(0).astype(int)
# outdf['in_PopulationServed'].unique()

In [26]:
# Convert TimeframeEnd to YYYY-MM-DD format.
outdf['in_TimeframeEnd'] = pd.to_datetime(outdf['in_TimeframeEnd'], utc=True, errors = 'coerce').fillna("")
outdf['in_TimeframeEnd'] = pd.to_datetime(outdf["in_TimeframeEnd"].dt.strftime('%m/%d/%Y'))
outdf['in_TimeframeEnd'].unique()

<DatetimeArray>
['2016-01-01 00:00:00', '2017-01-01 00:00:00', '2018-01-01 00:00:00',
 '2021-01-01 00:00:00', '2022-01-01 00:00:00', '2016-02-01 00:00:00',
 '2017-02-01 00:00:00', '2018-02-01 00:00:00', '2021-02-01 00:00:00',
 '2022-02-01 00:00:00',
 ...
 '1985-03-01 00:00:00', '1985-04-01 00:00:00', '1985-05-01 00:00:00',
 '1985-06-01 00:00:00', '1985-07-01 00:00:00', '1985-08-01 00:00:00',
 '1985-09-01 00:00:00', '1985-10-01 00:00:00', '1985-11-01 00:00:00',
 '1985-12-01 00:00:00']
Length: 432, dtype: datetime64[ns]

In [27]:
# Convert TimeframeStart to YYYY-MM-DD format.
outdf['in_TimeframeStart'] = pd.to_datetime(outdf['in_TimeframeStart'], utc=True, errors = 'coerce').fillna("")
outdf['in_TimeframeStart'] = pd.to_datetime(outdf["in_TimeframeStart"].dt.strftime('%m/%d/%Y'))
outdf['in_TimeframeStart'].unique()

<DatetimeArray>
['2016-01-01 00:00:00', '2017-01-01 00:00:00', '2018-01-01 00:00:00',
 '2021-01-01 00:00:00', '2022-01-01 00:00:00', '2016-02-01 00:00:00',
 '2017-02-01 00:00:00', '2018-02-01 00:00:00', '2021-02-01 00:00:00',
 '2022-02-01 00:00:00',
 ...
 '1985-03-01 00:00:00', '1985-04-01 00:00:00', '1985-05-01 00:00:00',
 '1985-06-01 00:00:00', '1985-07-01 00:00:00', '1985-08-01 00:00:00',
 '1985-09-01 00:00:00', '1985-10-01 00:00:00', '1985-11-01 00:00:00',
 '1985-12-01 00:00:00']
Length: 432, dtype: datetime64[ns]

In [28]:
# extract year out
# outdf['in_ReportYearCV'] = pd.to_datetime(outdf['in_ReportYearCV'], utc=True)
# outdf['in_ReportYearCV'] = pd.to_datetime(outdf["in_ReportYearCV"].dt.strftime('%m/%d/%Y'))
# outdf['in_ReportYearCV'] = outdf['in_ReportYearCV'].dt.year
outdf['in_ReportYearCV'].unique()

array([2016, 2017, 2018, 2021, 2022, 1995, 1993, 1994, 1991, 1992, 1975,
       1986, 1987, 1988, 1989, 1990, 1915, 1963, 1913, 1959, 1906, 1976,
       1916, 1971, 1972, 1973, 1974, 1977, 1978, 1979, 1980, 1981, 1982,
       1983, 1984, 1985], dtype=int64)

In [29]:
# Creating WaDE Custom VariableSpecificCV
# ----------------------------------------------------------------------------------------------------
def createVariableSpecificCV(inV, inAIU, inBU, inWST):
    inV = str(inV).strip()
    inAIU = str(inAIU).strip()
    inBU = str(inBU).strip().title()
    inWST = str(inWST).strip()
    outString = inV + "_" + inAIU + "_" + inBU + "_" + inWST
    return outString

outdf['in_VariableSpecificCV'] = outdf.apply(lambda row: createVariableSpecificCV(row['in_VariableCV'], 
                                                                                  row['in_AggregationIntervalUnitCV'],
                                                                                  row['in_BeneficialUseCategory'],
                                                                                  row['in_WaterSourceTypeCV']), axis=1)
outdf['in_VariableSpecificCV'].unique()

array(['Water Use_Monthly_Incidental Power_Unspecified',
       'Water Use_Monthly_Domestic_Unspecified',
       'Water Use_Monthly_Irrigation_Unspecified',
       'Water Use_Monthly_Fish And Wildlife Preservation And Enhancement_Unspecified',
       'Water Use_Monthly_Domestic_Surface Water',
       'Water Use_Monthly_Recreational_Unspecified',
       'Water Use_Monthly_Irrigation_Surface Water',
       'Water Use_Monthly_Municipal_Unspecified',
       'Water Use_Monthly_Power_Unspecified',
       'Water Use_Monthly_Fire Protection_Unspecified',
       'Water Use_Monthly_Industrial_Unspecified',
       'Water Use_Monthly_Municipal_Surface Water',
       'Water Use_Monthly_Power_Surface Water',
       'Water Use_Monthly_Aquaculture_Unspecified',
       'Water Use_Monthly_Mining_Unspecified',
       'Water Use_Monthly_Frost Protection_Unspecified',
       'Water Use_Monthly_Stockwatering_Unspecified',
       'Water Use_Monthly_Milling_Unspecified',
       'Water Use_Monthly_Heat Control

In [30]:
# Creating WaDE Custom water source native ID for easy water source identification
# use unique WaterSourceName and WaterSourceType values
# ----------------------------------------------------------------------------------------------------

# Create temp in_WaterSourceNativeID dataframe of unique water source.
def assignIdValueFunc(colRowValue):
    string1 = str(colRowValue)
    outstring = "wadeId" + string1
    return outstring

dfTempID = pd.DataFrame()
dfTempID['in_WaterSourceName'] = outdf['in_WaterSourceName'].astype(str).str.strip()
dfTempID['in_WaterSourceTypeCV'] = outdf['in_WaterSourceTypeCV'].astype(str).str.strip()
dfTempID = dfTempID.drop_duplicates()

dfTempCount = pd.DataFrame(index=dfTempID.index)
dfTempCount["Count"] = range(1, len(dfTempCount.index) + 1)
dfTempID['in_WaterSourceNativeID'] = dfTempCount.apply(lambda row: assignIdValueFunc(row['Count']), axis=1)
dfTempID['linkKey'] = dfTempID['in_WaterSourceName'].astype(str) + dfTempID['in_WaterSourceTypeCV'].astype(str)
IdDict = pd.Series(dfTempID.in_WaterSourceNativeID.values, index=dfTempID.linkKey.astype(str)).to_dict()
# ----------------------------------------------------------------------------------------------------

# Retreive WaDE Custom site native ID
def retrieveIdValueFunc(checkVal, valA, valB):
    checkVal = str(checkVal).strip()
    if checkVal == "":
        linkKeyVal = str(valA).strip() + str(valB).strip()
        outString = IdDict[linkKeyVal]
    else:
        outString = checkVal
    return outString

outdf['in_WaterSourceNativeID'] = outdf.apply(lambda row: retrieveIdValueFunc(row['in_WaterSourceNativeID'], 
                                                                              row['in_WaterSourceName'], row['in_WaterSourceTypeCV']), axis=1)
outdf['in_WaterSourceNativeID'].unique()

array(['wadeId1', 'wadeId2', 'wadeId3', ..., 'wadeId7368', 'wadeId7369',
       'wadeId7370'], dtype=object)

In [31]:
# Creating WaDE Custom site native ID for easy site identification
# use Unique Latitude, Longitude, SiteName and SiteTypeCV values
# ----------------------------------------------------------------------------------------------------

# Create temp in_SiteNativeID dataframe of unique water source.
def assignIdValueFunc(colRowValue):
    string1 = str(colRowValue)
    outstring = "wadeId" + string1
    return outstring

dfTempID = pd.DataFrame()
dfTempID['in_Latitude'] = outdf['in_Latitude'].astype(str).str.strip()
dfTempID['in_Longitude'] = outdf['in_Longitude'].astype(str).str.strip()
dfTempID['in_SiteName'] = outdf['in_SiteName'].astype(str).str.strip()
dfTempID['in_SiteTypeCV'] = outdf['in_SiteTypeCV'].astype(str).str.strip()
dfTempID = dfTempID.drop_duplicates()

dfTempCount = pd.DataFrame(index=dfTempID.index)
dfTempCount["Count"] = range(1, len(dfTempCount.index) + 1)
dfTempID['in_SiteNativeID'] = dfTempCount.apply(lambda row: assignIdValueFunc(row['Count']), axis=1)
dfTempID['linkKey'] = dfTempID['in_Latitude'].astype(str) + dfTempID['in_Longitude'].astype(str) + dfTempID['in_SiteName'].astype(str)+ dfTempID['in_SiteTypeCV'].astype(str)
IdDict = pd.Series(dfTempID.in_SiteNativeID.values, index=dfTempID.linkKey.astype(str)).to_dict()
# ----------------------------------------------------------------------------------------------------

# Retreive WaDE Custom site native ID
def retrieveIdValueFunc(checkVal, valA, valB, valC, valD):
    checkVal = str(checkVal).strip()
    if checkVal == "":
        linkKeyVal = str(valA).strip() + str(valB).strip() + str(valC).strip() + str(valD).strip()
        outString = IdDict[linkKeyVal]
    else:
        outString = checkVal
    return outString

outdf['in_SiteNativeID'] = outdf.apply(lambda row: retrieveIdValueFunc(row['in_SiteNativeID'], 
                                                                       row['in_Latitude'], row['in_Longitude'],
                                                                       row['in_SiteName'], row['in_SiteTypeCV']), axis=1)
outdf['in_SiteNativeID'].unique()

array(['40933.0', '34881.0', '28895.0', ..., '84721.0', '84722.0',
       '64319.0'], dtype=object)

## Export Outputs

In [32]:
outdf.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4889274 entries, 0 to 4889273
Data columns (total 49 columns):
 #   Column                            Dtype         
---  ------                            -----         
 0   WaDEUUID                          object        
 1   in_MethodUUID                     object        
 2   in_AggregationIntervalUnitCV      object        
 3   in_VariableCV                     object        
 4   in_OrganizationUUID               object        
 5   in_Geometry                       object        
 6   in_GNISFeatureNameCV              object        
 7   in_WaterQualityIndicatorCV        object        
 8   in_WaterSourceName                object        
 9   in_WaterSourceNativeID            object        
 10  in_WaterSourceTypeCV              object        
 11  in_CoordinateAccuracy             object        
 12  in_CoordinateMethodCV             object        
 13  in_County                         object        
 14  in_EPSGCodeCV     

In [33]:
outdf

Unnamed: 0,WaDEUUID,in_MethodUUID,in_AggregationIntervalUnitCV,in_VariableCV,in_OrganizationUUID,in_Geometry,in_GNISFeatureNameCV,in_WaterQualityIndicatorCV,in_WaterSourceName,in_WaterSourceNativeID,in_WaterSourceTypeCV,in_CoordinateAccuracy,in_CoordinateMethodCV,in_County,in_EPSGCodeCV,in_GNISCodeCV,in_HUC12,in_HUC8,in_Latitude,in_Longitude,in_NHDNetworkStatusCV,in_NHDProductCV,in_PODorPOUSite,in_SiteName,in_SiteNativeID,in_SitePoint,in_SiteTypeCV,in_StateCV,in_USGSSiteID,in_Amount,in_AllocationCropDutyAmount,in_AssociatedNativeAllocationIDs,in_BeneficialUseCategory,in_CommunityWaterSupplySystem,in_CropTypeCV,in_CustomerTypeCV,in_DataPublicationDate,in_DataPublicationDOI,in_IrrigatedAcreage,in_IrrigationMethodCV,in_PopulationServed,in_PowerGeneratedGWh,in_PowerType,in_PrimaryUseCategory,in_ReportYearCV,in_SDWISIdentifier,in_TimeframeEnd,in_TimeframeStart,in_VariableSpecificCV
0,in10,CAssdw_M1,Monthly,Water Use,CAssdw_O1,,,,Sly Park Creek,wadeId1,Unspecified,,DD_NE,El Dorado,4326,,180400130101.00000,18040013.00000,38.71507,-120.56160,,,POD,,40933.0,,"Point of Direct Diversion,Point of Storage - U...",CA,,,,A002270,Incidental Power,,,,,,,,,,,Incidental Power,2016,,2016-01-01,2016-01-01,Water Use_Monthly_Incidental Power_Unspecified
1,in11,CAssdw_M1,Monthly,Water Use,CAssdw_O1,,,,Sly Park Creek,wadeId1,Unspecified,,DD_NE,El Dorado,4326,,180400130101.00000,18040013.00000,38.71507,-120.56160,,,POD,,40933.0,,"Point of Direct Diversion,Point of Storage - U...",CA,,,,A002270,Incidental Power,,,,,,,,,,,Incidental Power,2017,,2017-01-01,2017-01-01,Water Use_Monthly_Incidental Power_Unspecified
2,in12,CAssdw_M1,Monthly,Water Use,CAssdw_O1,,,,Sly Park Creek,wadeId1,Unspecified,,DD_NE,El Dorado,4326,,180400130101.00000,18040013.00000,38.71507,-120.56160,,,POD,,40933.0,,"Point of Direct Diversion,Point of Storage - U...",CA,,,,A002270,Incidental Power,,,,,,,,,,,Incidental Power,2018,,2018-01-01,2018-01-01,Water Use_Monthly_Incidental Power_Unspecified
3,in13,CAssdw_M1,Monthly,Water Use,CAssdw_O1,,,,Sly Park Creek,wadeId1,Unspecified,,DD_NE,El Dorado,4326,,180400130101.00000,18040013.00000,38.71507,-120.56160,,,POD,,40933.0,,"Point of Direct Diversion,Point of Storage - U...",CA,,,,A002270,Incidental Power,,,,,,,,,,,Incidental Power,2021,,2021-01-01,2021-01-01,Water Use_Monthly_Incidental Power_Unspecified
4,in14,CAssdw_M1,Monthly,Water Use,CAssdw_O1,,,,Sly Park Creek,wadeId1,Unspecified,,DD_NE,El Dorado,4326,,180400130101.00000,18040013.00000,38.71507,-120.56160,,,POD,,40933.0,,"Point of Direct Diversion,Point of Storage - U...",CA,,,,A002270,Incidental Power,,,,,,,,,,,Incidental Power,2022,,2022-01-01,2022-01-01,Water Use_Monthly_Incidental Power_Unspecified
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4889269,in15015536,CAssdw_M1,Monthly,Water Use,CAssdw_O1,,,,Dashiell Creek,wadeId7348,Surface Water,,GIS_LL,Lake,4326,,180101030502.00000,18010103.00000,39.41650,-123.05230,,,POD,,62673.0,,Point of Direct Diversion,CA,,,,S024415,Domestic,,,,,,,,,,,Domestic,2018,,2018-08-01,2018-08-01,Water Use_Monthly_Domestic_Surface Water
4889270,in15015537,CAssdw_M1,Monthly,Water Use,CAssdw_O1,,,,Dashiell Creek,wadeId7348,Surface Water,,GIS_LL,Lake,4326,,180101030502.00000,18010103.00000,39.41650,-123.05230,,,POD,,62673.0,,Point of Direct Diversion,CA,,,,S024415,Domestic,,,,,,,,,,,Domestic,2018,,2018-09-01,2018-09-01,Water Use_Monthly_Domestic_Surface Water
4889271,in15015538,CAssdw_M1,Monthly,Water Use,CAssdw_O1,,,,Dashiell Creek,wadeId7348,Surface Water,,GIS_LL,Lake,4326,,180101030502.00000,18010103.00000,39.41650,-123.05230,,,POD,,62673.0,,Point of Direct Diversion,CA,,,,S024415,Domestic,,,,,,,,,,,Domestic,2018,,2018-10-01,2018-10-01,Water Use_Monthly_Domestic_Surface Water
4889272,in15015539,CAssdw_M1,Monthly,Water Use,CAssdw_O1,,,,Dashiell Creek,wadeId7348,Surface Water,,GIS_LL,Lake,4326,,180101030502.00000,18010103.00000,39.41650,-123.05230,,,POD,,62673.0,,Point of Direct Diversion,CA,,,,S024415,Domestic,,,,,,,,,,,Domestic,2018,,2018-11-01,2018-11-01,Water Use_Monthly_Domestic_Surface Water


In [34]:
# Export the output dataframe
outdf.to_csv('RawInputData/Pssdw_Main.zip', compression=dict(method='zip', archive_name='Pssdw_Main.csv'), index=False)  # The output, save as a zip