# Pre-processing Oregon Allocation data for WaDE upload.

Purpose:  To pre-process the Oregon data into one master file for simple DataFrame creation and extraction

Useful Links to Data:

- Data Available (use 'Statewide Water Right Spatial Data with Metadata'): https://www.oregon.gov/OWRD/access_Data/Pages/Data.aspx

- POD metadata: https://arcgis.wrd.state.or.us/data/wr_pod_metadata.pdf

- POU metadata: https://arcgis.wrd.state.or.us/data/wr_pou_metadata.pdf

In [None]:
# Needed Libraries

# working with data
import os
import numpy as np
import pandas as pd
import geopandas as gpd

# visualization
import matplotlib.pyplot as plt
import seaborn as sns

# API retrieval
import requests
import json

# Cleanup
import re
from datetime import datetime
# Raise the displayed-column cap to 999 so wide DataFrames are shown in full in the notebook.
pd.set_option('display.max_columns', 999)
# Render floats with five fixed decimals instead of scientific notation.
pd.set_option('display.float_format', lambda x: '%.5f' % x)

In [None]:
# Working Directory
# Every relative path used below (shapefile inputs, zipped CSV outputs) resolves against this folder.
workingDir = "G:/Shared drives/WaDE Data/Oregon/WaterAllocation/RawInputData"
os.chdir(workingDir)

## Point of Diversion Data

In [None]:
# import pod data
# Read the zipped POD shapefile, blank out NaN cells and cells equal to "nan,nan",
# then drop the geometry column so only attribute data remains.
inputFile = 'shapefile/wr_v_pod_public.zip'
dfinPOD = gpd.read_file(inputFile).replace(np.nan, "").replace("nan,nan", "") #geodataframe read
dfinPOD = dfinPOD.drop(['geometry'], axis=1)

# WaDE UUID tracker for data assessment
# Each row is tagged "orD" + its row index and the tagged table is saved as a zipped CSV.
# NOTE(review): the CSV is written to the working directory, not the 'shapefile/' path read above,
# so this branch presumably runs on every execution -- confirm that is intended.
if 'WaDEUUID' not in dfinPOD:
    dfinPOD['WaDEUUID'] = "orD" + dfinPOD.index.astype(str)
    dfinPOD.to_csv('wr_v_pod_public.zip', compression=dict(method='zip', archive_name='wr_v_pod_public.csv'), index=False)

# NOTE(review): when WaDEUUID was just assigned above it is unique per row, so this
# drop_duplicates likely removes nothing -- confirm.
dfinPOD = dfinPOD.drop_duplicates().reset_index(drop=True)
print(len(dfinPOD))
dfinPOD.head()

In [None]:
# Lookup table: Oregon source-type code -> SiteTypeCV label.
STCVDict = {
    "LK": "lake",
    "DR": "drain",
    "SP": "spring",
    "ST": "stream",
    "SL": "slough",
    "WW": "waste water",
    "WE": "well",
    "WR": "winter runoff",
    "SM": "sump",
    "PD": "pond",
    "RS": "reservoir",
    "DT": "ditch",
    "SE": "sewage effluent",
    "CN": "canal",
}


def assignSiteTypeCV(colrowValue):
    """Translate a source-type code into its SiteTypeCV label.

    Blank, null, and unrecognised codes all map to an empty string.
    Surrounding whitespace on the code is ignored.
    """
    if colrowValue == "" or pd.isnull(colrowValue):
        return ""
    site_code = colrowValue.strip()
    return STCVDict.get(site_code, "")

# Map each row's 'source_typ' code to a SiteTypeCV label, then list the distinct labels produced.
dfinPOD['in_SiteTypeCV'] = dfinPOD.apply(lambda row: assignSiteTypeCV(row['source_typ']), axis=1)
dfinPOD['in_SiteTypeCV'].unique()

In [None]:
# Creating Ownername.
# Concatenating first and last name of individual.
# Determining if company is available, split string.
# combine together for output.

import re

# first & last name function
def assignownerName(colrowValue1, colrowValue2):
    """Join a first and last name into a single display string.

    Args:
        colrowValue1: first name; may be blank or null.
        colrowValue2: last name; may be blank or null.

    Returns:
        '' when both parts are missing, the one available part when only one
        is present, otherwise "<first> <last>". Each part is stripped of
        surrounding whitespace before it is used.
    """
    parts = []
    for value in (colrowValue1, colrowValue2):
        if value == '' or pd.isnull(value):
            continue
        cleaned = str(value).strip()  # remove whitespace chars
        if cleaned:
            parts.append(cleaned)
    # Join the stripped parts so padded inputs cannot leak extra spaces into the result.
    return " ".join(parts)


# Business name and Concatenate
def assignownerNameORCompany(buisName, fName, lName):
    """Build the allocation owner string from company and personal names.

    Args:
        buisName: company-name field. May hold several entities separated by
            ';', each optionally written as "Last, First".
        fName: individual's first name.
        lName: individual's last name.

    Returns:
        Title-cased owner string with the characters $ @ & . ; , / ) ( -
        removed and runs of whitespace collapsed to one space. The personal
        name comes first, followed by the company entry. '' when every input
        is blank or null.
    """
    # Concatenating First and Last name together.
    frilasName = assignownerName(fName, lName)

    # Clean Company Name Entry
    if buisName == "" or pd.isnull(buisName):
        outBuisString = ""
    else:
        buisName = str(buisName).strip()
        if ";" in buisName:
            # ';' separates entities; a "Last, First" entity is flipped to "First Last".
            entities = []
            for item in buisName.split(";"):
                pieces = [piece.strip() for piece in item.split(",") if piece.strip()]
                pieces.reverse()
                if pieces:
                    # Join with a space so the words stay separate once commas are removed.
                    entities.append(" ".join(pieces))
            outBuisString = ", ".join(entities)
        elif "," in buisName:
            # Keep every comma-separated part, in order, with a space after each comma.
            outBuisString = ", ".join(piece.strip() for piece in buisName.split(",") if piece.strip())
        else:
            outBuisString = buisName

    # Concatenating together, create outString
    if frilasName and outBuisString:
        outString = frilasName + ", " + outBuisString
    else:
        outString = frilasName or outBuisString

    # Drop punctuation, then collapse any whitespace runs left behind.
    outString = re.sub(r"[$@&.;,/\)(-]", "", outString)
    outString = re.sub(r"\s+", " ", outString).title().strip()

    return outString

# Build the owner string per row from the company, first-name and last-name columns,
# then list the distinct owners produced.
dfinPOD['in_AllocationOwner'] = dfinPOD.apply(lambda row: assignownerNameORCompany(row['name_compa'], row['name_first'], row['name_last']), axis=1)
dfinPOD['in_AllocationOwner'].unique()

In [None]:
#Determining AllocationTimeframe Start & End time for each site.

def formatDateString(inString1, inString2):
    """Format a pair of values as "<int>/<int>" (no zero padding).

    Both inputs are coerced with int(), so 4.0 and "4" both render as "4".

    Args:
        inString1: value placed before the slash (the month at the call sites in this notebook).
        inString2: value placed after the slash (the day at the call sites in this notebook).

    Returns:
        The formatted string, or '' when either value cannot be converted to
        an integer (blank text, None, NaN, infinity).
    """
    try:
        return f"{int(inString1)}/{int(inString2)}"
    except (ValueError, TypeError, OverflowError):
        # Only conversion failures mean "no date"; anything else (e.g. an interrupt) propagates.
        return ''

# Start uses the begin month/day columns, end uses the end month/day columns.
dfinPOD['in_AllocationTimeframeStart'] = dfinPOD.apply(lambda row: formatDateString(row['begin_mont'], row['begin_day']), axis=1)
dfinPOD['in_AllocationTimeframeEnd'] = dfinPOD.apply(lambda row: formatDateString(row['end_month'], row['end_day']), axis=1)

In [None]:
# create output POD dataframe
# Columns are filled either from dfinPOD or with a constant; "" marks a field left blank for POD rows.
df = pd.DataFrame()

# Data Assessment UUID
# Assigned first, from a Series, so df takes its rows from dfinPOD before the constants below are set.
df['WaDEUUID'] = dfinPOD['WaDEUUID']

# Method Info
df['in_MethodUUID'] = "ORwr_M1"

# Variable Info
df['in_VariableSpecificUUID'] = "ORwr_V1"

# Organization Info
df['in_OrganizationUUID'] = "ORwr_O1"

# WaterSource Info
df['in_Geometry'] = ""
df['in_GNISFeatureNameCV'] = ""
df['in_WaterQualityIndicatorCV'] = ""
df['in_WaterSourceName'] = dfinPOD['source']
df['in_WaterSourceNativeID'] = ""
# Holds the raw 'wr_type' value here; it is passed through assignWaterSourceTypeCV later in the notebook.
df['in_WaterSourceTypeCV'] = dfinPOD['wr_type']

# Site Info
df['in_CoordinateAccuracy'] = ""
df['in_CoordinateMethodCV'] = ""
df['in_County'] = ""
df['in_EPSGCodeCV'] = 4326
# in_Geometry is assigned a second time here with the same blank value.
df['in_Geometry'] = ""
df['in_GNISCodeCV'] = ""
df['in_HUC12'] = ""
df['in_HUC8'] = ""
df['in_Latitude'] = dfinPOD['Latitude']
df['in_Longitude'] = dfinPOD['Longitude']
df['in_NHDNetworkStatusCV'] = ""
df['in_NHDProductCV'] = ""
df['in_PODorPOUSite'] = "POD"
df['in_SiteName'] = ""
# Blank or null 'pod_locati' becomes 0, so such rows get the ID "POD0".
df['in_SiteNativeID'] = "POD" + dfinPOD['pod_locati'].replace("", 0).fillna(0).astype(int).astype(str).str.strip()
df['in_SitePoint'] = ""
df['in_SiteTypeCV'] = dfinPOD['in_SiteTypeCV']
df['in_StateCV'] = "OR"
df['in_USGSSiteID'] = ""

# AllocationAmount Info
df['in_AllocationApplicationDate'] = ""
df['in_AllocationAssociatedConsumptiveUseSiteIDs'] = ""
df['in_AllocationAssociatedWithdrawalSiteIDs'] = ""
df['in_AllocationBasisCV'] = ""
df['in_AllocationChangeApplicationIndicator'] = ""
df['in_AllocationCommunityWaterSupplySystem'] = ""
df['in_AllocationCropDutyAmount'] = dfinPOD['duty']
df['in_AllocationExpirationDate'] = ""
df['in_AllocationFlow_CFS'] = dfinPOD['rate_cfs']
df['in_AllocationLegalStatusCV'] = ""
# Integer-formatted string of 'snp_id' (blank/null -> "0"); the POU merge later joins on this column.
df['in_AllocationNativeID'] =  dfinPOD['snp_id'].replace("", 0).fillna(0).astype(int).astype(str).str.strip()
df['in_AllocationOwner'] = dfinPOD['in_AllocationOwner']
df['in_AllocationPriorityDate'] = dfinPOD['priority_d']
df['in_AllocationSDWISIdentifierCV'] = ""
df['in_AllocationTimeframeEnd'] = dfinPOD['in_AllocationTimeframeEnd']
df['in_AllocationTimeframeStart'] = dfinPOD['in_AllocationTimeframeStart']
df['in_AllocationTypeCV'] = ""
df['in_AllocationVolume_AF'] = dfinPOD['acre_feet']
df['in_BeneficialUseCategory'] = dfinPOD['use_code_d']
df['in_CommunityWaterSupplySystem'] = ""
df['in_CropTypeCV'] = ""
df['in_CustomerTypeCV'] = ""
df['in_DataPublicationDate'] = ""
df['in_DataPublicationDOI'] = ""
df['in_ExemptOfVolumeFlowPriority'] = 0
df['in_GeneratedPowerCapacityMW'] = ""
df['in_IrrigatedAcreage'] = ""
df['in_IrrigationMethodCV'] = ""
df['in_LegacyAllocationIDs'] = ""
df['in_OwnerClassificationCV'] = ""
df['in_PopulationServed'] = ""
df['in_PowerType'] = ""
df['in_PrimaryBeneficialUseCategory'] = ""
df['in_SDWISIdentifierCV'] = ""
df['in_WaterAllocationNativeURL'] = dfinPOD['wris_link']  #for WaterAllocationNativeURL

# Snapshot the POD output, de-duplicate, and blank out any remaining NaN.
# NOTE(review): WaDEUUID looks unique per row, so drop_duplicates probably removes nothing -- confirm.
outPOD = df.copy()
outPOD = outPOD.drop_duplicates().reset_index(drop=True).replace(np.nan, '')
print(len(outPOD))
outPOD.head()

## Place of Use Data

In [None]:
# Input File - PoU Shapefile Data
# export dataframe as zipped csv
# Read the zipped POU shapefile, blank out NaN cells and cells equal to "nan,nan",
# then drop the geometry column so only attribute data remains.
pouInput = 'shapefile/wr_v_pou_public.zip'
dfinPOU = gpd.read_file(pouInput).replace(np.nan, "").replace("nan,nan", "") #geodataframe read
dfinPOU = dfinPOU.drop(['geometry'], axis=1)

# WaDE UUID tracker for data assessment
# Each row is tagged "orU" + its row index and the tagged table is saved as a zipped CSV.
# NOTE(review): the CSV is written to the working directory, not the 'shapefile/' path read above,
# so this branch presumably runs on every execution -- confirm that is intended.
if 'WaDEUUID' not in dfinPOU:
    dfinPOU['WaDEUUID'] = "orU" + dfinPOU.index.astype(str)
    dfinPOU.to_csv('wr_v_pou_public.zip', compression=dict(method='zip', archive_name='wr_v_pou_public.csv'), index=False)

print(len(dfinPOU))
dfinPOU.head(1)

In [None]:
# POU data is missing key inputs, will combine with POD data to fill in missing gaps.
# Build the join key with the same recipe used for the POD in_AllocationNativeID
# (blank/null -> 0 -> int -> str), so the keys still match if the shapefile stores snp_id as a float.
dfinPOU['snp_id'] = dfinPOU['snp_id'].replace("", 0).fillna(0).astype(int).astype(str).str.strip()  #for AllocationNativeID

# Left join keeps every POU row; POU rows with no matching POD allocation get NaN in the POD columns.
# Column names present in both frames receive the _x (POU) / _y (POD) suffixes.
# NOTE(review): an snp_id that appears on several POD rows repeats the POU row once per match -- confirm intended.
dfinPOU = pd.merge(dfinPOU, outPOD, left_on='snp_id', right_on='in_AllocationNativeID', how='left')
print(len(dfinPOU))
dfinPOU.head()

In [None]:
# create output POU dataframe
# Columns are filled from the merged dfinPOU (POU attributes plus POD columns joined on snp_id)
# or with a constant; "" marks a field left blank for POU rows.
df = pd.DataFrame()

# Data Assessment UUID
# WaDEUUID_x is the POU-side UUID; the merge suffixed it because both frames carry WaDEUUID.
df['WaDEUUID'] = dfinPOU['WaDEUUID_x']

# Method Info
df['in_MethodUUID'] = "ORwr_M1"

# Variable Info
df['in_VariableSpecificUUID'] = "ORwr_V1"

# Organization Info
df['in_OrganizationUUID'] = "ORwr_O1"

# WaterSource Info
df['in_Geometry'] = ""
df['in_GNISFeatureNameCV'] = ""
df['in_WaterQualityIndicatorCV'] = ""
df['in_WaterSourceName'] = dfinPOU['in_WaterSourceName'] # from POD
df['in_WaterSourceNativeID'] = ""
df['in_WaterSourceTypeCV'] = dfinPOU['in_WaterSourceTypeCV'] # from POD

# Site Info
df['in_CoordinateAccuracy'] = ""
df['in_CoordinateMethodCV'] = ""
df['in_County'] = ""
df['in_EPSGCodeCV'] = 4326
# in_Geometry is assigned a second time here with the same blank value.
df['in_Geometry'] = ""
df['in_GNISCodeCV'] = ""
df['in_HUC12'] = ""
df['in_HUC8'] = ""
# presumably centroid coordinates of the place-of-use polygon (column names cent_*) -- verify against the POU metadata
df['in_Latitude'] = dfinPOU['cent_Latit']
df['in_Longitude'] = dfinPOU['cent_Longi']
df['in_NHDNetworkStatusCV'] = ""
df['in_NHDProductCV'] = ""
df['in_PODorPOUSite'] = "POU"
df['in_SiteName'] = ""
# Blank or null 'pou_use_id' becomes 0, so such rows get the ID "POU0".
df['in_SiteNativeID'] = "POU" + dfinPOU['pou_use_id'].replace("", 0).fillna(0).astype(int).astype(str).str.strip()
df['in_SitePoint'] = ""
df['in_SiteTypeCV'] = ""
df['in_StateCV'] = "OR"
df['in_USGSSiteID'] = ""

# AllocationAmount Info
df['in_AllocationApplicationDate'] = ""
df['in_AllocationAssociatedConsumptiveUseSiteIDs'] = ""
df['in_AllocationAssociatedWithdrawalSiteIDs'] = ""
df['in_AllocationBasisCV'] = ""
df['in_AllocationChangeApplicationIndicator'] = ""
df['in_AllocationCommunityWaterSupplySystem'] = ""
df['in_AllocationCropDutyAmount'] = dfinPOU['in_AllocationCropDutyAmount'] # from POD
df['in_AllocationExpirationDate'] = ""
df['in_AllocationFlow_CFS'] = dfinPOU['in_AllocationFlow_CFS'] # From POD
df['in_AllocationLegalStatusCV'] = ""
df['in_AllocationNativeID'] =  dfinPOU['snp_id'].replace("", 0).fillna(0).astype(int).astype(str).str.strip()
df['in_AllocationOwner'] = dfinPOU['in_AllocationOwner'] # from POD
df['in_AllocationPriorityDate'] = dfinPOU['priority_d']
df['in_AllocationSDWISIdentifierCV'] = ""
df['in_AllocationTimeframeEnd'] = dfinPOU['in_AllocationTimeframeEnd'] # from POD
df['in_AllocationTimeframeStart'] = dfinPOU['in_AllocationTimeframeStart'] # from POD
df['in_AllocationTypeCV'] = ""
df['in_AllocationVolume_AF'] = dfinPOU['in_AllocationVolume_AF']  # from POD
df['in_BeneficialUseCategory'] = dfinPOU['use_code_d']
df['in_CommunityWaterSupplySystem'] = ""
df['in_CropTypeCV'] = ""
df['in_CustomerTypeCV'] = ""
df['in_DataPublicationDate'] = ""
df['in_DataPublicationDOI'] = ""
df['in_ExemptOfVolumeFlowPriority'] = 0
df['in_GeneratedPowerCapacityMW'] = ""
df['in_IrrigatedAcreage'] = dfinPOU['wris_acres']
df['in_IrrigationMethodCV'] = ""
df['in_LegacyAllocationIDs'] = ""
df['in_OwnerClassificationCV'] = ""
df['in_PopulationServed'] = ""
df['in_PowerType'] = ""
df['in_PrimaryBeneficialUseCategory'] = ""
df['in_SDWISIdentifierCV'] = ""
df['in_WaterAllocationNativeURL'] = dfinPOU['in_WaterAllocationNativeURL']  # from POD

# Snapshot the POU output, drop fully identical rows, and blank out NaN left by unmatched merge rows.
outPOU = df.copy()
outPOU = outPOU.drop_duplicates().reset_index(drop=True).replace(np.nan, '')
print(len(outPOU))
outPOU.head()

## Concatenate POD and POU data

In [None]:
# Concatenate dataframes
# Stack POD rows on top of POU rows, drop fully identical rows, renumber the index,
# and blank out any NaN so empty cells are "" throughout.
frames = [outPOD, outPOU]
outdf = pd.concat(frames)
outdf = outdf.drop_duplicates().reset_index(drop=True).replace(np.nan, "")
print(len(outdf))

## Custom WaDE Elements due to missing info

In [None]:
# Lookup table: water-right type code -> WaterSourceTypeCV label.
WSTypeDict = {
    "ST": "Storage",
    "GW": "Groundwater",
    "SW": "Surface Water",
}


def assignWaterSourceTypeCV(colrowValue):
    """Translate a water-right type code into its WaterSourceTypeCV label.

    The value is converted to text and stripped first. Anything that is not a
    known code (including blank and null input) yields "WaDE Blank".
    """
    type_code = str(colrowValue).strip()
    return WSTypeDict.get(type_code, "WaDE Blank")

# Replace the raw type codes with their labels in place, then list the distinct labels produced.
outdf['in_WaterSourceTypeCV'] = outdf.apply(lambda row: assignWaterSourceTypeCV(row['in_WaterSourceTypeCV']), axis=1)
outdf['in_WaterSourceTypeCV'].unique()

In [None]:
#Fixing Beneficial Uses PRIMARY_PURPOSE

def fixBenUse(val):
    """Rewrite "X AND Y" beneficial-use phrases as comma-separated lists.

    The value is converted to text and stripped. Only the exact phrases in the
    table below are rewritten; every other value is returned stripped but
    otherwise unchanged.
    """
    rewrites = {
        "IRRIGATION, LIVESTOCK AND DOMESTIC": "IRRIGATION, LIVESTOCK, DOMESTIC",
        "IRRIGATION AND LIVESTOCK": "IRRIGATION, LIVESTOCK",
        "LIVESTOCK AND WILDLIFE": "LIVESTOCK, WILDLIFE",
        "DOMESTIC AND LIVESTOCK": "DOMESTIC, LIVESTOCK",
        "IRRIGATION AND DOMESTIC": "IRRIGATION, DOMESTIC",
        "HUMAN CONSUMPTION AND LIVESTOCK": "HUMAN CONSUMPTION, LIVESTOCK",
    }
    phrase = str(val).strip()
    return rewrites.get(phrase, phrase)

# Normalise the beneficial-use phrases in place.
outdf['in_BeneficialUseCategory'] = outdf.apply(lambda row: fixBenUse(row['in_BeneficialUseCategory']), axis=1)
# Print each distinct value, sorted, as a quoted string followed by a comma.
for x in outdf['in_BeneficialUseCategory'].sort_values().unique():
    print(f'"' + x + '",')

In [None]:
# Clean up a free-text name (applied below to owner, water source and site names)
def cleanOwnerDataFunc(Val):
    """Strip punctuation from a name and title-case it.

    Args:
        Val: raw name value; non-string values are converted with str().

    Returns:
        The text with the characters $ @ & . ; / ) ( - removed, title-cased and
        stripped. Null input (None, NaN, NaT) returns '' so it does not become
        the literal text "None" or "Nan".
    """
    if pd.api.types.is_scalar(Val) and pd.isnull(Val):
        return ""
    return re.sub(r"[$@&.;/\)(-]", "", str(Val)).title().strip()

In [None]:
# Clean the owner names in place, then list the distinct values.
outdf['in_AllocationOwner'] = outdf.apply(lambda row: cleanOwnerDataFunc(row['in_AllocationOwner']), axis=1)
outdf['in_AllocationOwner'].unique()

In [None]:
# Apply the same name clean-up to the water source names, then list the distinct values.
outdf['in_WaterSourceName'] = outdf.apply(lambda row: cleanOwnerDataFunc(row['in_WaterSourceName']), axis=1)
outdf['in_WaterSourceName'].unique()

In [None]:
# Apply the same name clean-up to the site names, then list the distinct values.
outdf['in_SiteName'] = outdf.apply(lambda row: cleanOwnerDataFunc(row['in_SiteName']), axis=1)
outdf['in_SiteName'].unique()

In [None]:
# Ensure Empty String

def ensureEmptyString(val):
    """Return val as stripped text, or '' when it carries no value.

    "No value" means null (None, NaN, NaT), whitespace-only text, or the
    literal text "nan". Any other value is converted with str() and stripped.
    """
    # Test for null before str(): once converted, None and NaT are ordinary text.
    if pd.api.types.is_scalar(val) and pd.isnull(val):
        return ""
    text = str(val).strip()
    return "" if text in ("", "nan") else text

In [None]:
# Normalise blank-like water source names to "", then list the distinct values.
outdf['in_WaterSourceName'] = outdf.apply(lambda row: ensureEmptyString(row['in_WaterSourceName']), axis=1)
outdf['in_WaterSourceName'].unique()

In [None]:
# Normalise blank-like water source types to "", then list the distinct values.
outdf['in_WaterSourceTypeCV'] = outdf.apply(lambda row: ensureEmptyString(row['in_WaterSourceTypeCV']), axis=1)
outdf['in_WaterSourceTypeCV'].unique()

In [None]:
# Normalise blank-like site names to "", then list the distinct values.
outdf['in_SiteName'] = outdf.apply(lambda row: ensureEmptyString(row['in_SiteName']), axis=1)
outdf['in_SiteName'].unique()

In [None]:
# Normalise blank-like site types to "", then list the distinct values.
outdf['in_SiteTypeCV'] = outdf.apply(lambda row: ensureEmptyString(row['in_SiteTypeCV']), axis=1)
outdf['in_SiteTypeCV'].unique()

In [None]:
# Normalise blank-like legal status values to "", then list the distinct values.
outdf['in_AllocationLegalStatusCV'] = outdf.apply(lambda row: ensureEmptyString(row['in_AllocationLegalStatusCV']), axis=1)
outdf['in_AllocationLegalStatusCV'].unique()

In [None]:
# Normalise blank-like owner names to "", then list the distinct values.
outdf['in_AllocationOwner'] = outdf.apply(lambda row: ensureEmptyString(row['in_AllocationOwner']), axis=1)
outdf['in_AllocationOwner'].unique()

In [None]:
# Normalise blank-like beneficial-use values to "", then list the distinct values.
outdf['in_BeneficialUseCategory'] = outdf.apply(lambda row: ensureEmptyString(row['in_BeneficialUseCategory']), axis=1)
outdf['in_BeneficialUseCategory'].unique()

In [None]:
# in_Latitude
# Convert to numbers; values that cannot be parsed become NaN and are then written as "".
outdf['in_Latitude'] = pd.to_numeric(outdf['in_Latitude'], errors='coerce').fillna("")
outdf['in_Latitude'].unique()

In [None]:
# in_Longitude
# Convert to numbers; values that cannot be parsed become NaN and are then written as "".
outdf['in_Longitude'] = pd.to_numeric(outdf['in_Longitude'], errors='coerce').fillna("")
outdf['in_Longitude'].unique()

In [None]:
# Update datatype of Priority Date to fit WaDE 2.0 structure
# First pass: parse to datetime; values that cannot be parsed are coerced to NaT.
outdf['in_AllocationPriorityDate'] = pd.to_datetime(outdf['in_AllocationPriorityDate'], errors = 'coerce')
# Second pass: round-trip through an MM/DD/YYYY string, which keeps only the calendar date.
outdf['in_AllocationPriorityDate'] = pd.to_datetime(outdf['in_AllocationPriorityDate'].dt.strftime('%m/%d/%Y'))
outdf['in_AllocationPriorityDate'].unique()

In [None]:
# Fixing in_AllocationFlow_CFS datatype
# Convert to numbers; unparseable values and zeros are both written as "".
outdf['in_AllocationFlow_CFS'] = pd.to_numeric(outdf['in_AllocationFlow_CFS'], errors='coerce').replace(0,"").fillna("")
outdf['in_AllocationFlow_CFS'].unique()

In [None]:
# Fixing in_AllocationVolume_AF datatype
# Convert to numbers; unparseable values and zeros are both written as "".
outdf['in_AllocationVolume_AF'] = pd.to_numeric(outdf['in_AllocationVolume_AF'], errors='coerce').replace(0,"").fillna("")
outdf['in_AllocationVolume_AF'].unique()

In [None]:
# Fixing in_AllocationCropDutyAmount datatype
# Convert to numbers; unparseable values and zeros are both written as "".
outdf['in_AllocationCropDutyAmount'] = pd.to_numeric(outdf['in_AllocationCropDutyAmount'], errors='coerce').replace(0,"").fillna("")
outdf['in_AllocationCropDutyAmount'].unique()

In [None]:
# Fixing in_IrrigatedAcreage datatype
# Convert to numbers; unparseable values and zeros are both written as "".
outdf['in_IrrigatedAcreage'] = pd.to_numeric(outdf['in_IrrigatedAcreage'], errors='coerce').replace(0,"").fillna("")
outdf['in_IrrigatedAcreage'].unique()

In [None]:
# Creating WaDE Custom water source native ID for easy water source identification
# ----------------------------------------------------------------------------------------------------

# Create temp WaterSourceNativeID dataframe of unique water source.
def assignWaterSourceNativeID(colrowValue):
    """Return the text of colrowValue prefixed with "wadeID"."""
    return f"wadeID{colrowValue!s}"

# One row per distinct (water source name, water source type) pair found in outdf.
dfWaterSourceNativeID = pd.DataFrame()
dfWaterSourceNativeID['in_WaterSourceName'] = outdf['in_WaterSourceName']
dfWaterSourceNativeID['in_WaterSourceTypeCV'] = outdf['in_WaterSourceTypeCV']
dfWaterSourceNativeID = dfWaterSourceNativeID.drop_duplicates()

# Number the distinct pairs 1..N on the same index, then turn each number into a "wadeID<N>" string.
dftemp = pd.DataFrame(index=dfWaterSourceNativeID.index)
dftemp["Count"] = range(1, len(dftemp.index) + 1)
dfWaterSourceNativeID['in_WaterSourceNativeID'] = dftemp.apply(lambda row: assignWaterSourceNativeID(row['Count']), axis=1)
# linkKey is name + type with no separator between them; it is the lookup key for the dict built below.
dfWaterSourceNativeID['linkKey'] = dfWaterSourceNativeID['in_WaterSourceName'].astype(str) + dfWaterSourceNativeID['in_WaterSourceTypeCV'].astype(str)

# ----------------------------------------------------------------------------------------------------

# Retrieve WaDE Custom water source native ID
# Maps linkKey -> in_WaterSourceNativeID.
WaterSourceNativeIDdict = pd.Series(dfWaterSourceNativeID.in_WaterSourceNativeID.values, index=dfWaterSourceNativeID.linkKey.astype(str)).to_dict()
def retrieveWaterSourceNativeID(A, B):
    """Look up the custom water source ID for a pair of values.

    The lookup key is str(A).strip() + str(B).strip(), with no separator, and
    is searched in the module-level WaterSourceNativeIDdict.

    Args:
        A: first half of the key (the water source name at the call site).
        B: second half of the key (the water source type at the call site).

    Returns:
        The matching ID, or '' when both inputs are blank, both are null, or
        the key is not in the dictionary.
    """
    if A == '' and B == '':
        return ''
    if pd.isnull(A) and pd.isnull(B):
        return ''
    lookup_key = str(A).strip() + str(B).strip()
    # dict.get returns '' for an unknown key without hiding unrelated errors.
    return WaterSourceNativeIDdict.get(lookup_key, '')

# Attach the custom ID to every row from its (name, type) pair, then list the distinct IDs.
outdf['in_WaterSourceNativeID'] = outdf.apply(lambda row: retrieveWaterSourceNativeID( row['in_WaterSourceName'], row['in_WaterSourceTypeCV']), axis=1)
outdf['in_WaterSourceNativeID'].unique()

## Shapefile Data
- For attaching geometry to csv inputs.

In [None]:
# PoU Shapefile Data
# Shapefile input
# Re-read the POU shapefile, this time keeping the geometry column.
dfPoUshapetemp = gpd.read_file('shapefile/wr_v_pou_public.zip')
dfPoUshapetemp.head(3)

In [None]:
# Check for duplicate site native IDs.
# Any pou_use_id that occurs more than once is rewritten as "<id>_<n>", where n counts its
# occurrences starting at 1; IDs that occur once are left unchanged.
dfPoUshapetemp['pou_use_id'] = np.where(dfPoUshapetemp['pou_use_id'].duplicated(keep=False),
                                        dfPoUshapetemp['pou_use_id'].astype(str).str.cat(dfPoUshapetemp.groupby('pou_use_id').cumcount().add(1).astype(str), sep='_'),
                                        dfPoUshapetemp['pou_use_id'])

# Number of distinct IDs after the suffixing, followed by the IDs themselves.
print(dfPoUshapetemp['pou_use_id'].nunique())
dfPoUshapetemp['pou_use_id'].unique()

In [None]:
# Two-column table pairing each POU site ID with its geometry.
columnsList = ['in_SiteNativeID', 'geometry']
dfPoUshape = pd.DataFrame(columns=columnsList)
# NOTE(review): this ID is built with astype(str) only and may carry the "_<n>" suffix added above,
# while the in_SiteNativeID written to outdf uses astype(int).astype(str) with no suffix --
# confirm the two sets of IDs still match.
dfPoUshape['in_SiteNativeID'] = "POU" + dfPoUshapetemp['pou_use_id'].replace("", 0).fillna(0).astype(str)
dfPoUshape['geometry'] = dfPoUshapetemp['geometry']
# Keep the first of any fully identical rows; the original index labels are kept.
dfPoUshape = dfPoUshape.drop_duplicates(subset=None, keep='first', inplace=False, ignore_index=False)
dfPoUshape.head(3)

## The Output

In [None]:
# Summary of the final table before export.
outdf.info()

In [None]:
# Display the final table for a visual check.
outdf

In [None]:
# Export the output dataframe
# Both files are written to the working directory as zip archives holding a single CSV, without the index column.
outdf.to_csv('Pwr_orMain.zip', compression=dict(method='zip', archive_name='Pwr_orMain.csv'), index=False)  # The output, save as a zip
dfPoUshape.to_csv('P_Geometry.zip', compression=dict(method='zip', archive_name='P_Geometry.csv'), index=False)  # The output geometry.