# Preprocessing North Dakota Specific data for WaDEQA upload.
- Date Updated: 05/25/2022

Notes:
- Water Use Data POD data.
- Available data...
    - Permit_Header.csv
    - POD.csv
    - Water_Use.csv
- Match ts water use data -> POD data via POD_Index -> Permit data via Permit_Index.

In [6]:
# working with data
import os
import numpy as np
import pandas as pd
import geopandas as gpd # the library that lets us read in shapefiles

# visulizaiton
import matplotlib.pyplot as plot
import seaborn as sns

# Cleanup
from datetime import datetime
pd.set_option('display.max_columns', 999)  # How to display all columns of a Pandas DataFrame in Jupyter Notebook

In [7]:
# Working Directory and Input File
workingDir = "G:/Shared drives/WaDE Data/NorthDakota/SiteSpecificAmounts/RawInputData"
os.chdir(workingDir)

## Inputs and Dataframe Creation

In [9]:
# Timeseries water use data
fileInput = "Water_Use.xlsx"
df_wu = pd.read_excel(fileInput)

df_wu['Permit_Index'] = df_wu['Permit_Index'].astype('Int64').astype('str')
df_wu['Use_Year'] = df_wu['Use_Year'].astype('Int64').astype('str')

# WaDE UUID tracker for data assessment
if 'WaDEUUID' not in df_wu:
    df_wu['WaDEUUID'] = "ndWU" + df_wu.index.astype(str)
    df_wu.to_excel('Water_Use.xlsx', index=False)

print(len(df_wu))
df_wu.head(1)

205766


Unnamed: 0,Permit_Index,POD_Index,Use_Year,Water_Use_Index,Nature_Of_Data,Reported_AcFt,Reported_Acres,Reported_Rate,KWHrs,KWH_Demand,Pump_HP,Begin_Meter,End_Meter,Meter_Units,Comments,NonConsumptive_Use,Crop_type1,Crop_Type2,Reported_Inches,Use_Type,WaDEUUID
0,1217,-1,1979,215852.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,December total is Total Annual Use minus cumul...,0.0,,,,Municipal,ndWU0


In [10]:
# POD site data
fileInput = "POD.xlsx"
df_pod = pd.read_excel(fileInput)

df_pod = df_pod[df_pod['POD_Status'] == 'Active']

# WaDE UUID tracker for data assessment
if 'WaDEUUID' not in df_pod:
    df_pod['WaDEUUID'] = "ndPD" + df_pod.index.astype(str)
    df_pod.to_excel('POD.xlsx', index=False)

print(len(df_pod))
df_pod.head(1)

5475


Unnamed: 0,Permit_Index,POD_Index,POD,Beneficial_Use,County,Column1,_1,Aquifer,SubAquifer,Req_AcFt,Req_Acre,Req_Rate,Req_Storage,App_AcFt,App_Acre,App_Rate,App_Storage,POD_Status,Source,Irrigation_Type,Source_Name,MainStem_Name,Impound_Location,Impound_Name,Return_Dest,Discharge_Locat,Prop_Owner,Dest_Prop_Owner,Period_Start,Period_End,Return_Quantity,Held_AcFt,Held_Acre,Held_Rate,Held_Storage,Longitude,Latitude,HU_Basin,HU_Sub_Basin,HU_Watershed,HU_Sub_Watershed,Civil_Township,X_Coord,Y_Coord,NonConsumptive_ReqAcFt,NonConsumptive_AppAcFt,NonConsumptive_HeldAcFt,WaDEUUID
3,4,4,14910026AB,07/01/37,McKenzie,,,,,291.0,291.0,1615.6,0.0,291.0,291.0,1615.6,0.0,Active,Surface Water,Flooding,,,,,,,,,,,0,0,0,0,0,-103.44126,47.70176,Little Missouri,Lower Little Missouri,Cherry Creek,Headwaters Cherry Creek,Unorganized Territory,1244378,755775,0,0,0,ndPD3


In [11]:
# Permit_Header data
fileInput = "Permit_Header.xlsx"
df_ph = pd.read_excel(fileInput)

# WaDE UUID tracker for data assessment
if 'WaDEUUID' not in df_ph:
    df_ph['WaDEUUID'] = "ndPH" + df_ph.index.astype(str)
    df_ph.to_excel('Permit_Header.xlsx', index=False)

print(len(df_ph))
df_ph.head(1)

9050


Unnamed: 0,Permit_Index,Permit_Number,Permit_Holder_Name,Address1,Address2,City,State,Zip,Priority_Date,Use_Type,Status,Date_Issued,Date_Cancelled,Req_AcFt,Req_Acre,Req_Rate,App_AcFt,App_Acre,App_Rate,Beneficial_Use,Reservation,Project_Name,Hearing_Date,Hearing_Time,Const_Perm_No,Start_Date,Comp_Date,No_Notify,Use_Description,NonConsumptive_ReqAcFt,NonConsumptive_AppAcFt,Date_Perfected,Remarks,Req_Storage,App_Storage,Held_AcFt,Held_Acre,Held_Rate,Held_Storage,Comment_Deadline,NonConsumptive_HeldAcFt,Last_Inspected,Depot_ID,WaDEUUID
0,4,2D,"GUDMUNSEN, ROBERT AND LOWRAINE",1952 134TH AVE NW,,ARNEGARD,ND,58835-9162,01/26/1906,Irrigation,Perfected,04/30/37,00/00/00,291.0,291.0,1615.6,291.0,291.0,1615.6,00/00/00,,,00/00/00,0.0,,00/00/00,00/00/00,0.0,,0.0,0.0,02/17/93,,0.0,0.0,0.0,0.0,0.0,0.0,00/00/00,0.0,00/00/00,0.0,ndPH0


In [12]:
# Left-Join data
df = pd.merge(df_wu, df_pod, on='POD_Index', how='left')
df = df.merge(df_ph, left_on='Permit_Index_x', right_on='Permit_Index', how='left')
df = df.replace(np.nan, "").reset_index(drop=True)

print(len(df))
df.head(1)

205766


Unnamed: 0,Permit_Index_x,POD_Index,Use_Year,Water_Use_Index,Nature_Of_Data,Reported_AcFt,Reported_Acres,Reported_Rate,KWHrs,KWH_Demand,Pump_HP,Begin_Meter,End_Meter,Meter_Units,Comments,NonConsumptive_Use,Crop_type1,Crop_Type2,Reported_Inches,Use_Type_x,WaDEUUID_x,Permit_Index_y,POD,Beneficial_Use_x,County,Column1,_1,Aquifer,SubAquifer,Req_AcFt_x,Req_Acre_x,Req_Rate_x,Req_Storage_x,App_AcFt_x,App_Acre_x,App_Rate_x,App_Storage_x,POD_Status,Source,Irrigation_Type,Source_Name,MainStem_Name,Impound_Location,Impound_Name,Return_Dest,Discharge_Locat,Prop_Owner,Dest_Prop_Owner,Period_Start,Period_End,Return_Quantity,Held_AcFt_x,Held_Acre_x,Held_Rate_x,Held_Storage_x,Longitude,Latitude,HU_Basin,HU_Sub_Basin,HU_Watershed,HU_Sub_Watershed,Civil_Township,X_Coord,Y_Coord,NonConsumptive_ReqAcFt_x,NonConsumptive_AppAcFt_x,NonConsumptive_HeldAcFt_x,WaDEUUID_y,Permit_Index,Permit_Number,Permit_Holder_Name,Address1,Address2,City,State,Zip,Priority_Date,Use_Type_y,Status,Date_Issued,Date_Cancelled,Req_AcFt_y,Req_Acre_y,Req_Rate_y,App_AcFt_y,App_Acre_y,App_Rate_y,Beneficial_Use_y,Reservation,Project_Name,Hearing_Date,Hearing_Time,Const_Perm_No,Start_Date,Comp_Date,No_Notify,Use_Description,NonConsumptive_ReqAcFt_y,NonConsumptive_AppAcFt_y,Date_Perfected,Remarks,Req_Storage_y,App_Storage_y,Held_AcFt_y,Held_Acre_y,Held_Rate_y,Held_Storage_y,Comment_Deadline,NonConsumptive_HeldAcFt_y,Last_Inspected,Depot_ID,WaDEUUID
0,1217,-1,1979,215852.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,December total is Total Annual Use minus cumul...,0.0,,,,Municipal,ndWU0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1217,1469,"NAPOLEON, CITY OF",PO BOX 31,,NAPOLEON,ND,58561,07/29/67,Municipal,Perfected,08/21/67,00/00/00,346.0,0.0,500.0,346.0,0.0,1100.0,00/00/00,,,00/00/00,0.0,_x001B_,00/00/00,00/00/00,0.0,,0.0,0.0,07/06/73,,0.0,0.0,0.0,0.0,0.0,0.0,07/26/04,0.0,07/12/72,0.0,ndPH6076


## Time Series Data
- Exporting Monthly timeseries data.

In [13]:
# Return Data
# Create temporary main dataframe
dfout = pd.DataFrame(index=df.index)

# Variable Info
dfout['in_VariableCV'] = "Withdrawal"
dfout['in_VariableSpecificCV'] = "" # Timeseries specific.

# Water Source Info
dfout['in_WaterSourceTypeCV'] = df['Source']

# Site Info
dfout['in_County'] = df['County']
dfout['in_Latitude'] = df['Latitude']
dfout['in_Longitude'] = df['Longitude']
dfout['in_SiteNativeID'] = df['POD'].astype('str')

# Site Variable Amount Info
dfout['in_Amount'] = df['Reported_AcFt'] # will convert from AcFt to MG
dfout['in_AssociatedNativeAllocationIDs'] = df['Permit_Number'].astype(str)
dfout['in_BeneficialUseCategory'] = df['Use_Type_x']
dfout['in_CommunityWaterSupplySystem'] =  df['Civil_Township']
# dfout['in_CropTypeCV'] = df['Crop_type1']
# dfout['in_IrrigatedAcreage'] = df['Reported_Acres']
# dfout['in_IrrigationMethodCV'] = df['Irrigation_Type']
# dfout['in_PowerGeneratedGWh'] = df['KWHrs']
dfout['in_ReportYearCV'] =  df['Use_Year']
dfout['in_TimeframeStart'] = df['Use_Year'] + "/01/01"
dfout['in_TimeframeEnd'] = df['Use_Year']  + "/12/31"

print(len(dfout))
dfout.head(5)

205766


Unnamed: 0,in_VariableCV,in_VariableSpecificCV,in_WaterSourceTypeCV,in_County,in_Latitude,in_Longitude,in_SiteNativeID,in_Amount,in_AssociatedNativeAllocationIDs,in_BeneficialUseCategory,in_CommunityWaterSupplySystem,in_ReportYearCV,in_TimeframeStart,in_TimeframeEnd
0,Withdrawal,,,,,,,0.0,1469,Municipal,,1979,1979/01/01,1979/12/31
1,Withdrawal,,,,,,,0.0,3005,Municipal,,1979,1979/01/01,1979/12/31
2,Withdrawal,,,,,,,0.0,1120,Municipal,,1979,1979/01/01,1979/12/31
3,Withdrawal,,,,,,,0.0,1469,Municipal,,1980,1980/01/01,1980/12/31
4,Withdrawal,,,,,,,0.0,3005,Municipal,,1980,1980/01/01,1980/12/31


## WaDE Custom Elements (due to missing info)

In [14]:
# updating in_WaterSourceTypeCV to be more machine readable / WaDE specific
# ----------------------------------------------------------------------------------------------------

def createWaterSourceTypeCV(inWST):
    inWST = str(inWST).strip()
    
    if inWST == "":
        outString = "Unspecified"
    elif inWST == "Ground Water":
        outString = "Groundwater"
    else:
        outString =  inWST
      
    return outString

dfout['in_WaterSourceTypeCV'] = dfout.apply(lambda row: createWaterSourceTypeCV(row['in_WaterSourceTypeCV']), axis=1)
dfout['in_WaterSourceTypeCV'].unique()

array(['Unspecified', 'Surface Water', 'Groundwater'], dtype=object)

In [15]:
# Creating WaDE Custom water source native ID for easy water source identification
# ----------------------------------------------------------------------------------------------------

# Create temp WaterSourceNativeID dataframe of unique water source.
def assignWaterSourceNativeID(colrowValue):
    string1 = str(colrowValue)
    outstring = "WaDEND_WS" + string1
    return outstring

dfWaterSourceNativeID = pd.DataFrame()
dfWaterSourceNativeID['in_WaterSourceTypeCV'] = dfout['in_WaterSourceTypeCV']
dfWaterSourceNativeID = dfWaterSourceNativeID.drop_duplicates()

dftemp = pd.DataFrame(index=dfWaterSourceNativeID.index)
dftemp["Count"] = range(1, len(dftemp.index) + 1)
dfWaterSourceNativeID['in_WaterSourceNativeID'] = dftemp.apply(lambda row: assignWaterSourceNativeID(row['Count']), axis=1)

# ----------------------------------------------------------------------------------------------------

# Retreive WaDE Custom water source native ID
def retrieveWaterSourceNativeID(A):
    if (A == '') or (pd.isnull(A)):
        outList = ''
    else:
        ml = dfWaterSourceNativeID.loc[(dfWaterSourceNativeID['in_WaterSourceTypeCV'] == A), 'in_WaterSourceNativeID']
        if not (ml.empty):  # check if the series is empty
            outList = ml.iloc[0]
        else:
            outList = ''
    return outList

dfout['in_WaterSourceNativeID'] = dfout.apply(lambda row: retrieveWaterSourceNativeID(row['in_WaterSourceTypeCV']), axis=1)
dfout['in_WaterSourceNativeID'].unique()

array(['WaDEND_WS1', 'WaDEND_WS2', 'WaDEND_WS3'], dtype=object)

In [16]:
# title format for beneficial use
# ----------------------------------------------------------------------------------------------------

def formatTitle(valA):
    valA = str(valA).strip().title()
    if (valA == "") or (pd.isnull(valA)):
        outString = "Unspecified"
    else:
        outString = valA
      
    return outString

dfout['in_BeneficialUseCategory'] = dfout.apply(lambda row: formatTitle(row['in_BeneficialUseCategory']), axis=1)
dfout['in_BeneficialUseCategory'].unique()

array(['Municipal', 'Rural Water', 'Industrial', 'Fish And Wildlife',
       'Power Generation', 'Multiple Use', 'Unspecified', 'Irrigation',
       'Stock', 'Recreation', 'Flood Control', 'Domestic', 'Commercial'],
      dtype=object)

In [17]:
# Creating WaDE Custom VariableSpecificCV
# ----------------------------------------------------------------------------------------------------

def createVariableSpecificCV(inV, inBU, inWST):
    inV = str(inV).strip()
    inBU = str(inBU).strip().title()
    inWST = str(inWST).strip()
    
    outString = inV + "_Annual_" +  inBU + "_" + inWST
    
    return outString

dfout['in_VariableSpecificCV'] = dfout.apply(lambda row: createVariableSpecificCV(row['in_VariableCV'], 
                                                                                     row['in_BeneficialUseCategory'],
                                                                                     row['in_WaterSourceTypeCV']), axis=1)
dfout['in_VariableSpecificCV'].unique()

array(['Withdrawal_Annual_Municipal_Unspecified',
       'Withdrawal_Annual_Rural Water_Unspecified',
       'Withdrawal_Annual_Industrial_Unspecified',
       'Withdrawal_Annual_Fish And Wildlife_Unspecified',
       'Withdrawal_Annual_Power Generation_Unspecified',
       'Withdrawal_Annual_Multiple Use_Unspecified',
       'Withdrawal_Annual_Unspecified_Unspecified',
       'Withdrawal_Annual_Irrigation_Unspecified',
       'Withdrawal_Annual_Irrigation_Surface Water',
       'Withdrawal_Annual_Industrial_Surface Water',
       'Withdrawal_Annual_Unspecified_Surface Water',
       'Withdrawal_Annual_Fish And Wildlife_Surface Water',
       'Withdrawal_Annual_Stock_Surface Water',
       'Withdrawal_Annual_Municipal_Surface Water',
       'Withdrawal_Annual_Recreation_Surface Water',
       'Withdrawal_Annual_Multiple Use_Surface Water',
       'Withdrawal_Annual_Recreation_Unspecified',
       'Withdrawal_Annual_Irrigation_Groundwater',
       'Withdrawal_Annual_Unspecified_Groundwa

## Groupby and Sum
- Issue of multiple withdrawl values from same sites by permit.  Will cheat for now and aggregate all values at the single site using our WaDE ss aggregation rules.

In [18]:
dfout2 = dfout.copy()
print(len(dfout2))
dfout2.head(1)

205766


Unnamed: 0,in_VariableCV,in_VariableSpecificCV,in_WaterSourceTypeCV,in_County,in_Latitude,in_Longitude,in_SiteNativeID,in_Amount,in_AssociatedNativeAllocationIDs,in_BeneficialUseCategory,in_CommunityWaterSupplySystem,in_ReportYearCV,in_TimeframeStart,in_TimeframeEnd,in_WaterSourceNativeID
0,Withdrawal,Withdrawal_Annual_Municipal_Unspecified,Unspecified,,,,,0.0,1469,Municipal,,1979,1979/01/01,1979/12/31,WaDEND_WS1


In [19]:
groupbyList = ['in_SiteNativeID', 'in_VariableSpecificCV', 'in_TimeframeStart', 'in_TimeframeEnd']
dfout2 = dfout2.groupby(groupbyList).agg(lambda x: ','.join([str(elem) for elem in (list(set(x))) if elem!=''])).replace(np.nan, "").reset_index()
print(len(dfout2))
dfout2.head()

131837


Unnamed: 0,in_SiteNativeID,in_VariableSpecificCV,in_TimeframeStart,in_TimeframeEnd,in_VariableCV,in_WaterSourceTypeCV,in_County,in_Latitude,in_Longitude,in_Amount,in_AssociatedNativeAllocationIDs,in_BeneficialUseCategory,in_CommunityWaterSupplySystem,in_ReportYearCV,in_WaterSourceNativeID
0,,Withdrawal_Annual_Commercial_Unspecified,1977/01/01,1977/12/31,Withdrawal,Unspecified,,,,1.9,2501,Commercial,,1977,WaDEND_WS1
1,,Withdrawal_Annual_Commercial_Unspecified,1978/01/01,1978/12/31,Withdrawal,Unspecified,,,,"1.8,3.0",25012959,Commercial,,1978,WaDEND_WS1
2,,Withdrawal_Annual_Commercial_Unspecified,1979/01/01,1979/12/31,Withdrawal,Unspecified,,,,"1.7,6.0",25012959,Commercial,,1979,WaDEND_WS1
3,,Withdrawal_Annual_Commercial_Unspecified,1980/01/01,1980/12/31,Withdrawal,Unspecified,,,,"1.7,6.0",25012959,Commercial,,1980,WaDEND_WS1
4,,Withdrawal_Annual_Commercial_Unspecified,1981/01/01,1981/12/31,Withdrawal,Unspecified,,,,"1.7,6.0",25012959,Commercial,,1981,WaDEND_WS1


## Cleaning Output
- checking & changing data type & format.

In [20]:
# Convert History Year to YYYY-MM-DD format.

dfout2['in_TimeframeEnd'] = pd.to_datetime(dfout2['in_TimeframeEnd'], errors = 'coerce')
dfout2['in_TimeframeEnd'] = pd.to_datetime(dfout2["in_TimeframeEnd"].dt.strftime('%m/%d/%Y'))

dfout2['in_TimeframeStart'] = pd.to_datetime(dfout2['in_TimeframeStart'], errors = 'coerce')
dfout2['in_TimeframeStart'] = pd.to_datetime(dfout2["in_TimeframeStart"].dt.strftime('%m/%d/%Y'))

dfout2.head(1)

Unnamed: 0,in_SiteNativeID,in_VariableSpecificCV,in_TimeframeStart,in_TimeframeEnd,in_VariableCV,in_WaterSourceTypeCV,in_County,in_Latitude,in_Longitude,in_Amount,in_AssociatedNativeAllocationIDs,in_BeneficialUseCategory,in_CommunityWaterSupplySystem,in_ReportYearCV,in_WaterSourceNativeID
0,,Withdrawal_Annual_Commercial_Unspecified,1977-01-01,1977-12-31,Withdrawal,Unspecified,,,,1.9,2501,Commercial,,1977,WaDEND_WS1


In [21]:
# summing up the comma separated list of Amounts to one value.
def sumAmountsFunc(valA):
    valAList = valA.split(",")
    for x in valAList:
        if x == "" or "," in x:
            outString = x
        else:
            try:
                x = float(x)
                outString += x
            except:
                outString = x
                    
    return outString

dfout2['in_Amount'] = dfout2.apply(lambda row: sumAmountsFunc(row['in_Amount']), axis=1)
dfout2.head(1)

Unnamed: 0,in_SiteNativeID,in_VariableSpecificCV,in_TimeframeStart,in_TimeframeEnd,in_VariableCV,in_WaterSourceTypeCV,in_County,in_Latitude,in_Longitude,in_Amount,in_AssociatedNativeAllocationIDs,in_BeneficialUseCategory,in_CommunityWaterSupplySystem,in_ReportYearCV,in_WaterSourceNativeID
0,,Withdrawal_Annual_Commercial_Unspecified,1977-01-01,1977-12-31,Withdrawal,Unspecified,,,,1.9,2501,Commercial,,1977,WaDEND_WS1


In [22]:
# Converting numbers that are in string to float.

# in_Latitude & in_Longitude
dfout2['in_Latitude'] = pd.to_numeric(dfout2['in_Latitude'], errors='coerce')
dfout2['in_Longitude'] = pd.to_numeric(dfout2['in_Longitude'], errors='coerce')

# in_Amount
dfout2['in_Amount'] = pd.to_numeric(dfout2['in_Amount'], errors='coerce')

# # in_PowerGeneratedGWh
# dfout2['in_PowerGeneratedGWh'] = pd.to_numeric(dfout2['in_PowerGeneratedGWh'], errors='coerce')

#in_ReportYearCV
# having some issues converting this to an int
dfout2['in_ReportYearCV'] = pd.to_numeric(dfout2['in_ReportYearCV'], errors='coerce')
dfout2['in_ReportYearCV'] = dfout2['in_ReportYearCV'].fillna(0).astype('int64')

dfout2.head(1)

Unnamed: 0,in_SiteNativeID,in_VariableSpecificCV,in_TimeframeStart,in_TimeframeEnd,in_VariableCV,in_WaterSourceTypeCV,in_County,in_Latitude,in_Longitude,in_Amount,in_AssociatedNativeAllocationIDs,in_BeneficialUseCategory,in_CommunityWaterSupplySystem,in_ReportYearCV,in_WaterSourceNativeID
0,,Withdrawal_Annual_Commercial_Unspecified,1977-01-01,1977-12-31,Withdrawal,Unspecified,,,,1.9,2501,Commercial,,1977,WaDEND_WS1


In [23]:
# convert Amount AcFt to MG
def convertAmountFunc(valA):
    outVal = valA * 0.28002596920264
    return outVal

dfout2['in_Amount'] = dfout2.apply(lambda row: convertAmountFunc(row['in_Amount']), axis=1)
dfout2.head(1)

Unnamed: 0,in_SiteNativeID,in_VariableSpecificCV,in_TimeframeStart,in_TimeframeEnd,in_VariableCV,in_WaterSourceTypeCV,in_County,in_Latitude,in_Longitude,in_Amount,in_AssociatedNativeAllocationIDs,in_BeneficialUseCategory,in_CommunityWaterSupplySystem,in_ReportYearCV,in_WaterSourceNativeID
0,,Withdrawal_Annual_Commercial_Unspecified,1977-01-01,1977-12-31,Withdrawal,Unspecified,,,,0.532049,2501,Commercial,,1977,WaDEND_WS1


## Export Outputfile(s)

In [24]:
dfout2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 131837 entries, 0 to 131836
Data columns (total 15 columns):
 #   Column                            Non-Null Count   Dtype         
---  ------                            --------------   -----         
 0   in_SiteNativeID                   131837 non-null  object        
 1   in_VariableSpecificCV             131837 non-null  object        
 2   in_TimeframeStart                 131830 non-null  datetime64[ns]
 3   in_TimeframeEnd                   131830 non-null  datetime64[ns]
 4   in_VariableCV                     131837 non-null  object        
 5   in_WaterSourceTypeCV              131837 non-null  object        
 6   in_County                         131837 non-null  object        
 7   in_Latitude                       130211 non-null  float64       
 8   in_Longitude                      130220 non-null  float64       
 9   in_Amount                         131831 non-null  float64       
 10  in_AssociatedNativeAllocationIDs

In [25]:
# Exporting output files.
dfout2.to_csv('P_ndSSMaster.csv', index=False)  # The master output.

#### bonus:
- checking processed sitespecificamounts.csv for duplicate and identifying why