In [1]:
import os
import pandas as pd
import arcpy
import numpy as np

### Import customer service dataset formatted from ASPA sheets

In [16]:
cs_data = pd.read_csv('Water_Usage_Sales.csv')

# Clean
# Strip whitespace from the column names first so every column lookup below uses the clean name.
cs_data = cs_data.rename(columns=lambda x: x.strip())
# Remove rows where there is no village name. The .copy() makes the result an independent
# frame, so the column assignments below do not write into a slice of the raw data.
cs_data = cs_data[cs_data['LOSADD'].notnull()].copy()

# The usage column arrives as text. Normalise it before converting to a number:
#   - drop embedded spaces and thousands separators (literal matches, regex disabled explicitly)
#   - treat a cell made up only of dashes as a zero reading
# Only whole-cell dashes are replaced, so a leading minus sign is no longer turned into a digit.
usage_text = (
    cs_data['Usage']
    .str.replace(" ", "", regex=False)
    .str.replace(",", "", regex=False)
    .str.replace(r"^-+$", "0", regex=True)
)
cs_data['Usage'] = pd.to_numeric(usage_text)  # raises if anything non-numeric is left over

# Here I am going to only take those meters that are on a residential rate
# Not sure if this is the best idea but can double check later
cs_data = cs_data[cs_data['RSP'] == 'RES'].copy()

cs_data.head()

Unnamed: 0,Usage,Days of Usage,Billed Amount for Water,MTH/YR OF BILLING,Route#,LOSADD,RSP,Rate,LOMETW,Meter Size,Unnamed: 10,Unnamed: 11
0,0,28.0,15.21,202205.0,352.0,PAVAIAI,RES,R,90277756.0,"5/8""",,
1,190,28.0,16.03,202205.0,377.0,NUA,RES,R,90277755.0,"5/8""",,
2,21727,28.0,115.84,202205.0,352.0,PAVAIAI,RES,R,90277754.0,"5/8""",,
3,3914,28.0,32.13,202205.0,362.0,TAPUTIMU,RES,R,90277753.0,"5/8""",,
4,0,30.0,15.21,202205.0,301.0,NUUULI,RES,R,90277752.0,"5/8""",,


In [17]:
# Read the attribute table of the village shapefile into a pandas DataFrame.

shp_path = os.path.join(".", 'shp', 'All_villages_Manua_Merged.shp')

# List every attribute field except the geometry one. Per the original note, the "Shape"
# column breaks the NumPy-array-to-pandas conversion. Selecting on the field type avoids
# assuming that the geometry field always sits at position 1 of the field list.
columns_nams = [field.name for field in arcpy.ListFields(shp_path) if field.type != 'Geometry']

temparr = arcpy.da.FeatureClassToNumPyArray(shp_path, columns_nams)     # structured NumPy array of the attributes
shapefile_df = pd.DataFrame(temparr)                                    # one DataFrame column per attribute field

In [18]:
# Distinct village names as spelled in the customer-service data, in sorted order
cs_village_names = cs_data['LOSADD'].unique()
np.sort(cs_village_names)

array(['AASU', 'AASU FOU', 'AFAO', 'AFONO', 'AGUGULU', 'ALAO', 'ALEGA',
       'ALOFAU', 'AMALUIA', 'AMANAVE', 'AMAUA', 'AMOULI', 'AOA', 'AOLOAU',
       'ASILI', 'ATAULOMA', 'ATUU', 'AUA', 'AUASI', 'AUNUU', 'AUTO',
       'AVAIO', 'FAGAALU', 'FAGAITUA', 'FAGALII', 'FAGAMALO', 'FAGANEANEA',
       'FAGASA', 'FAGATOGO', 'FAILOLO', 'FALEASAO', 'FALENIU',
       'FATUMAFUTI', 'FITIUTA', 'FOGAGOGO', 'FUTIGA', 'Fagaalu',
       'GATAIVAI', 'ILIILI', 'LAULII', 'LELOALOA', 'LEONE', 'MALAEIMI',
       'MALAELOA', 'MALALOA', 'MALOATA', 'MAPUSAGA', 'MAPUSAGA FOU',
       'MASAUSI', 'MASEFAU', 'MATUU', 'MESEPA', 'NUA', 'NUUULI', 'OFU',
       'OLOSEGA', 'ONENOA', 'PAGAI', 'PAGO PAGO', 'PAVAIAI', 'POLOA',
       'SAILELE', 'SATALA', 'SEETAGA', 'TAFETA', 'TAFUNA', 'TAPUTIMU',
       'TAU', 'TULA', 'UTULEI', 'UTUMEA', 'UTUMEA-SASAE', 'UTUSIA',
       'VAILOA', 'VAITOGI', 'VATIA', 'Vaitogi'], dtype=object)

In [19]:
# Distinct village names as spelled in the shapefile attribute table, in sorted order
shp_village_names = shapefile_df['VILLAGE'].unique()
np.sort(shp_village_names)

array(['Aasu', 'Afao', 'Afono', 'Agugulu', 'Alao', 'Alega', 'Alofau',
       'Amaluia', 'Amanave', 'Amaua', 'Amouli', 'Anua', 'Aoa', 'Aoloau',
       'Asili', 'Atuu', 'Aua', 'Auasi', 'Aumi', 'Aunuu', 'Auto', 'Avaio',
       'Fagaalu', 'Fagaitua', 'Fagalii', 'Fagamalo', 'Faganeanea',
       'Fagasa', 'Fagatogo', 'Failolo', 'Faleniu', 'Fatumafuti', 'Futiga',
       'Iliili', 'Laulii', 'Leloaloa', 'Leone', 'Malaeimi', 'Malaeloa',
       'Maloata', 'Mapusagafou', 'Masausi', 'Masefau', 'Matuu', 'Mesepa',
       'Nua', 'Nuuuli', 'Ofu', 'Olosega', 'Onenoa', 'Pagai', 'Pago Pago',
       'Pavaiai', 'Poloa', 'Sailele', 'Seetaga', 'Tafuna', 'Taputimu',
       'Tau', 'Tula', 'Utulei', 'Utumea East', 'Utumea West', 'Vailoatai',
       'Vaitogi', 'Vatia'], dtype=object)

In [20]:
# Lookup table from the village name used in the customer-service data (dict key)
# to the village name used in the shapefile (dict value).
# Several customer-service names share one shapefile village, and the two
# mixed-case spellings found in the data ('Fagaalu', 'Vaitogi') have their own entries.
village_key = {
    "AASU": "Aasu",
    "AASU FOU": "Aasu",
    "AFAO": "Afao",
    "AFONO": "Afono",
    "AGUGULU": "Agugulu",
    "ALAO": "Alao",
    "ALEGA": "Alega",
    "ALOFAU": "Alofau",
    "AMALUIA": "Amaluia",
    "AMANAVE": "Amanave",
    "AMAUA": "Amaua",
    "AMOULI": "Amouli",
    "AOA": "Aoa",
    "AOLOAU": "Aoloau",
    "ASILI": "Asili",
    "ATAULOMA": "Afao",
    "ATUU": "Atuu",
    "AUA": "Aua",
    "AUASI": "Auasi",
    "AUNUU": "Aunuu",
    "AUTO": "Auto",
    "AVAIO": "Avaio",
    "FAGAALU": "Fagaalu",
    "FAGAITUA": "Fagaitua",
    "FAGALII": "Fagalii",
    "FAGAMALO": "Fagamalo",
    "FAGANEANEA": "Faganeanea",
    "FAGASA": "Fagasa",
    "FAGATOGO": "Fagatogo",
    "FAILOLO": "Failolo",
    "FALEASAO": "Tau",
    "FALENIU": "Faleniu",
    "FATUMAFUTI": "Fatumafuti",
    "FITIUTA": "Tau",
    "FOGAGOGO": "Iliili",
    "FUTIGA": "Futiga",
    "Fagaalu": "Fagaalu",
    "GATAIVAI": "Utulei",
    "ILIILI": "Iliili",
    "LAULII": "Laulii",
    "LELOALOA": "Leloaloa",
    "LEONE": "Leone",
    "MALAEIMI": "Malaeimi",
    "MALAELOA": "Malaeloa",
    "MALALOA": "Fagatogo",
    "MALOATA": "Maloata",
    "MAPUSAGA": "Mapusagafou",
    "MAPUSAGA FOU": "Mapusagafou",
    "MASAUSI": "Masausi",
    "MASEFAU": "Masefau",
    "MATUU": "Matuu",
    "MESEPA": "Mesepa",
    "NUA": "Nua",
    "NUUULI": "Nuuuli",
    "OFU": "Ofu",
    "OLOSEGA": "Olosega",
    "ONENOA": "Onenoa",
    "PAGAI": "Pagai",
    "PAGO PAGO": "Pago Pago",
    "PAVAIAI": "Pavaiai",
    "POLOA": "Poloa",
    "SAILELE": "Sailele",
    "SATALA": "Pago Pago",
    "SEETAGA": "Seetaga",
    "TAFETA": "Mapusagafou",
    "TAFUNA": "Tafuna",
    "TAPUTIMU": "Taputimu",
    "TAU": "Tau",
    "TULA": "Tula",
    "UTULEI": "Utulei",
    "UTUMEA": "Utumea West",
    "UTUMEA-SASAE": "Utumea East",
    "UTUSIA": "Fagaitua",
    "VAILOA": "Vailoatai",
    "VAITOGI": "Vaitogi",
    "VATIA": "Vatia",
    "Vaitogi": "Vaitogi",
}

In [21]:
# Total up water usage per village: one row per distinct LOSADD value, with the summed
# 'Usage' of that village in 'Usage_gal'.
#
# A single groupby replaces the previous loop that grew the frame with DataFrame.append:
#   - DataFrame.append was removed in pandas 2.0, and copying the frame on every iteration is quadratic
#   - the empty frame was created with a set of column names, which has no defined order
# sort=False keeps the villages in order of first appearance, the same order the loop over
# unique() produced. Column order is now always ['Village', 'Usage_gal'] and the totals keep
# a numeric dtype.
agg_data = (
    cs_data.groupby('LOSADD', sort=False)['Usage']
    .sum()
    .rename_axis('Village')
    .reset_index(name='Usage_gal')
)
   

### Assess lookup functions 

In [22]:
# Show the customer-service records whose Route# equals 112
on_route_112 = cs_data['Route#'] == 112.0
cs_data[on_route_112]

Unnamed: 0,Usage,Days of Usage,Billed Amount for Water,MTH/YR OF BILLING,Route#,LOSADD,RSP,Rate,LOMETW,Meter Size,Unnamed: 10,Unnamed: 11
257,5735,32.0,40.00,202205.0,112.0,FAGAITUA,RES,R,90277465.0,"5/8""",,
678,1118,32.0,20.04,202205.0,112.0,FAGAITUA,RES,R,87127204.0,"5/8""",,
1218,0,32.0,15.21,202205.0,112.0,ALEGA,RES,R,87126656.0,"5/8""",,
1886,94,32.0,15.61,202205.0,112.0,AUTO,RES,R,87125967.0,"5/8""",,
2967,185,32.0,16.01,202205.0,112.0,FAGAITUA,RES,R,84554169.0,"5/8""",,
3073,16511,32.0,89.83,202205.0,112.0,ALEGA,RES,R,84554057.0,"5/8""",,
5290,731,32.0,18.36,202205.0,112.0,AMAUA,RES,R,83509131.0,"5/8""",,
5304,7578,32.0,47.96,202205.0,112.0,AUTO,RES,R,83509117.0,"5/8""",,
5651,27,32.0,15.33,202205.0,112.0,AMAUA,RES,R,83508741.0,"5/8""",,
6030,422,32.0,17.03,202205.0,112.0,ALEGA,RES,R,83508341.0,"5/8""",,


In [154]:
# Show the records whose village name is spelled exactly 'Vaitogi' (mixed case)
is_mixed_case_vaitogi = cs_data['LOSADD'] == 'Vaitogi'
cs_data[is_mixed_case_vaitogi]

Unnamed: 0,Usage,Days of Usage,Billed Amount for Water,MTH/YR OF BILLING,Route#,LOSADD,RSP,Rate,LOMETW,Meter Size,Unnamed: 10,Unnamed: 11
237,1343,30.0,21.01,202205.0,337.0,Vaitogi,RES,R,90277486.0,"5/8""",,
