### Import USGS use data & Create usage table
Here we download the raw usage data for years 2000, 2005, and 2010 from the USGS usage site and synthesize all data into a flat table listing: year, FIPS code, and total annual withdrawals by sector in MGal/day (*so these need to be adjusted to obtain yearly sums!)

The steps required include:
* Pulling the raw data file, in tab-delimited format, from the USGS server into a pandas dataframe.
* Adjusting field names to a standard nomenclature:
    * Adding year columns to 2000 and 2005 datasets.
    * For year 2000, remapping 'IT', 'LA', 'LS', and 'PE' fields to 'IR', 'AQ', 'LI' and 'PC', respectively ([reference](https://water.usgs.gov/watuse/data/2000/datadict.html)).
        * IT -> IR (Irrigation, total)
        * LA -> AQ (Aquaculture)
        * LS -> LI (Livestock)
        * PE -> PC (Thermoelectric power closed-loop)
    * For year 2005, remapping 'LA' and 'LS' fields to 'AQ' and 'LI', respectively ([reference](https://water.usgs.gov/watuse/data/2005/datadict.html)).
        * LA -> AQ (Aquaculture)
        * LS -> LI (Livestock)
* Re-arranging data into a tidy format to facilitate additional analyses.
    * Care must be taken that the FIPS codes are preserved as text, not numbers
* Appending all tables into a single dataframe, with records tagged by the year of the dataset.
* Removing extraneous fields, keeping only the sector total columns.
    
The resulting table will be formatted as follows:

| YEAR | FIPS | UseClass | SourceClass | SourceType | Amount |
| :---: | :---: | :---: | :---: | :---: | :---: |
| 2000 | 01001 | PublicSupply | Surface  | Fresh |0.00 |
| 2000 | 01001 | Industrial | Ground  | Saline |0.00 |

This table can then be summarized and joined, by YEAR and FIPS code, to other accounting data tables and summarized by state. 


In [1]:
#Import modules required for analysis
import os
import pandas as pd
import numpy as np

In [2]:
#Substring substitutions that bring the column names of every year in line
#with the 2010 naming; each pair is (text to find, text to put in its place)
remapDict = dict([
    ('IT-', 'IR-'),      #Irrigated (total)
    ('LA-', 'AQ-'),      #Aquaculture
    ('LS-', 'LI-'),      #Livestock
    ('PE-', 'PC-'),      #Closed-loop thermo electric
    ('Wtotl', 'WTotl'),  #Capitalization mismatch
])

In [3]:
#Create a function for importing a table and standardizing all field names
def importTable(year, remapDict, source=None):
    """Import one USGS county water-use table and standardize its field names.

    Args:
        year: Dataset year (e.g. 2000, 2005 or 2010). Used to build the
            download URL and to fill the YEAR column when the table has none.
        remapDict: Mapping of {text to find: replacement text} applied to
            every column name (plain substring replacement).
        source: Optional path, URL or file-like object holding the
            tab-delimited table. When omitted, the table is downloaded from
            the USGS server using `year`.

    Returns:
        A pandas DataFrame in which the STATE, STATEFIPS and COUNTYFIPS
        columns and any "Unnamed" columns have been removed, the column names
        have been remapped, FIPS is stored as text, and a YEAR column exists.
    """
    #Set the download URL using the year (unless another source was supplied)
    if source is None:
        source = 'http://water.usgs.gov/watuse/data/{0}/usco{0}.txt'.format(year)

    #Read the dataset into a data frame (keeping the FIPS attribute as a
    # string so that leading zeros are not lost)
    df = pd.read_table(source, dtype={'FIPS': str})

    #Remove the STATE, STATEFIPS and COUNTYFIPS columns (not needed); columns
    # that a given year's table does not contain are simply skipped
    dropCols = [c for c in ("STATE", "STATEFIPS", "COUNTYFIPS") if c in df.columns]
    df = df.drop(dropCols, axis=1)

    #Use the remap dictionary to rename columns: each find/replace pair is
    # applied in turn to the full list of column names
    colNames = df.columns.values.tolist()
    for inFld, outFld in remapDict.items():
        colNames = [x.replace(inFld, outFld) for x in colNames]
    df.columns = colNames

    #Add year field, if not present
    if "YEAR" not in df.columns:
        df.insert(1, "YEAR", year)

    #Remove unnamed columns (pandas labels header-less columns "Unnamed: N";
    # these appear when the lines of the file end with a trailing tab)
    unnamedCols = [c for c in df.columns if str(c).startswith("Unnamed")]
    if unnamedCols:
        df = df.drop(unnamedCols, axis=1)

    #Status (parenthesized single argument prints the same under Python 2 and 3)
    print("{} records and {} attributes returned for year {}".format(df.shape[0], df.shape[1], year))

    #Return the data frame
    return df

In [4]:
#Import and standardize one table per survey year (2000, then 2005, then 2010)
df2000, df2005, df2010 = (importTable(yr, remapDict) for yr in (2000, 2005, 2010))

3222 records and 68 attributes returned for year 2000
3224 records and 105 attributes returned for year 2005
3224 records and 114 attributes returned for year 2010


In [5]:
#"Melt" data so each use/class/type becomes a new row
#FIPS and YEAR stay as identifier columns; every other column name moves into
#'Class' and its value into 'Amount'
df2000m, df2005m, df2010m = (
    pd.melt(tbl, id_vars=['FIPS','YEAR'], var_name='Class', value_name='Amount')
    for tbl in (df2000, df2005, df2010)
)

In [6]:
#Stack the three melted tables (2000, 2005, 2010) into one data frame,
#renumbering the rows from zero
dfUse = pd.concat(
    objs=[df2000m, df2005m, df2010m],
    ignore_index=True,
)

In [54]:
#Create remap dictionaries
#Each dictionary maps a fragment of the melted 'Class' label to a readable
#category name.

#Use class: keyed on the text the 'Class' label starts with
useClassMap = {'TP':'Population',
               'PS':'PublicSupply',
               'DO':'Domestic',
               'IN':'Industrial',
               'IR':'Irrigation',
               'IC':'Irrigation_Crop',
               'IG':'Irrigation_Golf',
               'LI':'Livestock',
               'AQ':'Aquaculture',
               'MI':'Mining',
               'PT':'ThermoElec',
               'PO':'ThermoElec_OnceThru',
               'PC':'ThermoElec_Recirc',
               'TO':'Total'
              }

#Source class: keyed on text found anywhere in the 'Class' label
srcClassMap = {'-WGW':'Groundwater',
               '-WSW':'Surface'
              }

#Source type: keyed on the text the 'Class' label ends with.
#NOTE: the short key 'To' is also the tail of 'WFrTo' and 'WSaTo', so the
# longer keys must be applied after 'To' for their values to take effect.
srcTypeMap = {'Fr':'Fresh',
              'Sa':'Saline',
              'To':'Total',
              'WFrTo':'TotFresh',   #was 'WFtTo'; 'Fr' is the fresh-water abbreviation used above
              'WSaTo':'TotSaline',
              'WTotl':'TotWithdrawal'
             }

In [9]:
#Fill 'UseClass' by matching the start of each 'Class' label against the
#keys of useClassMap
for prefix, useName in useClassMap.items():
    hasPrefix = dfUse['Class'].str.startswith(prefix)
    dfUse.loc[hasPrefix, 'UseClass'] = useName

In [20]:
#Fill 'SrcClass' for every 'Class' label that contains one of the
#srcClassMap keys
for token, srcName in srcClassMap.items():
    hasToken = dfUse['Class'].str.contains(token)
    dfUse.loc[hasToken, 'SrcClass'] = srcName

In [55]:
#Set the source types
#Several keys are suffixes of one another (e.g. 'To' also ends 'WSaTo'), so the
#keys are applied shortest-first: the longer, more specific suffix is written
#last and wins, whatever order the dictionary happens to iterate in.
for k in sorted(srcTypeMap, key=len):
    dfUse.loc[dfUse.Class.str.endswith(k), 'SrcType'] = srcTypeMap[k]

In [59]:
#CHECK: List the 'Class' labels that received no UseClass remap
dfUse.loc[dfUse['UseClass'].isnull(), 'Class'].unique()

array(['COUNTY'], dtype=object)

In [57]:
#CHECK: List the 'Class' labels that received no SrcClass remap
dfUse.loc[dfUse['SrcClass'].isnull(), 'Class'].unique()

array(['TP-TotPop', 'PS-TOPop', 'DO-SSPop', 'IR-IrSpr', 'IR-IrMic',
       'IR-IrSur', 'IR-IrTot', 'PS-GWPop', 'PS-SWPop', 'DO-PSDel',
       'DO-TOTAL ', 'IC-IrSpr', 'IC-IrMic', 'IC-IrSur', 'IC-IrTot',
       'IG-IrSpr', 'IG-IrMic', 'IG-IrSur', 'IG-IrTot', 'PT-Power',
       'PO-Power', 'PC-Power', 'COUNTY', 'DO-SSPCp', 'DO-PSPCp'], dtype=object)

In [58]:
#CHECK: List the 'Class' labels that received no SrcType remap
dfUse.loc[dfUse['SrcType'].isnull(), 'Class'].unique()

array(['TP-TotPop', 'PS-TOPop', 'DO-SSPop', 'IR-IrSpr', 'IR-IrMic',
       'IR-IrSur', 'IR-IrTot', 'PS-GWPop', 'PS-SWPop', 'DO-PSDel',
       'DO-TOTAL ', 'IC-IrSpr', 'IC-IrMic', 'IC-IrSur', 'IC-IrTot',
       'IG-IrSpr', 'IG-IrMic', 'IG-IrSur', 'IG-IrTot', 'PT-Power',
       'PO-Power', 'PC-Power', 'COUNTY', 'DO-SSPCp', 'DO-PSPCp'], dtype=object)

In [62]:
#Drop, in place, every record that has a null in at least one column
#(this includes records left without a UseClass, SrcClass or SrcType)
dfUse.dropna(how='any', axis='index', inplace=True)

In [63]:
#Write the tidy table to <dataDir>/<outFN> as CSV, leaving out the row index
dataDir = '../../Data'
outFN = 'UsageDataTidy.csv'
outPath = os.path.join(dataDir, outFN)
dfUse.to_csv(outPath, index=False)

In [None]:
#Check that the final merge contains all yearly columns
#Builds the set of column names of each yearly table and prints whether each
#one is a subset of the column names of `df`.
#NOTE(review): `df` is not assigned at notebook level in the cells shown (it is
# only a local variable inside importTable), so building sXX would raise a
# NameError -- confirm which data frame this check is meant to inspect.
s00 = set(df2000.columns.values.tolist())
s05 = set(df2005.columns.values.tolist())
s10 = set(df2010.columns.values.tolist())
sXX = set(df.columns.values.tolist())
print s00.issubset(sXX)
print s05.issubset(sXX)
print s10.issubset(sXX)

In [None]:
#Fix FIPS column
#Converts each FIPS value to text and left-pads it with zeros to 5 characters.
#NOTE(review): neither `dfUsage` nor `df` is assigned at notebook level in the
# cells shown -- confirm where these data frames are meant to come from.
dfUsage['FIPS'] = df['FIPS'].apply(lambda x: str(x).zfill(5))

In [None]:
#Save as a CSV file
#Writes `dfUsage` to the file named by `outFN`, labelling the index column "KEY".
#NOTE(review): `dfUsage` is not assigned at notebook level in the cells shown,
# and `outFN` is used here without the `dataDir` folder -- confirm both.
dfUsage.to_csv(outFN,index_label="KEY")