In [1]:
%matplotlib inline
import datetime
import os
import re
import urllib2

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pylab import rcParams
# Default figure size (width, height) applied to every plot drawn in this notebook.
rcParams['figure.figsize'] = (15, 10)

In [2]:
import sys
import platform
import matplotlib
# Report the platform and library versions this notebook was run with.
print("Operating System {} {}".format(platform.system(), platform.release()))
print("Python Version {}".format(sys.version))
print("Pandas Version {}".format(pd.__version__))
print("Numpy Version {}".format(np.__version__))
print("Matplotlib Version {}".format(matplotlib.__version__))

Operating System Windows 7
Python Version 2.7.8 (default, Jun 30 2014, 16:03:49) [MSC v.1500 32 bit (Intel)]
Pandas Version 0.17.1
Numpy Version 1.10.4
Matplotlib Version 1.5.1


In [5]:
# NWIS parameter-code query, one query-string parameter per line
# (adjacent string literals are concatenated into a single URL).
paramcodeURL = (
    "http://nwis.waterdata.usgs.gov/nwis/pmcodes"
    "?radio_pm_search=param_group"
    "&pm_group=All+--+include+all+parameter+groups"
    "&pm_search="
    "&casrn_search="
    "&srsname_search="
    "&format=rdb"
    "&show=parameter_group_nm"
    "&show=parameter_nm"
    "&show=casrn"
    "&show=srsname"
    "&show=parameter_units"
)

In [21]:
# Download the parameter-code table: tab-separated, lines starting with '#' are skipped.
NWISParam = pd.read_csv(paramcodeURL, sep='\t', comment='#')

In [22]:
# Keep the rows from index label 1 onward (label-based, so re-running this cell is harmless).
# NOTE(review): row 0 is presumably the RDB column-format row -- confirm against the raw download.
# .copy() makes the result an independent frame, so the columns added to NWISParam later in
# the notebook do not raise SettingWithCopyWarning.
NWISParam = NWISParam.loc[1:].copy()

In [23]:
# Preview the first rows only instead of rendering the whole parameter table.
NWISParam.head(10)

Unnamed: 0,parameter_cd,parameter_group_nm,parameter_nm,casrn,srsname,parameter_units
1,00001,Information,"Location in cross section, distance from right...",,,ft
2,00002,Information,"Location in cross section, distance from right...",,,%
3,00003,Information,"Sampling depth, feet",,,ft
4,00005,Information,"Location in cross section, fraction of total d...",,,%
5,00008,Information,Sample accounting number,,,nu
6,00009,Information,"Location in cross section, distance from left ...",,,ft
7,00022,Information,"Duration of exposure, sample or test, days",,,days
8,00023,Information,"Sample weight, pounds",,,lb
9,00024,Information,"Sample length, inches",,,in
10,00028,Information,"Agency analyzing sample, code",,,code


In [61]:
def parMedia(x):
    '''
    parses sample media based on parameter description
    
    http://waterqualitydata.us/portal_userguide/#WQPUserGuide-Table2
    
    x is the parameter description text; returns the media label of the first
    marker found in x, or '' when no marker is present
    '''
    # (marker, media) pairs are tested from top to bottom and the first hit wins,
    # so the order of this table matters
    markers = (
        (', water, ', 'water'),
        (' soil, ', 'soil'),
        (', solids', 'soil'),
        (', rock, ', 'rock'),
        (', biota, ', 'biota'),
        ('atmospheric deposition, ', 'air'),
        (', bed sediment', 'bed sediment'),
        (', suspended sediment', 'suspended sediment'),
        (', air, ', 'air'),
    )
    for marker, label in markers:
        if marker in x:
            return label
    return ''
        
def parSampFrac(x):
    '''
    parses sample fraction based on parameter description
    
    returns 'dissolved', 'filtered' or 'unfiltered' for the first marker found
    in x (checked in that order), or '' when none is present
    '''
    if ', dissolved,' in x:
        return 'dissolved'
    if ', filtered' in x:
        return 'filtered'
    if ', unfiltered' in x:
        return 'unfiltered'
    return ''

def fieldCol(x):
    '''
    parses whether the parameter description names a field or a lab value
    
    returns 'field' or 'lab' for the first marker found in x (checked in that
    order), or '' when neither is present
    '''
    for marker, origin in ((', field, ', 'field'), (', lab, ', 'lab')):
        if marker in x:
            return origin
    return ''

def getLast(x):
    '''
    returns the text after the last ', ' in str(x), or all of str(x) when
    there is no ', '
    '''
    text = str(x)
    # rpartition yields ('', '', text) when the separator is absent
    return text.rpartition(", ")[2]

def getAsUnits(x):
    '''
    returns the text following the last standalone "as " in x
    
    e.g. 'milligrams per liter as nitrogen' gives 'nitrogen'. The "as" must
    start a word, so words that merely end in "as" (such as "areas " or
    "gas ") are not treated as a separator. Returns None when x contains no
    standalone "as ".
    '''
    # \b requires a word boundary in front of "as"
    pieces = re.split(r'\bas ', x)
    if len(pieces) > 1:
        return pieces[-1]
    return None

In [89]:
# Derive helper columns from the free-text parameter description.
# Series.apply has no axis argument, so each helper is passed directly with no
# extra positional argument.
NWISParam['media'] = NWISParam['parameter_nm'].apply(parMedia)  # defines sample media based on parameter description
NWISParam['sampfrac'] = NWISParam['parameter_nm'].apply(parSampFrac)  # defines sample fraction based on parameter description
NWISParam['fieldCol'] = NWISParam['parameter_nm'].apply(fieldCol)  # 'field' / 'lab' marker, if any
NWISParam['units'] = NWISParam['parameter_nm'].apply(getLast)  # text after the last ', ' of the description
NWISParam['asUnits'] = NWISParam['units'].apply(getAsUnits)  # text after "as " within units, if any
NWISParam['lwrSRS'] = NWISParam['srsname'].apply(lambda x: str(x).lower())  # lowercased srsname used as lookup key


In [90]:
# Keep the parameters tagged as water plus those whose description named no medium ('').
NWISParamWater = NWISParam[NWISParam['media'].isin(['water', ''])]

In [121]:
# Exploratory probe: parameter group of the first row whose lowercased SRS name is exactly
# 'temperature'. The recorded run raised IndexError, i.e. no row matched that exact name.
NWISParamWater[NWISParamWater['lwrSRS']=='temperature']['parameter_group_nm'].values[0]

IndexError: index 0 is out of bounds for axis 0 with size 0

In [70]:
# Show the row(s) for parameter code '00931'; the code is compared as a string.
NWISParamWater.loc[
    NWISParamWater['parameter_cd'] == '00931'
]

Unnamed: 0,parameter_cd,parameter_group_nm,parameter_nm,casrn,srsname,parameter_units,media,sampfrac,units,asUnits,fieldCol
1965,931,"Inorganics, Major, Metals","Sodium adsorption ratio (SAR), water, number",,Sodium adsorption ratio,,water,,number,,


In [73]:
# Path of the database parameter list (CSV). Set the DB_PARAM_FILE environment variable
# to point at another copy; the original local path is kept as the default.
DBParamFile = os.environ.get("DB_PARAM_FILE", "E:/GitHub/ParamGroup/DB_Params.csv")

In [74]:
# Load the database parameter list; the following cells rely on its 'Param' column.
DBParam = pd.read_csv(DBParamFile)

In [76]:
# Remove rows that have no parameter name; DBParam is modified in place.
DBParam.dropna(subset=['Param'],inplace=True)

In [78]:
# Keep one row per distinct parameter name; DBParam is modified in place.
DBParam.drop_duplicates(subset=['Param'],inplace=True)

In [116]:
def _lookupBySRS(x, NWISParamWater, column):
    '''
    returns the value of `column` from the first row of NWISParamWater whose
    'lwrSRS' matches x
    
    x is converted to a lowercase string and tried three ways, in order:
    the whole string, the text before the first ' (' and the text before the
    first ', '. Returns np.nan when none of them matches a row.
    
    Only "no matching row" is treated as a miss; a table lacking the 'lwrSRS'
    or `column` column raises KeyError instead of being silently turned into
    np.nan.
    '''
    name = str(x).lower()
    candidates = (name, name.split(' (')[0], name.split(', ')[0])
    for candidate in candidates:
        hits = NWISParamWater.loc[NWISParamWater['lwrSRS'] == candidate, column]
        if len(hits) > 0:
            return hits.values[0]
    return np.nan

def getCAS(x, NWISParamWater):
    '''
    returns the 'casrn' value of the NWISParamWater row matching parameter
    name x, or np.nan when no row matches (see _lookupBySRS for the matching
    rules)
    '''
    return _lookupBySRS(x, NWISParamWater, 'casrn')

def getPG(x, NWISParamWater):
    '''
    returns the 'parameter_group_nm' value of the NWISParamWater row matching
    parameter name x, or np.nan when no row matches (see _lookupBySRS for the
    matching rules)
    '''
    return _lookupBySRS(x, NWISParamWater, 'parameter_group_nm')


In [117]:
# Attach the CAS registry number and parameter group found for each parameter name.
# Series.apply has no axis argument; the lookup table is handed to the helpers via `args`.
DBParam['CAS_Reg'] = DBParam['Param'].apply(getCAS, args=(NWISParamWater,))
DBParam['ParamGroup'] = DBParam['Param'].apply(getPG, args=(NWISParamWater,))

In [122]:
# Copy the annotated table to the system clipboard; nothing is written to disk here.
DBParam.to_clipboard()