Import relevant Python libraries.

In [92]:
import pandas as pd
import cx_Oracle
import numpy as np
import sys
import platform

In [93]:
import wellapplication as wa

Print version information.

In [94]:
print("Operating System " + platform.system() + " " + platform.release())
print("Python Version " + str(sys.version))
print("Pandas Version " + str(pd.__version__))
print("Numpy Version " + str(np.__version__))
print("Wellapplication Version " + str(wa.__version__))

Operating System Windows 7
Python Version 3.5.3 |Continuum Analytics, Inc.| (default, May 15 2017, 10:43:23) [MSC v.1900 64 bit (AMD64)]
Pandas Version 0.20.1
Numpy Version 1.11.2
Wellapplication Version 0.4.26


# SDWIS Data

In [95]:
file_place = 'E:/Google Drive/WORK/Groundwater Chemistry/SDWIS/test/{:}.csv'

Import database connection.  This is kept in a separate file because it contains sensitive information.

In [96]:
connection_filepath = "E:\Google Drive\WORK\Groundwater Chemistry"
sys.path.append(connection_filepath)
import oraconnection

This function below is just calling the imported database connection and assigning it to a variable.

In [97]:
connection = oraconnection.oraconnect()

## SDWIS Chemistry Data

`SDWISconst` is a list of the constituents of interest.  The percent sign is used as a wildcard because all of the parameter names have trailing spaces or other issues.

In [100]:
SDWISconst = ['PHOSPHORUS%', 'ARSENIC%','CALCIUM%','CHLORIDE%',
              'FLUORIDE%','IRON%', 'MAGNESIUM%','NITRATE%','CARBONATE%',
              'POTASSIUM%','BICARBONATE%','ALKA%','PHOSPHATE%',
              'SILICA%','SODIUM%','SULFATE%','BORON%','BROMIDE%','PH%',
              'TDS%','CONDUCT%','URANIUM%','TEMPER%','HARDN%','TURB%']

The following is the SQL query used to pull the chemistry data from the database.  It renames the fields (from `SELECT` to `FROM`), then joins the appropriate tables (from `FROM` to `WHERE`, then limits the output to Springs and Wells and state issigned id code.  The `{:}` in the `WHERE` clause is a place holder for one of the above parameters in the `SDWISconst` list.

In [101]:
SQLtext = """
SELECT
UTV80.TINWSF.TYPE_CODE AS "SampType", 
UTV80.TINWSF.TINWSF_IS_NUMBER AS "site_no", 
UTV80.TINWSYS.TINWSYS_IS_NUMBER AS "OrgID", 
UTV80.TINWSYS.NAME AS "OrgName", 
UTV80.TSASAMPL.TSASAMPL_IS_NUMBER AS "SampleID", 
UTV80.TSASAMPL.COLLLECTION_END_DT AS "sample_dt", 
UTV80.TSASAMPL.COLLCTN_END_TIME AS "sample_tm", 
UTV80.TSAANLYT.NAME AS "Param", 
UTV80.TSASAR.CONCENTRATION_MSR AS "result_va", 
UTV80.TSASAR.UOM_CODE AS "Unit", 
UTV80.TSASAR.ANALYSIS_START_DT AS "AnalysisDate", 
UTV80.TSASAR.DETECTN_LIMIT_NUM AS "MDL", 
UTV80.TSASAR.DETECTN_LIM_UOM_CD AS "MDLUnit", 
UTV80.TSAANLYT.CAS_REGISTRY_NUM AS "CAS_Reg", 
UTV80.TSASAR.TSASAR_IS_NUMBER AS "ID_NUM",
UTV80.TSASAMPL.COMMENT_TEXT AS "SamplePointComment",
UTV80.TSASMPPT.DESCRIPTION_TEXT AS "SamplePointDesc"

FROM 
UTV80.TINWSF -- Water System Facility (water source)
JOIN UTV80.TINWSYS ON -- Water System (town or water provider)
UTV80.TINWSF.TINWSYS_IS_NUMBER = UTV80.TINWSYS.TINWSYS_IS_NUMBER 
JOIN UTV80.TSASMPPT ON  -- Sample point
UTV80.TINWSF.TINWSF_IS_NUMBER = UTV80.TSASMPPT.TINWSF0IS_NUMBER
JOIN UTV80.TSASAMPL ON -- Sample
UTV80.TSASMPPT.TSASMPPT_IS_NUMBER = UTV80.TSASAMPL.TSASMPPT_IS_NUMBER  -- Sample Point
JOIN UTV80.TSASAR ON -- Sample Result
UTV80.TSASAMPL.TSASAMPL_IS_NUMBER = UTV80.TSASAR.TSASAMPL_IS_NUMBER   -- Sample Number
JOIN UTV80.TSAANLYT ON -- Analyte
UTV80.TSASAR.TSAANLYT_IS_NUMBER = UTV80.TSAANLYT.TSAANLYT_IS_NUMBER  -- Analyte Number

WHERE (UTV80.TINWSF.TYPE_CODE = 'WL') -- Spring or well
AND (UTV80.TSAANLYT.NAME LIKE '{:}') -- Name of Chemical Parameter
AND UTV80.TSASMPPT.SOURCE_TYPE_CODE = 'RW' -- Raw (unfiltered) water
AND UTV80.TSASMPPT.TYPE_CODE IN('RW','EP','SR') -- From source
ORDER BY UTV80.TINWSF.ST_ASGN_IDENT_CD
"""

The following script loops through the constituents of interest and downloads to csvs based on the above query, inserting the constituent name each time. It also reformats the date and time fields using Pandas (see http://strftime.org) and adds an `agency_cd` field. It generates a Pandas Dataframe for each parameter and stores it in a <a href='https://www.tutorialspoint.com/python/python_dictionary.htm'>dictionary</a> with the parameter name as the key

In [102]:
df_ora = {}
for j in SDWISconst:
    SQL = SQLtext.format(j)
    k = j[:-1]
    df_ora[k] = pd.read_sql(SQL, con = connection,
                            parse_dates=['sample_dt','sample_tm','AnalysisDate'])
    df_ora[k]['agency_cd'] = 'UDDW'

We can then use Pandas to combine all of the <a href='https://pandas.pydata.org/pandas-docs/stable/dsintro.html'>Dataframes</a> into one massive dataframe and then save it as a csv.  The reason I didn't do this in the first place (just query all of the constituents) is because the large data request tends to lag or lock up.

In [103]:
SDWISallraw = pd.concat(df_ora)
SDWISallraw.reset_index(inplace=True)
SDWISallraw.drop(['level_0','level_1'],inplace=True,axis=1)
#SDWISallraw.to_csv(file_place.format('all'))

Let's strip out unneeded white space to make units and parameter names more consistent.

In [104]:
def unitstrip(x):
    if x is None:
        return x
    else:
        return x.strip()
    
SDWISallraw['Param'] = SDWISallraw['Param'].apply(lambda x: x.strip(),1) 
SDWISallraw['Unit'] = SDWISallraw['Unit'].apply(lambda x: unitstrip(x),1)
SDWISallraw['MDLUnit'] = SDWISallraw['MDLUnit'].apply(lambda x: unitstrip(x),1)

Add `remark_cd` field and populate based on value in `result_va` and the reported MDL.

In [105]:
def fillmdlspot(x):
    """
    PARAM
    ------
    x[0] = result_va
    x[1] = Unit
    x[2] = MDL
    x[3] = MDLUnit
    
    RETURNS
    -------
    remark_cd, result_va, Unit
    """
    if pd.isnull(x[0]) and pd.notnull(x[2]):
        return '<',x[2],x[3]
    else:
        return np.nan, x[0], x[1]

In [106]:
SDWISallraw['remark_cd'],SDWISallraw['result_va'],SDWISallraw['Unit'] =\
zip(*SDWISallraw[['result_va','Unit','MDL','MDLUnit']].apply(lambda x: fillmdlspot(x),1))

Convert ug/L to mg/L in cases where units should be in mg/L. We have to do this for both the MDL and the results.

In [107]:
def converter(x):
    # these are the parameters where we want the reported units to be ug/L
    microgrammers = ['BORON,TOTAL','IRON','IRON, DISSOLVED','ARSENIC','BORON']
    if x[0] == 'UG/L' and x[2] not in microgrammers:
        return x[1]*0.001, 'MG/L'
    elif x[0] == 'LBS/GAL':
        return x[1]*119826.0, 'MG/L'
    else:
        return x[1], x[0]

In [108]:
SDWISallraw['result_va'], SDWISallraw['Unit'] =\
zip(*SDWISallraw[['Unit','result_va','Param']].apply(lambda x: converter(x),1))

In [109]:
SDWISallraw['MDL'], SDWISallraw['MDLUnit'] =\
zip(*SDWISallraw[['MDLUnit','MDL','Param']].apply(lambda x: converter(x),1))

Match to USGS parameter codes to the named paramters.  The parameter codes can be found at the following links:<br>
https://nwis.waterdata.usgs.gov/usa/nwis/pmcodes?radio_pm_search=param_group&pm_group=All+--+include+all+parameter+groups&pm_search=&casrn_search=&srsname_search=&format=html_table&show=parameter_group_nm&show=parameter_nm&show=casrn&show=srsname&show=parameter_units<br>
https://nwis.waterdata.usgs.gov/usa/nwis/pmcodes

In [110]:
USGSmatch = {'CONDUCTIVITY @ 25 C UMHOS/CM':'P00400', 'BORON':'P01021',
             'ARSENIC':'P01000', 'BICARBONATE AS HCO3':'P00451',
             'ALKALINITY, BICARBONATE':'P00451','ALKALINITY, CARBONATE':'P00448',
             'CARBONATE':'P00448',
             'BORON, TOTAL':'P00999', 'BROMIDE':'P71870', 'CALCIUM':'P00915', 
             'CALCIUM HARDNESS':'P00900','CHLORIDE':'P00940', 'FLUORIDE':'P00950', 
             'IRON':'P01045', 'IRON, DISSOLVED':'P01046','MAGNESIUM':'P00925', 
             'NITRATE-NITRITE':'P00630', 'NITRATE':'P00620', 'PH':'P00400',
             'PHOSPHATE, TOTAL':'P00650', 'PHOSPHORUS, TOTAL':'P00665', 'POTASSIUM':'P00935', 
             'SILICA':'P00955','SODIUM':'P00930', 'SULFATE':'P00945', 'TDS':'P70300',
             'TEMPERATURE (CENTIGRADE)':'P00010','ALKALINITY, TOTAL':'P00421',
             'ALKALINITY, CACO3 STABILITY':'P00421'}

In [111]:

USGSmatch = {'CONDUCTIVITY @ 25 C UMHOS/CM':'sc', 'BORON':'bo',
             'ARSENIC':'as', 'BICARBONATE AS HCO3':'hco3',
             'ALKALINITY, BICARBONATE':'hco3','ALKALINITY, CARBONATE':'co3',
             'CARBONATE':'co3',
             'BORON, TOTAL':'bo', 'BROMIDE':'br', 'CALCIUM':'ca', 
             'CALCIUM HARDNESS':'hard','CHLORIDE':'cl', 'FLUORIDE':'f', 
             'IRON, DISSOLVED':'fe','MAGNESIUM':'mg', 
             'NITRATE-NITRITE':'630', 'NITRATE':'620', 'PH':'ph',
             'PHOSPHORUS, TOTAL':'op', 'POTASSIUM':'k', 
             'SILICA':'si','SODIUM':'na', 'SULFATE':'so4', 'TDS':'ds',
             'TEMPERATURE (CENTIGRADE)':'temp','ALKALINITY, TOTAL':'alk', 'URANIUM-238':'ur',
             'TURBIDITY':'turb'}


In [112]:
SDWISallraw.head()

Unnamed: 0,SampType,site_no,OrgID,OrgName,SampleID,sample_dt,sample_tm,Param,result_va,Unit,AnalysisDate,MDL,MDLUnit,CAS_Reg,ID_NUM,SamplePointComment,SamplePointDesc,agency_cd,remark_cd
0,WL,4165,992,MONUMENT VALLEY HIGH SCHOOL,262132,1980-05-21,NaT,"ALKALINITY, TOTAL",166.0,MG/L,NaT,,,,1596288,Sample taken at WELL #1,WELL #1,UDDW,
1,WL,5195,1492,TAYLOR-WEST WEBER WID,258227,1982-01-26,NaT,"ALKALINITY, TOTAL",203.0,MG/L,NaT,,,,1526231,Sample taken at BIG WELL,BIG WELL,UDDW,
2,WL,4157,1252,RIO ALGOM MINING CORP,262088,1972-05-02,NaT,"ALKALINITY, TOTAL",209.0,MG/L,NaT,,,,1595989,Sample taken at MAPLE LEAF #1,MAPLE LEAF #1,UDDW,
3,WL,4308,915,MANTI CITY,263256,1985-09-26,NaT,"ALKALINITY, TOTAL",159.0,MG/L,NaT,,,,1608824,Sample taken at SEC 12 T18S R2E,SEC 12 T18S R2E,UDDW,
4,WL,3667,1108,CURRANT CREEK,386099,2005-08-09,NaT,"ALKALINITY, TOTAL",3.0,MG/L,NaT,,,,3053363,,WELL # 1,UDDW,


In [113]:
SDWISallraw.columns

Index(['SampType', 'site_no', 'OrgID', 'OrgName', 'SampleID', 'sample_dt',
       'sample_tm', 'Param', 'result_va', 'Unit', 'AnalysisDate', 'MDL',
       'MDLUnit', 'CAS_Reg', 'ID_NUM', 'SamplePointComment', 'SamplePointDesc',
       'agency_cd', 'remark_cd'],
      dtype='object')

In [114]:
SDWISallraw.Param.unique()

array(['ALKALINITY, TOTAL', 'ALKALINITY, CARBONATE',
       'ALKALINITY, BICARBONATE', 'ALKALINITY, CACO3 STABILITY', 'ARSENIC',
       'BICARBONATE AS HCO3', 'BORON, TOTAL', 'BROMIDE', 'CALCIUM',
       'CALCIUM HARDNESS', 'CHLORIDE', 'CONDUCTIVITY @ 25 C UMHOS/CM',
       'FLUORIDE', 'HARDNESS, CARBONATE', 'HARDNESS, TOTAL (AS CACO3)',
       'HARDNESS, CALCIUM MAGNESIUM', 'IRON', 'IRON, DISSOLVED',
       'MAGNESIUM', 'NITRATE', 'NITRATE-NITRITE', 'PHOSPHATE, TOTAL', 'PH',
       'PHOSPHORUS, TOTAL', 'POTASSIUM', 'SILICA', 'SODIUM', 'SULFATE',
       'TDS', 'TEMPERATURE (CENTIGRADE)', 'TURBIDITY', 'URANIUM-238',
       'URANIUM-235', 'URANIUM-234'], dtype=object)

In [115]:
olivianames = {'agency_cd':'ac','site_no':'stid','SamplePointDesc':'sme','sample_dt':'dates','sample_tm':'times',}

In [116]:
SDWISallraw = SDWISallraw.rename(columns=olivianames)
SDWISallraw['year'] = SDWISallraw['dates'].apply(lambda x: pd.to_datetime(x).year,1)
SDWISallraw['times'] = SDWISallraw['times'].apply(lambda x: pd.to_datetime(x, format = '%H%M'),1)
SDWISallraw['database'] = 'SDWIS'
SDWISallraw['tmdtm'] = 'MDT'


In [117]:
SDWISallraw['parm_cd'] = SDWISallraw['Param'].apply(lambda x: str(USGSmatch.get(x,'')),1)

Combine result and remark for pivot.

In [118]:
def comb_res_rmk(x):
    """combine remark and result"""
    if x[0]== '<':
        return str(x[0])+str(x[1])
    else:
        return str(x[1])

In [119]:
SDWISallraw['res_w_rmk'] = SDWISallraw[['remark_cd','result_va']].apply(lambda x: comb_res_rmk(x),1)

Drop duplicates and null values.

In [120]:
SDWISallraw.dropna(subset=['result_va'],inplace=True)
SDWISallraw.sort_values(by=['SampleID','Param'],inplace=True)
SDWIS = SDWISallraw.drop_duplicates(subset=['SampleID','Param'])
SDWIS['SampleID'] = SDWIS['SampleID'].apply(lambda x: str(x),1) 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.


Save file to csv

In [121]:
SDWIS.to_csv(file_place.format('chem_by_analyte'),index=False)

Pivot Data 

In [122]:
SDWISp = SDWIS.drop_duplicates(subset=['SampleID','parm_cd'])
SDWISpiv = SDWISp.pivot(index='SampleID', columns = 'parm_cd', values = 'res_w_rmk')

In [123]:
SDWISpivTot = pd.merge(SDWISpiv,SDWIS,left_index=True,right_on='SampleID',how='left')

In [124]:
SDWISpivTot.drop([u'',u'SampType', u'Param', u'result_va', u'Unit', 
                  u'MDL', u'MDLUnit', u'CAS_Reg',u'ID_NUM', 
                  u'remark_cd', u'parm_cd', u'res_w_rmk'],inplace=True,axis=1)

In [125]:
def getlessthan(x):
    if "<" in str(x):
        return "<", 'TRUE', float(x[1:])
    elif x is None:
        return "", 'FALSE', ""
    else:
        return "", 'FALSE', x

standard_list = ['stid','OrgID','OrgName','SampleID','dates','AnalysisDate','SamplePointComment','sme','ac','year','database','tmdtm']
    
for col in SDWISpivTot.columns:
    if col not in standard_list:
        SDWISpivTot["r"+col], SDWISpivTot["r"+col+'tf'], SDWISpivTot[col] =\
        zip(*SDWISpivTot[col].apply(lambda x: getlessthan(x),1))

In [126]:
SDWISpivTot['lat'] = 0
SDWISpivTot['lon'] = 0
SDWISpivTot['wd'] = 0
SDWISpivTot['rwd'] = 0
SDWISpivTot.columns

Index(['620', '630', 'alk', 'as', 'bo', 'br', 'ca', 'cl', 'co3', 'ds', 'f',
       'fe', 'hard', 'hco3', 'k', 'mg', 'na', 'op', 'ph', 'sc', 'si', 'so4',
       'temp', 'turb', 'ur', 'stid', 'OrgID', 'OrgName', 'SampleID', 'dates',
       'times', 'AnalysisDate', 'SamplePointComment', 'sme', 'ac', 'year',
       'database', 'tmdtm', 'r620', 'r620tf', 'r630', 'r630tf', 'ralk',
       'ralktf', 'ras', 'rastf', 'rbo', 'rbotf', 'rbr', 'rbrtf', 'rca',
       'rcatf', 'rcl', 'rcltf', 'rco3', 'rco3tf', 'rds', 'rdstf', 'rf', 'rftf',
       'rfe', 'rfetf', 'rhard', 'rhardtf', 'rhco3', 'rhco3tf', 'rk', 'rktf',
       'rmg', 'rmgtf', 'rna', 'rnatf', 'rop', 'roptf', 'rph', 'rphtf', 'rsc',
       'rsctf', 'rsi', 'rsitf', 'rso4', 'rso4tf', 'rtemp', 'rtemptf', 'rturb',
       'rturbtf', 'rur', 'rurtf', 'rtimes', 'rtimestf', 'lat', 'lon', 'wd',
       'rwd'],
      dtype='object')

In [127]:
template = pd.read_excel("C:/Users/PAULINKENBRANDT/Downloads/nwis_format (1).xlsx")

In [128]:
new_cols = []
temp_cols = list(template.columns) + ['620','630','OrgName', 'SampleID','AnalysisDate', 'SamplePointComment']
for col in temp_cols:
    if col in SDWISpivTot.columns:
        new_cols.append(col)
SDWISpivTot.drop_duplicates(subset=['SampleID'],inplace=True)

In [129]:
SDWISpivTot.to_csv(file_place.format('chem_by_sample'),index=False)

## SDWIS Stations

In [45]:
StationQuery = """
SELECT 
UTV80.TINWSYS.TINWSYS_IS_NUMBER AS "OrgID",
UTV80.TINWSYS.NAME AS "OrgName",
UTV80.TINWSF.ST_ASGN_IDENT_CD AS "StateFacID",
UTV80.TINWSYS.ALTERNATE_ST_NUM AS "StateSysID",
UTV80.TINWSF.TINWSF_IS_NUMBER AS "site_no",
UTV80.TINWSF.NAME AS "station_nm",
UTV80.TINWSF.TYPE_CODE AS "site_tp_cd",
UTV80.TINLOC.LAT_DMS_MSR AS "lat_va",
UTV80.TINLOC.LONG_DMS_MSR AS "long_va",
UTV80.TINLOC.LATITUDE_MEASURE AS "dec_lat_va",
UTV80.TINLOC.LONGITUDE_MEASURE AS "dec_long_va",
UTV80.TINLOC.HZ_COLLECT_METH_CD AS "coord_meth_cd",
UTV80.TINLOC.HORIZ_ACCURACY_MSR AS "coord_acy_cd",
UTV80.TINLOC.HORIZ_REF_DATUM_CD AS "coord_datum_cd",
UTV80.TINLOC.HORIZ_REF_DATUM_CD AS "dec_coord_datum_",
UTV80.TINLOC.SOURCE_MAP_SCALE AS "map_scale_fc",
UTV80.TINLOC.VERTICAL_MEASURE AS "alt_va",
UTV80.TINLOC.VER_COL_METH_CD AS "alt_meth_cd",
UTV80.TINLOC.VERT_ACCURACY_MSR AS "alt_acy_va",
UTV80.TINLOC.VERT_REF_DATUM_CD AS "alt_datum_cd",
UTV80.TINWSF.CONSTRUCTED_DATE AS "construction_dt",
UTV80.TINWSF.ACTIVITY_DATE AS "ActDate",
UTV80.TINWSF.ACTIVITY_STATUS_CD AS "ActStatus"
 
FROM
UTV80.TINWSF
JOIN UTV80.TINWSYS ON  UTV80.TINWSF.TINWSYS_IS_NUMBER = UTV80.TINWSYS.TINWSYS_IS_NUMBER  
JOIN UTV80.TINLOC ON  UTV80.TINWSF.TINWSF_IS_NUMBER = UTV80.TINLOC.TINWSF_IS_NUMBER 
 
WHERE 
UTV80.TINWSF.TYPE_CODE IN('WL') 
AND UTV80.TINLOC.LATITUDE_MEASURE != 0
"""

In [46]:
stations = pd.read_sql(StationQuery, con = connection)

In [47]:
def getsrcid(x):
    return str(x[1]).zfill(5) + '-' + str(x[2][-2:])

In [48]:
idmatch = pd.read_csv("E:/Google Drive/WORK/Groundwater Chemistry/SDWIS/WellIDMatch.csv")
idmatch.set_index('ESN',inplace=True)
id_dict = idmatch.to_dict()['SourceID']

In [49]:
def refine_sid(x):
    sidraw = id_dict.get(x[0],getsrcid(x))
    if len(sidraw)>8:
        return str(sidraw[:-3]).zfill(5)+str(int(sidraw[-3:])).zfill(2)
    else:
        return sidraw
stations['DEHN_source_id'] = stations[['site_no','StateSysID','StateFacID']]\
.apply(lambda x: refine_sid(x), 1)

In [50]:
engine = oraconnection.getEngine()

In [51]:
mySQL = """SELECT * FROM water_rights_data.sources"""

In [52]:
sources = pd.read_sql(mySQL,con=engine)

In [53]:
wrjoin = pd.merge(stations, sources, on='DEHN_source_id',how='left')
withwin = wrjoin[wrjoin['win'].notnull()]

In [54]:
SQL = """
SELECT * FROM water_rights_data.{:}
WHERE win IN('{:}')
"""
winstrlist = "','".join(map(str,withwin['win'].values))

In [55]:
construct = pd.read_sql(SQL.format('construction',winstrlist),con=engine)
borehole = pd.read_sql(SQL.format('borehole',winstrlist),con=engine)
screen = pd.read_sql(SQL.format('wellscreens',winstrlist),con=engine)

In [56]:
winproj = pd.read_csv("E:/Google Drive/WORK/Groundwater Chemistry/wrpod_WIN.txt")
winprojSDWIS = winproj[winproj['WIN'].isin(withwin['win'].astype(int).values)]

In [57]:
wellfeats = pd.read_csv("E:/Google Drive/WORK/Groundwater Chemistry/wrpod_wellfeatures.txt",
                        error_bad_lines=False)
wellfeats.set_index('win',inplace=True)

b'Skipping line 30451: expected 7 fields, saw 8\n'


In [58]:
def getdepth(df,kp = 'last'):
    df.sort_values('To_ft',inplace=True)
    df.drop_duplicates(subset='WIN',keep=kp,inplace=True)
    df.set_index('WIN',inplace=True)
    return df['To_ft'].to_dict()

bdepth = getdepth(borehole)
sdepth = getdepth(screen, 'first')

In [59]:
def winconv(x):
    if pd.isnull(x):
        return x
    else:
        return int(x)

wrjoin['win'] = wrjoin['win'].apply(lambda x: winconv(x),1)
depthdict = wellfeats['totalDepth'].to_dict()
sdepthdict = wellfeats['screenDepth'].to_dict()
wrjoin['depth'] = wrjoin['win'].apply(lambda x: depthdict.get(x,bdepth.get(x,np.nan)))
wrjoin['screen_depth'] = wrjoin['win'].apply(lambda x: sdepthdict.get(x,sdepth.get(x,np.nan)))

In [60]:
wrjoin.drop_duplicates(subset=['site_no'],inplace=True)
print(len(wrjoin))
print(len(wrjoin[wrjoin['depth'].notnull()]),len(wrjoin[wrjoin['screen_depth'].notnull()]))


2385
310 310


In [61]:
wrjoin['alt_meth_cd'] = 'N'
wrjoin['altacu'] = 10
wrjoin['alt_datum_cd'] = "NAVD88"
wrjoin['altitude'] = wrjoin[['dec_long_va','dec_lat_va']].apply(lambda x: wa.get_elev(x, units='Feet'),1)


JSONDecodeError: ('Expecting value: line 1 column 1 (char 0)', 'occurred at index 2928')

In [None]:
def huc(x):
    try:
        huc, nm = wa.get_huc(x)
        return huc, nm
    except:
        pass
        return '',''

wrjoin['huc'], wrjoin['huc_name'] =\
zip(*wrjoin[['dec_long_va','dec_lat_va']].apply(lambda x: huc(x),1))
wrjoin['usgs_num_guess'] = wrjoin[['dec_long_va','dec_lat_va']].apply(lambda x: wa.USGSID(x),1)

In [None]:
wrjoin.head()

In [63]:
wrabb = wrjoin[['site_no','depth','dec_long_va','dec_lat_va']]
wrabb.set_index('site_no',inplace=True)


In [67]:
wdict = wrabb.to_dict()

In [None]:
wrjoin.to_csv(file_place.format('stations'))

In [130]:
SDWISpivTot['lat'] = SDWISpivTot['stid'].apply(lambda x: wrabb['dec_lat_va'].get(x), 1)
SDWISpivTot['lon'] = SDWISpivTot['stid'].apply(lambda x: wrabb['dec_long_va'].get(x), 1)
SDWISpivTot['wd'] = SDWISpivTot['stid'].apply(lambda x: wrabb['depth'].get(x), 1)

In [132]:
new_cols = []
temp_cols = list(template.columns) + ['602','603','OrgName', 'SampleID','AnalysisDate', 'SamplePointComment']
for col in temp_cols:
    if col in SDWISpivTot.columns:
        new_cols.append(col)
spt = SDWISpivTot[new_cols]
spt.to_csv(file_place.format('piv_output'), index=False)

In [None]:
stations_chem = pd.merge(SDWISpivTot, wrjoin, left_on = 'stid', right_on = 'site_no')

# USGS Format and Table

Mainly for the NWIS Schema

In [None]:
columns = ['agency_cd','site_no','sample_dt','sample_tm','sample_end_dt','sample_end_tm',
           'sample_start_time_datum_cd','tm_datum_rlbty_cd','coll_ent_cd','medium_cd',
           'tu_id','body_part_id','r00003','p00003','r00010','p00010','r00094','p00094',
           'r00400','p00400','r00631','p00631','r00671','p00671','r00900','p00900','r00915',
           'p00915','r00925','p00925','r00930','p00930','r00935','p00935','r00940','p00940',
           'r00945','p00945','r00950','p00950','r00955','p00955','r01000','p01000','r01046',
           'p01046','r22703','p22703','r70300','p70300','r71870','p71870','r90410','p90410']

In [None]:
usgs_piv_path = 'E:/Google Drive/WORK/Groundwater Chemistry/USGS_data/qwdata_pivot'
pd.read_csv(usgs_piv_path, sep = '\t',skiprows=12438,names=columns)

In [None]:
['']

In [None]:
SQL = """
SELECT * FROM UTV80.TSAMCSMP 
WHERE 
UTV80.TSAMCSMP.FIELD_TEMP_MSR > 0 OR 
UTV80.TSAMCSMP.FIELD_PH_MEASURE > 0
"""

In [None]:
df_ora