In [1]:
import os, pandas as pd, slr_pkg.clean_load_data as cld, slr_pkg.para as para
from slr_pkg.clean_load_data import open_table
from pathlib import Path

In [2]:
# Get current working directory
bp = Path(os.getcwd())

# Set base data directory.
dp  = bp / 'data'

# Set sample data directory.
edf_path = dp / 'geotracker_edf_results'
gama_path = dp / 'gama_results'

# Set location data directory.
geo_xy_path = dp / 'geotracker_xy'
gama_xy_path = dp / "gama_xy"
xy_path = dp / 'xy'

# Set results directory
results_path = bp / "results"

# Ask for county to gather data for.
# area = input('Enter county: ')
areas = ['Ventura','SanDiego', 'Kern', 'Imperial','SantaBarbara','LosAngeles']
# areas = ['LosAngeles']

In [4]:
#locs = pd.read_csv(xy_path / 'all_xy_elev.csv')

for area in areas:

    samples = pd.read_csv(dp / '{}_clean_samples.csv'.format(area))

    samples = samples.merge(locs, left_on='WID', right_on='WID', how='inner')
    samples.to_csv(dp / '{}_clean_samples_elev.csv'.format(area), index=False)

In [4]:
samples = pd.read_csv(dp / '{}_clean_samples_elev.csv'.format(areas[0]))

In [15]:
samples.dtypes

LOGDATE            object
PARLABEL           object
PARVAL            float64
PARVQ              object
UNITS              object
WID                object
GID                object
SID                object
FIELD_PT_CLASS     object
LATITUDE_x        float64
LONGITUDE_x       float64
chem_name          object
comp_conc_val     float64
exceedence           bool
magnitude         float64
OID_                int64
FIELD_PT_C         object
LATITUDE_y        float64
LONGITUDE_y       float64
DEM_ELV           float64
dtype: object

In [10]:
### Depth to Water Data
# Load GAMA dtw data
# Create elev_path.
elev_path = bp / 'elevation'
print(elev_path, '\n')

# Dictionary of data types for gama_elev gama_elev for open_table().
gama_elev_dtypes = {
    'WELL NUMBER' : 'string',
    'DEPTH TO WATER' : 'float64',
    }

# Date column of gama_elev gama_elev for open_table().
gama_elev_date = ['MEASUREMENT DATE']

# Columns of gama_elev gama_elev for open_table().
gama_elev_cols = list(gama_elev_dtypes.keys()) + gama_elev_date


print('Loading GAMA groundwater elevations. \n')

# create list of files to open
gama_elev_files = elev_path.glob('**/*gama*.zip')
gama_elev_files = list(gama_elev_files)

# Use list comprehension to create a list of dataframes from the files list. Uses open_table() to open the files.
gama_elev_list = [open_table(i,dtypes = gama_elev_dtypes,date_cols = gama_elev_date, cols =gama_elev_cols) for i in gama_elev_files]
#print(gama_elev_list)

# Concatenate the list of dataframes into one dataframe if there are more than one.
if len(gama_elev_list) > 1:
    gama_elev = pd.concat(gama_elev_list)

else:
    gama_elev = gama_elev_list[0]

# Dict of attributes to rename.
gama_geo_dict = {
    'WELL NUMBER' : 'WID',
    'DEPTH TO WATER' : 'DTW',
    'MEASUREMENT DATE' : 'LOGDATE',
}
# Rename columns.
gama_elev = gama_elev.rename(columns=gama_geo_dict)

# Fix column formatting.
# gama_elev['LOGDATE'] = gama_elev['LOGDATE'].astype(str)
gama_elev['LOGDATE'] = pd.to_datetime(gama_elev['LOGDATE'].astype(str), errors='coerce', format='%Y-%m-%d')
gama_elev['WID'] = gama_elev['WID'].str.replace(' ', '')

# Create GID (group id) column. GID is the WID and LOGDATE concatenated.
gama_elev['GID'] = list(zip(gama_elev['WID'], gama_elev['LOGDATE']))
# Load Geotracker DTW data.
# Dictionary of data types for geo_elev geo_elev for open_table().
geo_elev_dtypes = {
    'GLOBAL_ID' : 'string',
    'FIELD_POINT_NAME' : 'string',
    'DTW' : 'float64',
    }

# Date column of geo_elev geo_elev for open_table().
geo_elev_date = ['GW_MEAS_DATE']

# Columns of geo_elev geo_elev for open_table().
geo_elev_cols = list(geo_elev_dtypes.keys()) + geo_elev_date

print('Loading Geotracker groundwater elevations. \n')

# create list of files to open
geo_elev_files = elev_path.glob('**/*Geo*.zip')
geo_elev_files = list(geo_elev_files)


# Use list comprehension to create a list of dataframes from the files list. Uses open_table() to open the files.
geo_elev_list = [open_table(i,geo_elev_dtypes,date_cols= geo_elev_date,cols =geo_elev_cols) for i in geo_elev_files]

# Concatenate the list of dataframes into one dataframe if there are more than one.
if len(geo_elev_list) > 1:
    geo_elev = pd.concat(geo_elev_list)

else:
    geo_elev = geo_elev_list[0]

# Create WID column.
geo_elev['WID'] = geo_elev['GLOBAL_ID'] + '-' + geo_elev['FIELD_POINT_NAME']

# Drop unnecessary columns.
geo_elev = geo_elev.drop(columns=['GLOBAL_ID', 'FIELD_POINT_NAME'])

# fix column formatting.
geo_elev['WID'] = geo_elev['WID'].str.replace(' ', '')

# Rename columns.
geo_elev = geo_elev.rename(columns={'GW_MEAS_DATE' : 'LOGDATE'})

# Fix column formatting.
geo_elev['LOGDATE'] = pd.to_timestamp(geo_elev['LOGDATE'].astype(str), errors='coerce', format='%Y-%m-%d')
#geo_elev['LOGDATE'] = geo_elev['LOGDATE'].astype(str)

# Create GID (group id) column. GID is the WID and LOGDATE concatenated.
geo_elev['GID'] = list(zip(geo_elev['WID'], geo_elev['LOGDATE']))

e:\work\projects\coast_slr\scripts\slr_ground_water_quality_\elevation 

Loading GAMA groundwater elevations. 

Loading Geotracker groundwater elevations. 



In [27]:
samples['LOGDATE'] = pd.to_datetime(samples['LOGDATE'].astype(str), errors='coerce', format='%Y-%m-%d')
samples['GID'] = list(zip(samples['WID'], samples['LOGDATE']))

In [30]:
samples['SID'] = list(zip(samples['GID'], samples['PARLABEL']))

In [31]:
samples

Unnamed: 0,LOGDATE,PARLABEL,PARVAL,PARVQ,UNITS,WID,GID,SID,FIELD_PT_CLASS,LATITUDE_x,LONGITUDE_x,chem_name,comp_conc_val,exceedence,magnitude,OID_,FIELD_PT_C,LATITUDE_y,LONGITUDE_y,DEM_ELV
0,2002-05-16,BZME,31600000.0,=,UG/L,T0611100671-MW-1,"(T0611100671-MW-1, 2002-05-16 00:00:00)","((T0611100671-MW-1, 2002-05-16 00:00:00), BZME)",MW,34.218922,-119.195192,Toluene,150.0,True,210665.666667,81632,MW,34.218922,-119.195192,19.1394
1,2010-02-02,BZME,30400.0,=,UG/L,T0611100671-MW-1,"(T0611100671-MW-1, 2010-02-02 00:00:00)","((T0611100671-MW-1, 2010-02-02 00:00:00), BZME)",MW,34.218922,-119.195192,Toluene,150.0,True,201.666667,81632,MW,34.218922,-119.195192,19.1394
2,2009-08-20,BZME,30200.0,=,UG/L,T0611100671-MW-1,"(T0611100671-MW-1, 2009-08-20 00:00:00)","((T0611100671-MW-1, 2009-08-20 00:00:00), BZME)",MW,34.218922,-119.195192,Toluene,150.0,True,200.333333,81632,MW,34.218922,-119.195192,19.1394
3,2009-11-10,BZME,23300.0,=,UG/L,T0611100671-MW-1,"(T0611100671-MW-1, 2009-11-10 00:00:00)","((T0611100671-MW-1, 2009-11-10 00:00:00), BZME)",MW,34.218922,-119.195192,Toluene,150.0,True,154.333333,81632,MW,34.218922,-119.195192,19.1394
4,2005-02-15,BZME,22800.0,=,UG/L,T0611100671-MW-1,"(T0611100671-MW-1, 2005-02-15 00:00:00)","((T0611100671-MW-1, 2005-02-15 00:00:00), BZME)",MW,34.218922,-119.195192,Toluene,150.0,True,151.000000,81632,MW,34.218922,-119.195192,19.1394
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
823853,1982-06-09,F,0.9,,UG/L,04N22W12E001S,"(04N22W12E001S, 1982-06-09 00:00:00)","((04N22W12E001S, 1982-06-09 00:00:00), F)","WATER SUPPLY, OTHER",34.448100,-119.145000,Fluoride,2.0,False,-0.550000,103242,"WATER SUPPLY, OTHER",34.448100,-119.145000,566.6830
823854,1983-08-10,F,0.5,,UG/L,04N22W12E001S,"(04N22W12E001S, 1983-08-10 00:00:00)","((04N22W12E001S, 1983-08-10 00:00:00), F)","WATER SUPPLY, OTHER",34.448100,-119.145000,Fluoride,2.0,False,-0.750000,103242,"WATER SUPPLY, OTHER",34.448100,-119.145000,566.6830
823855,1988-07-22,F,0.4,,UG/L,09N23W31P001S,"(09N23W31P001S, 1988-07-22 00:00:00)","((09N23W31P001S, 1988-07-22 00:00:00), F)","WATER SUPPLY, OTHER",34.818700,-119.378000,Fluoride,2.0,False,-0.800000,110907,"WATER SUPPLY, OTHER",34.818700,-119.378000,1123.9900
823856,1989-08-10,F,0.7,,UG/L,04N22W13F001S,"(04N22W13F001S, 1989-08-10 00:00:00)","((04N22W13F001S, 1989-08-10 00:00:00), F)","WATER SUPPLY, OTHER",34.433600,-119.141000,Fluoride,2.0,False,-0.650000,103239,"WATER SUPPLY, OTHER",34.433600,-119.141000,477.1160


In [28]:
samples.dtypes

LOGDATE           datetime64[ns]
PARLABEL                  object
PARVAL                   float64
PARVQ                     object
UNITS                     object
WID                       object
GID                       object
SID                       object
FIELD_PT_CLASS            object
LATITUDE_x               float64
LONGITUDE_x              float64
chem_name                 object
comp_conc_val            float64
exceedence                  bool
magnitude                float64
OID_                       int64
FIELD_PT_C                object
LATITUDE_y               float64
LONGITUDE_y              float64
DEM_ELV                  float64
dtype: object

In [22]:
gama_elev['LOGDATE'] = pd.Timestamp(gama_elev['LOGDATE'].astype(str))

TypeError: Cannot convert input [0          1993-10-19
1          1994-03-14
2          1994-10-17
3          1995-03-29
4          1995-10-16
              ...    
4229858    2021-11-16
4229859    2021-12-07
4229860    2017-04-04
4229861    2017-04-04
4229862    2017-04-04
Name: LOGDATE, Length: 4229863, dtype: object] of type <class 'pandas.core.series.Series'> to Timestamp

In [46]:
# Concatenate gama_results and edf_results.
dtw = pd.concat([geo_elev, gama_elev])
dtw['dtw_units'] = 'ft'

# List of columns that require a value.
dtw_req_cols = ['WID','DTW','LOGDATE']

# Drops rows with missing values in required columns.
dtw = dtw.dropna(subset=dtw_req_cols)

# Drop duplicate GID rows.
dtw = dtw.drop_duplicates(subset=['GID'])

In [47]:
dtw

Unnamed: 0,LOGDATE,DTW,WID,GID,dtw_units
0,2002-01-14,9.36,T0607302553-MW2,"(T0607302553-MW2, 2002-01-14 00:00:00)",ft
1,2002-01-14,9.61,T0607302553-MW4,"(T0607302553-MW4, 2002-01-14 00:00:00)",ft
2,2002-01-11,9.67,T0607302553-MW6,"(T0607302553-MW6, 2002-01-11 00:00:00)",ft
3,2002-01-14,9.52,T0607302553-MW7,"(T0607302553-MW7, 2002-01-14 00:00:00)",ft
4,2002-01-11,9.23,T0607302553-MW11S,"(T0607302553-MW11S, 2002-01-11 00:00:00)",ft
...,...,...,...,...,...
4229855,2021-08-23,72.01,SL204131495-REW-12,"(SL204131495-REW-12, 2021-08-23 00:00:00)",ft
4229856,2021-09-20,72.00,SL204131495-REW-12,"(SL204131495-REW-12, 2021-09-20 00:00:00)",ft
4229857,2021-10-27,72.17,SL204131495-REW-12,"(SL204131495-REW-12, 2021-10-27 00:00:00)",ft
4229858,2021-11-16,72.20,SL204131495-REW-12,"(SL204131495-REW-12, 2021-11-16 00:00:00)",ft


In [48]:
area

'LosAngeles'

In [49]:
samples = pd.read_csv(dp / '{}_clean_samples_elev.csv'.format('Ventura'))

In [50]:
samples.columns

Index(['LOGDATE', 'PARLABEL', 'PARVAL', 'PARVQ', 'UNITS', 'WID', 'GID', 'SID',
       'FIELD_PT_CLASS', 'LATITUDE_x', 'LONGITUDE_x', 'chem_name',
       'comp_conc_val', 'exceedence', 'magnitude', 'OID_', 'FIELD_PT_C',
       'LATITUDE_y', 'LONGITUDE_y', 'DEM_ELV'],
      dtype='object')

In [51]:
c1 = ['LOGDATE', 'PARLABEL', 'PARVAL', 'PARVQ', 'UNITS', 'WID', 'GID',
       'SID', 'FIELD_PT_CLASS', 'LATITUDE_x', 'LONGITUDE_x', 'chem_name',
       'comp_conc_val', 'exceedence', 'magnitude', 'DEM_ELV']

c_dict = {
    'LATITUDE_x' : 'LATITUDE',
    'LONGITUDE_x' : 'LONGITUDE',
}
samples = samples[c1]
samples = samples.rename(columns=c_dict)

In [60]:
dtw

Unnamed: 0,LOGDATE,DTW,WID,GID,dtw_units
0,2002-01-14,9.36,T0607302553-MW2,"(T0607302553-MW2, 2002-01-14 00:00:00)",ft
1,2002-01-14,9.61,T0607302553-MW4,"(T0607302553-MW4, 2002-01-14 00:00:00)",ft
2,2002-01-11,9.67,T0607302553-MW6,"(T0607302553-MW6, 2002-01-11 00:00:00)",ft
3,2002-01-14,9.52,T0607302553-MW7,"(T0607302553-MW7, 2002-01-14 00:00:00)",ft
4,2002-01-11,9.23,T0607302553-MW11S,"(T0607302553-MW11S, 2002-01-11 00:00:00)",ft
...,...,...,...,...,...
4229855,2021-08-23,72.01,SL204131495-REW-12,"(SL204131495-REW-12, 2021-08-23 00:00:00)",ft
4229856,2021-09-20,72.00,SL204131495-REW-12,"(SL204131495-REW-12, 2021-09-20 00:00:00)",ft
4229857,2021-10-27,72.17,SL204131495-REW-12,"(SL204131495-REW-12, 2021-10-27 00:00:00)",ft
4229858,2021-11-16,72.20,SL204131495-REW-12,"(SL204131495-REW-12, 2021-11-16 00:00:00)",ft


In [53]:
samples_dtw = pd.merge(samples, dtw, on=['GID'], how='left')

In [55]:
print(samples_dtw.dropna(subset=['DTW']))

Empty DataFrame
Columns: [LOGDATE_x, PARLABEL, PARVAL, PARVQ, UNITS, WID_x, GID, SID, FIELD_PT_CLASS, LATITUDE, LONGITUDE, chem_name, comp_conc_val, exceedence, magnitude, DEM_ELV, LOGDATE_y, DTW, WID_y, dtw_units]
Index: []


In [10]:
for area in areas:

    samples = pd.read_csv(dp / '{}_clean_samples_elev.csv'.format(area))
    samples = samples.merge(dtw, on='GID', how='left')
    samples.to_csv(dp / '{}_clean_samples_elev_dtw.csv'.format(area), index=False)


In [16]:
for area in areas:

    samples = pd.read_csv(dp / '{}_clean_samples_elev_dtw.csv'.format(area))
    samples = samples[c1]
    samples = samples.rename(columns=c_dict)
    samples.to_csv(dp / '{}_clean_samples_elev_dtw_r.csv'.format(area), index=False)

In [18]:
samples.dropna(subset=['DTW'], inplace=True)

In [19]:
samples

Unnamed: 0,LOGDATE,PARLABEL,PARVAL,PARVQ,UNITS,WID,GID,SID,FIELD_PT_CLASS,LATITUDE,LONGITUDE,chem_name,comp_conc_val,exceedence,magnitude,DEM_ELV,DTW,dtw_units


In [12]:
c_dict = {
    'LOGDATE'
}

for area in areas:

    samples = pd.read_csv(dp / '{}_clean_samples_elev_dtw.csv'.format(area))
    samples = samples.merge(dtw, on='GID', how='left')
    samples.to_csv(dp / '{}_clean_samples_elev_dtw.csv'.format(area), index=False)

Index(['LOGDATE_x', 'PARLABEL', 'PARVAL', 'PARVQ', 'UNITS', 'WID_x', 'GID',
       'SID', 'FIELD_PT_CLASS', 'LATITUDE_x', 'LONGITUDE_x', 'chem_name',
       'comp_conc_val', 'exceedence', 'magnitude', 'OID_', 'FIELD_PT_C',
       'LATITUDE_y', 'LONGITUDE_y', 'DEM_ELV', 'LOGDATE_y', 'DTW', 'WID_y',
       'dtw_units'],
      dtype='object')

In [13]:
samples.dtypes

LOGDATE_x          object
PARLABEL           object
PARVAL            float64
PARVQ              object
UNITS              object
WID_x              object
GID                object
SID                object
FIELD_PT_CLASS     object
LATITUDE_x        float64
LONGITUDE_x       float64
chem_name          object
comp_conc_val     float64
exceedence           bool
magnitude         float64
OID_                int64
FIELD_PT_C         object
LATITUDE_y        float64
LONGITUDE_y       float64
DEM_ELV           float64
LOGDATE_y          object
DTW               float64
WID_y              string
dtw_units          object
dtype: object