In [90]:
import copy
import pandas as pd
import numpy as np
import pickle as pkl 
import sys 
from pathlib import Path
from os.path import basename
from timezonefinder import TimezoneFinder
import geopy.geocoders, pytz, certifi, ssl, datetime, ephem, math
from datetime import datetime as dt

In [72]:
proj_dir = '/home/NearingLab/projects/jmframe/lstm_camels/'
data_dir = proj_dir + 'data/'
nldas_dir = data_dir + 'basin_dataset_public_v1p2/basin_mean_forcing/nldas_extended/'
chrt_daily_dir = '/home/NearingLab/data/nwm/v2/temp/CHRT_daily_means/'
chrt_year_dir = '/home/NearingLab/data/nwm/v2/temp/CHRT_OUT/'

In [17]:
if False: # Open the attributes from the coding workshop. These are already set up to do regression
    openthis = '/home/NearingLab/data/camels_all_coding_workshop.csv'
    attributes = pd.read_csv(openthis, sep=',', index_col='gauge_id')
    # These are bad for the regression analysis.
    attributes = attributes.drop(drop_these, axis=1)
else: # Open a slightly more extrnsive data set.
    openthis = '/home/NearingLab/data/camels_attributes_v2.0/camels_all.txt'
    attributes = pd.read_csv(openthis, sep=';', index_col='gauge_id')
    
# Add the basin ID as a 8 element string with a leading zero if neccessary
basin_id_str = []
for a in attributes.index.values:
    basin_id_str.append(str(a).zfill(8))
attributes['basin_id_str'] = basin_id_str

# Get the hydrologic units for each basin.
with open(data_dir + 'usgs_site_info.csv', 'r') as f:
    usgs_sites = pd.read_csv(f, skiprows=24, index_col='site_no')
usgs_idx_int = []
for idx in usgs_sites.index.values:
    usgs_idx_int.append(int(idx))
usgs_sites.reindex(usgs_idx_int)
usgs_sites = usgs_sites.reindex(usgs_idx_int)
basin_hydro_unit = []
for b in attributes.basin_id_str.values:
    huc_cd = usgs_sites.loc[int(b),'huc_cd']
    hu = '{:08d}'.format(huc_cd)
    basin_hydro_unit.append(hu[0:2])
attributes['basin_hydro_unit'] = basin_hydro_unit
# Add time zone
tf = TimezoneFinder()
basin_time_zone = []
for b in attributes.index.values:
    basin_time_zone.append(tf.timezone_at(lng=attributes.loc[b,'gauge_lon'], lat=attributes.loc[b,'gauge_lat']))
attributes['time_zone'] = basin_time_zone

In [25]:
# https://stackoverflow.com/questions/51738137/...
# python-finding-local-mean-time-adjusted-for-the-distance-in-longitude-from-the
def position(city, state, country):

    """

    :param city: String of city Ex. Chattannoga
    :param state: String of state Ex. TN
    :param country: String USA
    :return: latitude and longitude

    """

    ctx = ssl.create_default_context(cafile=certifi.where())
    geopy.geocoders.options.default_ssl_context = ctx
    geo_locator = geopy.geocoders.Nominatim(user_agent="my-application", scheme='http')
    location = geo_locator.geocode(city + ' ' + state + ' ' + country)

    return location.longitude, location.latitude


def timezone(longitude, latitude):

    """

    :param longitude:
    :param latitude:
    :return: timezone

    """

    tf = TimezoneFinder()
    zone = (tf.certain_timezone_at(lng=longitude, lat=latitude))

    return zone

# def localmeantime(longitude):
#     zones = [-0, -15, -30, -45, -60, -75, -90, -105, -120, -135, -150, -165, -180]
#     x = min(zones, key=lambda x: abs(x - longitude))
#     print(x)



def localtoutc(time, timezo):

    """

    :param time: Ex. "2001-2-3 10:11:12" String
    :param timezo: Ex. America/New_York String
    :return:

    """

    local = pytz.timezone(timezo)
    naive = datetime.datetime.strptime(time, "%Y-%m-%d %H:%M:%S")
    local_dt = local.localize(naive, is_dst=None)
    utc_dt = local_dt.astimezone(pytz.utc)
    return utc_dt


def localmeantime(utc, longitude):
    """
    :param utc: string Ex. '2008-12-2'
    :param longitude: longitude
    :return: Local Mean Time Timestamp
    """
    lmt = utc + datetime.timedelta(seconds=round(4*60*longitude))
    lmt = lmt.replace(tzinfo=None)
    return lmt

In [125]:
with open(chrt_daily_dir+'dynamic_features_v2.p', 'rb') as f:
    chrt_v2 = pkl.load(f)
chrt_v2_loc = copy.deepcopy(chrt_v2)

In [126]:
def add_local_time(chrt_year):
    with open(chrt_year_dir+'dynamic_features_'+str(y)+'.p', 'rb') as f:
        chrt_year = pkl.load(f)
        
    for ib, b in enumerate(chrt_year.keys()):
        df = chrt_year[b]
        longitude = attributes.loc[int(b), 'gauge_lon']
        if ib == 0:
            loc_time = []
            for t in df.index.values:
                t_pd = pd.Timestamp(t)
                t_dt = dt(t_pd.year, t_pd.month, t_pd.day, t_pd.hour)
                loc_time.append(localmeantime(t_dt, longitude))
        df['loc_time'] = loc_time
        df = df.set_index('loc_time')
        df = df.groupby(pd.Grouper(freq='1D')).mean()
        for variable in ['streamflow','q_lateral','velocity','qSfcLatRunoff','qBucket','qBtmVertRunoff']:
            chrt_v2_loc[b].loc[df.index.values[1:], variable] = df[variable]

In [127]:
for y in range(1993, 2019):
    add_local_time(y)

In [137]:
k = list(chrt_v2.keys())[100]
i=1050
j=0
print(chrt_v2[k].iloc[i,j])
print(chrt_v2_loc[k].iloc[i,j])

5.648749873740599
5.45999987795949


In [138]:
with open(chrt_daily_dir+'dynamic_features_v2_loc.p', 'wb') as f:
    pkl.dump(chrt_v2_loc,f)