# To prepare input files for boundary conditions: mainly for temperaure timeseries

    - Author Niccolo` Tubini

    - Licence Creative Commons 4.0

In [1]:
%load_ext autoreload
%autoreload 2


from gf.io import gf_io
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
from plotly import graph_objs as go
import calendar
from plotly.offline import plot, iplot
import plotly.graph_objs as go
from random import gauss
from random import uniform

oms_project_path = os.path.dirname(os.getcwd())

In [2]:
from datetime import datetime
def write_OMS_timeseries(df, file_name, **kwargs):
    '''
    Save a timeseries dataframe to .csv file with OMS format
   
    :param df: dataframe containing the timeseries. Each column correspond to a station/centroid and the 
    the header contains the ID of the station/centroid.
    :type df: pandas.dataframe
   
    :param file_name: output file name.
    :type file_name: str
    
    :param \**kwargs:
    See below
    
    :Keyword Arguments
        * *has_datetime* bool, default True
             if the dataframe has a datetime column True, otherwise False
            
        * *start_date* str, '01-01-2020 00:00' 
            start date of the timeseries. 'mm-dd-yyyy hh:mm'
            
        * *frequency* str, default '1H'    
            frequency of the timeseries. 'H': hourly, 'D': daily

    @author: Niccolò Tubini
    
    Notes:
    2021-01-09 changed pd.date_range with pd.period_range 
    https://stackoverflow.com/questions/50265288/how-to-work-around-python-pandas-dataframes-out-of-bounds-nanosecond-timestamp
    '''
    has_datetime = kwargs.get('has_datetime',True)
    start_date = kwargs.get('start_date','01-01-2020 00:00')
    frequency = kwargs.get('frequency','1H')

    if has_datetime==True:
        df.reset_index(inplace=True)
#         df.iloc[0,:] = df.iloc[0,:].strftime('%Y-%m-%d %H:%M')
        df = df.astype(str)
    else:
        date_rng = pd.period_range(start=start_date, periods=df.shape[0], freq=frequency).strftime('%Y-%m-%d %H:%M')
        df_dates = pd.DataFrame(date_rng, columns=['date'])
        df = pd.concat([df_dates, df],sort=False, axis=1)
    
    df.replace('nan','-9999',inplace = True)
    df.replace('-9999.0','-9999',inplace = True)
    
    n_col = df.shape[1]
    value = []
    ID = []
    double = []
    commas = []
    for i in range(1,n_col):
        value.append(',value_'+str(df.columns[i]))
        ID.append(','+str(df.columns[i]))
        double.append(',double')
        commas.append(',')
   
    line_4 = '@H,timestamp'+''.join(value) + '\n'
    line_5 = 'ID,'+''.join(ID) + '\n'
    line_6 = 'Type,Date' + ''.join(double) + '\n'
    line_7 = 'Format,yyyy-MM-dd HH:mm' + ''.join(commas) + '\n'

    date = datetime.today().strftime('%Y-%m-%d %H:%M')
    df.insert(loc=0, column='-', value=np.nan)
    with open(file_name,'w') as file:
        file.write('@T,table\nCreated,'+ date +'\nAuthor,HortonMachine library\n')
        file.write(line_4)
        file.write(line_5)
        file.write(line_6)
        file.write(line_7)
    print(df.head())
    df.to_csv(file_name, header=False, index=False, mode="a", date_format='%Y-%m-%d %H:%M')
    print ('\n\n***SUCCESS writing!  '+ file_name)

In [3]:
dates_range = pd.period_range('0001-01-01 00:00', '9999-01-01', freq='1D')
str_dates_range = pd.period_range('0001-01-01 00:00', '9999-01-01', freq='1D').strftime('%Y-%m-%d %H:%M')

In [7]:
dates_range = pd.date_range('2020-01-01 00:00', '2020-01-31', freq='1min')
dates_range

DatetimeIndex(['2020-01-01 00:00:00', '2020-01-01 00:01:00',
               '2020-01-01 00:02:00', '2020-01-01 00:03:00',
               '2020-01-01 00:04:00', '2020-01-01 00:05:00',
               '2020-01-01 00:06:00', '2020-01-01 00:07:00',
               '2020-01-01 00:08:00', '2020-01-01 00:09:00',
               ...
               '2020-01-30 23:51:00', '2020-01-30 23:52:00',
               '2020-01-30 23:53:00', '2020-01-30 23:54:00',
               '2020-01-30 23:55:00', '2020-01-30 23:56:00',
               '2020-01-30 23:57:00', '2020-01-30 23:58:00',
               '2020-01-30 23:59:00', '2020-01-31 00:00:00'],
              dtype='datetime64[ns]', length=43201, freq='T')

In [12]:
data = np.zeros(len(dates_range))
water_pond = 0.02

for i in range(0,len(dates_range)):  
    data[i] = water_pond


In [13]:
df = pd.DataFrame(dates_range, columns=['Datetime'])
df.head()

Unnamed: 0,Datetime
0,2020-01-01 00:00:00
1,2020-01-01 00:01:00
2,2020-01-01 00:02:00
3,2020-01-01 00:03:00
4,2020-01-01 00:04:00


In [15]:
df['0'] = data

df.head()

Unnamed: 0,Datetime,0
0,2020-01-01 00:00:00,0.02
1,2020-01-01 00:01:00,0.02
2,2020-01-01 00:02:00,0.02
3,2020-01-01 00:03:00,0.02
4,2020-01-01 00:04:00,0.02


In [17]:
df.tail()

Unnamed: 0,Datetime,0
43196,2020-01-30 23:56:00,0.02
43197,2020-01-30 23:57:00,0.02
43198,2020-01-30 23:58:00,0.02
43199,2020-01-30 23:59:00,0.02
43200,2020-01-31 00:00:00,0.02


In [18]:
write_OMS_timeseries(df.iloc[:,1:], oms_project_path + '/data/Timeseries/ex00_water_ponding.csv', has_datetime=False, start_date='01-01-2020 00:00', frequency='1min' )

    -              date     0
0 NaN  2020-01-01 00:00  0.02
1 NaN  2020-01-01 00:01  0.02
2 NaN  2020-01-01 00:02  0.02
3 NaN  2020-01-01 00:03  0.02
4 NaN  2020-01-01 00:04  0.02


***SUCCESS writing!  C:\Users\Niccolo\OMS\OMS_Project_WHETGEO1D_SummerSchool2021/data/Timeseries/ex00_water_ponding.csv


In [19]:
df.set_index('Datetime', inplace=True)
write_OMS_timeseries(df, oms_project_path + '/data/Timeseries/ex00_water_ponding_1.csv', has_datetime=True)

    -             Datetime     0
0 NaN  2020-01-01 00:00:00  0.02
1 NaN  2020-01-01 00:01:00  0.02
2 NaN  2020-01-01 00:02:00  0.02
3 NaN  2020-01-01 00:03:00  0.02
4 NaN  2020-01-01 00:04:00  0.02


***SUCCESS writing!  C:\Users\Niccolo\OMS\OMS_Project_WHETGEO1D_SummerSchool2021/data/Timeseries/ex00_water_ponding_1.csv
