# Create input files for interpolation tools in Matlab or Python from Rain Simulator output

* This script takes the outputs of the rain simulator and combines them into two `.csv` files per link setting (a rain-rate table and a link-metadata table) that are compatible with the IE algorithm code in Matlab.
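* The rain simulator output files (the inputs to this script) must be named `sim_out_<j>_<i>.csv`, where `j` is the link-setting index and `i` is the rain-snapshot index. The first cell expects both indices zero-padded to three digits (e.g. `sim_out_000_000.csv`); the alternative cell further down expects them unpadded (e.g. `sim_out_0_0.csv`).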

In [1]:
# Directory holding the rain-simulator outputs. It must end with a path
# separator, because file names are appended to it by plain concatenation.
in_path = '/Volumes/0543970348/IE_directory/aviv/test/'

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import glob
import sys
# the equivalent to "Add to path" in Matlab
sys.path.append("/Users/adameshel/Documents/Python_scripts/wrf_hydro_pyscripts/") 
from helper_functions import split_at

# Count the rain snapshots (second index varies, first fixed at 000) and the
# link settings (first index varies, second fixed at 000) from the '*_Z.csv'
# files present in the directory.
num_of_sim_timestamps = len(glob.glob(in_path + 'sim_out_000_*_Z.csv'))
num_of_link_iter = len(glob.glob(in_path + 'sim_out_*_000_Z.csv'))

# Read the quantization value from the run-parameters file: the first line
# containing 'quantization:' is used, and the value is taken as the
# second-to-last whitespace-separated token on that line.
quantization = None
with open(in_path + "/list_of_runParam.txt", "r") as f:
    for line in f:
        if 'quantization:' in line:
            quantization = float(line.split()[-2])
            break

# Fail here with a clear message instead of a NameError much later, when the
# value is written into the links metadata.
if quantization is None:
    raise ValueError("no 'quantization:' entry found in "
                     + in_path + "/list_of_runParam.txt")

# Build, for every link setting j, the two CSV files consumed by the IE code:
#   links_rainrate_mm_h_<jjj>.csv : one row per timestamp, one column per link
#   links_metadata_<jjj>.csv      : one row per link (end points, a, b, L, noise)

# The time axis is the same for every link setting, so it is built once.
start_date = '2016-01-23 12:00:00'
timestamps = pd.date_range(start=start_date,
                           periods=num_of_sim_timestamps,
                           freq='H',
                           name='time')

# (metadata column, simulator-output column) pairs copied verbatim.
metadata_sources = [('lat1', 'ya'),
                    ('lon1', 'xa'),
                    ('lat2', 'yb'),
                    ('lon2', 'xb'),
                    ('a', 'ITU_a1'),
                    ('b', 'ITU_b1'),
                    ('L', 'Length')]

for j in range(num_of_link_iter):
    # Three-digit zero-padded index, as used in the simulator file names.
    jj = format(j, '03d')

    # The first snapshot of this link setting provides the list of links.
    example_csv = pd.read_csv(in_path + 'sim_out_' + jj + '_000.csv')
    df_simData = pd.DataFrame(index=timestamps,
                              columns=example_csv.Link_num.values)

    # Create a links-rain intensity row for every i (rain snapshot)
    for i in range(num_of_sim_timestamps):
        ii = format(i, '03d')
        csv_str = in_path + 'sim_out_' + jj + '_' + ii + '.csv'
        df_temp = pd.read_csv(csv_str)
        # compute df_temp.Rain1 externally of rainSim (Hagit's request):
        # invert A = a * R**b * L, with negative attenuation clipped to zero
        # before the inversion.
        attenuation = df_temp.A_1.values.clip(min=0)
        specific_att = attenuation / (df_temp.ITU_a1.values *
                                      df_temp.Length.values)
        df_temp['my_rain1'] = specific_att ** (1 / df_temp.ITU_b1.values)
        # Map link number -> rain rate (3 decimals); assigning the Series
        # aligns the values with the link-number columns of df_simData.
        d = dict(zip(df_temp.Link_num, round(df_temp.my_rain1, 3)))
        df_simData.iloc[i] = pd.Series(d)
    df_simData.to_csv(in_path + 'links_rainrate_mm_h_' + jj + '.csv')

    # create metadata df for every j
    df_links_lat_lon = pd.DataFrame(index=example_csv['Link_num'],
                                    columns=['lat1', 'lon1', 'lat2', 'lon2',
                                             'a', 'b', 'L', 'noise'])
    df_links_lat_lon.index.name = 'link'

    for target_col, source_col in metadata_sources:
        df_links_lat_lon[target_col] = example_csv[source_col].values
    # Same noise level for all links: the quantization read from the
    # run-parameters file.
    df_links_lat_lon['noise'] = quantization # 0.3

    df_links_lat_lon.to_csv(in_path + 'links_metadata_' + jj + '.csv')

Restart Kernel

In [None]:
# Make the helper module importable; the path is relative to the current
# working directory.
import sys
sys.path.append("../wrf_hydro_pyscripts/")
from helper_functions import restartkernel

# Project helper; presumably restarts the notebook kernel so the namespace is
# cleared before running the alternative cell below — TODO confirm.
restartkernel()

____

____

# OR:

## Use this only if you used the output of the script `create_rgs_from_links.ipynb` in RainSim
### Artificial net of RG from a net of links
This is needed for Hagit's paper, where the performance of an artificial (non-real) RG network is compared with that of a link network.

In [None]:
# # create fake rain gauges files to run IE with links only
# timestamps = pd.date_range(start=start_date, 
#                            periods=num_of_sim_timestamps, 
#                            freq='H')
# df_rg_fake = pd.DataFrame(index=timestamps, 
#                           columns=['gauge1', 'gauge2'])
# df_rg_fake.index.rename('time', 
#                       inplace=True)
# df_rg_fake['gauge1'] = np.zeros([len(timestamps),1])
# df_rg_fake['gauge2'] = np.zeros([len(timestamps),1])

# df_rg_fake.to_csv(in_path + 'dataRg_fake.csv')



# # fake metadata file
# df_rg_lat_lon_fake = pd.DataFrame(index= [0,1], 
#                                   columns= ['lat', 'lon', 'name', 'noise'])
# df_rg_lat_lon_fake.index.name = 'gauge'
# df_rg_lat_lon_fake['name'] = ['gauge1', 'gauge2']
# df_rg_lat_lon_fake['lat'] = [39.00000, 49.00000]
# df_rg_lat_lon_fake['lon'] = [59.00000, 69.00000]
# df_rg_lat_lon_fake['noise'] = 0.1

# df_rg_lat_lon_fake.to_csv(in_path + 'rg_lat_lon_fake.csv')



# df8 = df_temp[np.abs((df_temp['xa'] - df_temp['xb']) < 1e-6) & np.abs((df_temp['ya'] - df_temp['yb']) < 1e-6)]


# Directory holding the rain-simulator outputs. It must end with a path
# separator, because file names are appended to it by plain concatenation.
in_path = '/Volumes/0543970348/IE_directory/22022016_24h_data/test/'

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import glob

# Count the rain snapshots (second index varies) and the link settings
# (first index varies) from the '*_Z.csv' files present in the directory.
num_of_sim_timestamps = len(glob.glob(in_path + 'sim_out_0_*_Z.csv'))
num_of_link_iter = len(glob.glob(in_path + 'sim_out_*_0_Z.csv'))


def _zero_length_mask(df):
    """Return a boolean mask of rows whose two end points coincide.

    Such rows are treated as the artificial rain gauges; all other rows are
    treated as links.
    """
    return ((df['xa'] - df['xb'] == 0) &
            (df['ya'] - df['yb'] == 0))


# The time axis is the same for every link setting, so it is built once.
start_date = '2016-01-23 12:00:00'
timestamps = pd.date_range(start=start_date,
                           periods=num_of_sim_timestamps,
                           freq='H',
                           name='time')

for j in range(num_of_link_iter):
    # create the dataframes by an example of the first sim_out file for
    # every j (links setting)
    example_csv = pd.read_csv(in_path + 'sim_out_' + str(j) + '_0.csv')
    rg_rows = _zero_length_mask(example_csv)
    # Links are the rows that are NOT zero-length; previously both subsets
    # used the same (zero-length) filter.
    example_csv_links = example_csv[~rg_rows]
    # Copy before renumbering so the assignment does not target a slice of
    # example_csv.
    example_csv_rg = example_csv[rg_rows].copy()
    example_csv_rg.Link_num = range(len(example_csv_rg))

    df_simData_links = pd.DataFrame(index=timestamps,
                                    columns=example_csv_links.Link_num.values)
    df_simData_rg = pd.DataFrame(index=timestamps,
                                 columns=example_csv_rg.Link_num.values)

    # Create a links-rain intensity mapping for every i (rain snapshot)
    for i in range(num_of_sim_timestamps):
        csv_str = in_path + 'sim_out_' + str(j) + '_' + str(i) + '.csv'
        df_temp = pd.read_csv(csv_str)

        # Split the snapshot that was just read (not the first-snapshot
        # example) into links and rain gauges.
        rg_rows_i = _zero_length_mask(df_temp)
        df_temp_links = df_temp[~rg_rows_i]
        # NOTE(review): Link_num is not renumbered here, unlike
        # example_csv_rg above — confirm before using it to fill
        # df_simData_rg.
        df_temp_rg = df_temp[rg_rows_i]

        d_links = dict(zip(df_temp_links.Link_num, round(df_temp_links.Rain1, 3)))
#         d_rg = dict(zip(df_temp_rg.Link_num, round(df_temp_rg.A_1, 3)))
#         df_simData_links.iloc[i] = pd.Series(d_links)
#         df_simData_rg.iloc[i] = pd.Series(d_rg)
        
#     df_simData_links.to_csv(in_path + 'links_rainrate_mm_h_' + str(j) + '.csv')
#     df_simData_rg.to_csv(in_path + 'rg_rainrate_mm_h_' + str(j) + '.csv')

    
#     # create metadata df for every j
#     df_links_lat_lon = pd.DataFrame(index=example_csv['Link_num'], 
#                                     columns= ['lat1', 'lon1', 'lat2', 'lon2', 'a', 'b', 'L', 'noise'])
#     df_links_lat_lon.index.name = 'link'

#     df_links_lat_lon['lat1'] = example_csv['ya'].values
#     df_links_lat_lon['lon1'] = example_csv['xa'].values
#     df_links_lat_lon['lat2'] = example_csv['yb'].values
#     df_links_lat_lon['lon2'] = example_csv['xb'].values
#     df_links_lat_lon['a'] = example_csv['ITU_a1'].values
#     df_links_lat_lon['b'] = example_csv['ITU_b1'].values
#     df_links_lat_lon['L'] = example_csv['Length'].values
#     df_links_lat_lon['noise'] = 0.1

#     df_links_lat_lon.to_csv(in_path + 'links_metadata_' + str(j) + '.csv')

Not in use until rain gauge data is ordered and integrated

In [31]:
# # create fake rain gauges files to run IE with links only
# timestamps = pd.date_range(start=start_date, 
#                            periods=num_of_sim_timestamps, 
#                            freq='H')
# df_rg_fake = pd.DataFrame(index=timestamps, 
#                           columns=['gauge1', 'gauge2'])
# df_rg_fake.index.rename('time', 
#                       inplace=True)
# df_rg_fake['gauge1'] = np.zeros([len(timestamps),1])
# df_rg_fake['gauge2'] = np.zeros([len(timestamps),1])

# df_rg_fake.to_csv(in_path + 'dataRg_fake.csv')



# # fake metadata file
# df_rg_lat_lon_fake = pd.DataFrame(index= [0,1], 
#                                   columns= ['lat', 'lon', 'name', 'noise'])
# df_rg_lat_lon_fake.index.name = 'gauge'
# df_rg_lat_lon_fake['name'] = ['gauge1', 'gauge2']
# df_rg_lat_lon_fake['lat'] = [39.00000, 49.00000]
# df_rg_lat_lon_fake['lon'] = [59.00000, 69.00000]
# df_rg_lat_lon_fake['noise'] = 0.1

# df_rg_lat_lon_fake.to_csv(in_path + 'rg_lat_lon_fake.csv')

# Data in directory is now ready to be used in IE code (`imap.m` in Matlab)
* no need to go through `imapFileReader.m`

In [1]:
# IPython magic: prints the names of the variables in the interactive
# namespace (used here to check what is currently defined).
%who

Interactive namespace is empty.
