# Merge yearly .hdf files into input & output .csv files

### This notebook uses TensorFlow NGC Container Release 23.03-tf2-py3
### (https://catalog.ngc.nvidia.com/orgs/nvidia/containers/tensorflow)
### and runs on a machine with 4x ARM CPUs (Neoverse N1) and 24GB RAM

In [1]:
# Announce the cell, then load the modules used by the merge cells below.
print('Imports')
import glob  # glob.glob: expand the yearly .hdf filename patterns
import os  # os.path.join: build input/output file paths
import pandas as pd  # pd.read_hdf / DataFrame.to_csv

Imports


In [2]:
# Input to .csv
# Merge every yearly file matching x_mean_tp1_2*.hdf (in sorted path order)
# into one CSV: header row written once, 'lon_lat' column dropped, no index.
print('Input files')
path = '/home/jeani/data/'
x_filename = 'x_mean_tp1_2*.hdf'
file_list = sorted(glob.glob(os.path.join(path, x_filename)))
if not file_list:
    # Fail loudly: otherwise nothing is written and a stale .csv from an
    # earlier run would silently be used downstream.
    raise FileNotFoundError(
        f'No files matching {os.path.join(path, x_filename)}')

input_filename = os.path.join(path, 'input_mean_tp1.csv')

# Column layout of the first file; later files are appended without a header,
# so they must have exactly the same columns in the same order.
expected_columns = None

for file_number, file_name in enumerate(file_list):
    print(file_name)
    df = pd.read_hdf(file_name)
    print(df)
    df = df.drop(columns=['lon_lat'])

    if expected_columns is None:
        expected_columns = list(df.columns)
    elif list(df.columns) != expected_columns:
        raise ValueError(
            f'{file_name} has different columns than {file_list[0]}; '
            f'appending it would misalign {input_filename}')

    # First file creates/overwrites the CSV with a header; the rest append.
    is_first = file_number == 0
    df.to_csv(input_filename, mode='w' if is_first else 'a',
              header=is_first, index=False, sep=',')

Input files
/home/jeani/data/x_mean_tp1_2015.hdf
        lon_lat    Lichen    N     t2m_0     t2m_1     t2m_2     t2m_3  \
0    1559.06844  0.211879  447  0.996191  0.994671  0.995452  0.996635   
1    1559.06855  0.211091  440  0.989355  0.988203  0.990508  0.992543   
2    1559.06865  0.146966  267  0.986124  0.986423  0.989102  0.991085   
3    1559.06894  0.066429   84  1.004145  1.005221  1.006582  1.006018   
4    1589.06844  0.135455   77  0.992889  0.989019  0.990273  0.992911   
..          ...       ...  ...       ...       ...       ...       ...   
734  3079.06944  0.190000   62  0.989532  0.989124  0.988256  0.987347   
735  3079.06955  0.165333   45  0.991210  0.990799  0.990056  0.989213   
736  3079.06965  0.248843  268  0.993679  0.993266  0.992661  0.991882   
737  3079.06975  0.276389   36  0.996222  0.995807  0.995357  0.994664   
738  3079.07025  0.135278   36  0.999211  0.998496  0.997959  0.997513   

        t2m_4     t2m_5     t2m_6  ...    tp_8750    tp_8751  

In [3]:
# Output to .csv
# Merge every yearly file matching y_mean_tp1_2*.hdf (in sorted path order)
# into one CSV: header row written once, 'lon_lat' column dropped.
# `path` is the data directory defined in the "Input to .csv" cell.
print('Output files')
y_filename = 'y_mean_tp1_2*.hdf'
file_list = sorted(glob.glob(os.path.join(path, y_filename)))
if not file_list:
    # Fail loudly: otherwise nothing is written and a stale .csv from an
    # earlier run would silently be used downstream.
    raise FileNotFoundError(
        f'No files matching {os.path.join(path, y_filename)}')

output_filename = os.path.join(path, 'output_mean_tp1.csv')

# Column layout of the first file; later files are appended without a header,
# so they must have exactly the same columns in the same order.
expected_columns = None

for file_number, file_name in enumerate(file_list):
    print(file_name)
    dg = pd.read_hdf(file_name)
    print(dg)
    # reset_index() moves the current index into a column, which is then
    # written to the CSV.
    # NOTE(review): the printed frames show 'lon_lat' already as a regular
    # column with a default integer index, so this appears to add an extra
    # 'index' column to the output file. Kept as-is because downstream readers
    # may expect it - confirm whether reset_index(drop=True) was intended.
    dg = dg.reset_index()
    dg = dg.drop(columns=['lon_lat'])

    if expected_columns is None:
        expected_columns = list(dg.columns)
    elif list(dg.columns) != expected_columns:
        raise ValueError(
            f'{file_name} has different columns than {file_list[0]}; '
            f'appending it would misalign {output_filename}')

    # First file creates/overwrites the CSV with a header; the rest append.
    is_first = file_number == 0
    dg.to_csv(output_filename, mode='w' if is_first else 'a',
              header=is_first, index=False, sep=',')

Output files
/home/jeani/data/y_mean_tp1_2015.hdf
        lon_lat  new_Lichen  new_N
0    1559.06844    0.136917  652.0
1    1559.06855    0.159610  616.0
2    1559.06865    0.113471  291.0
3    1559.06894    0.045974  154.0
4    1589.06844    0.117667  120.0
..          ...         ...    ...
734  3079.06944    0.206731  104.0
735  3079.06955    0.106735   98.0
736  3079.06965    0.232222  207.0
737  3079.06975    0.221667   72.0
738  3079.07025    0.236456   79.0

[739 rows x 3 columns]
/home/jeani/data/y_mean_tp1_2016.hdf
        lon_lat  new_Lichen  new_N
0    1559.06844    0.140494  587.0
1    1559.06855    0.138531  429.0
2    1559.06865    0.089237  249.0
3    1559.06875    0.076878  426.0
4    1559.06894    0.053043  161.0
..          ...         ...    ...
889  3079.06944    0.205729   96.0
890  3079.06955    0.181293  147.0
891  3079.06965    0.050909   22.0
892  3079.06975    0.020000    8.0
893  3079.07025    0.170240  125.0

[894 rows x 3 columns]
/home/jeani/data/y_mean_t

In [4]:
# Completion marker for the merge notebook.
print('Finished!')

Finished!
