# Forecasting moss & lichen fractional cover mean
## with a Neural Network using Keras
## (Reading input and output files stored locally as .csv)
## (Writing X_train, X_test, y_train, y_test locally as .hdf)
# For lichen output only

### This notebook uses TensorFlow NGC Container Release 23.03-tf2-py3
### (https://catalog.ngc.nvidia.com/orgs/nvidia/containers/tensorflow)
### and runs on a machine with 4x ARM CPUs (Neoverse N1) and 24GB RAM

In [1]:
# pip install tables

In [2]:
# There is a known issue affecting aarch64 libgomp, which might sometimes cause
# "Cannot allocate memory in static TLS block" errors.
# The workaround is to run the following command:
%env LD_PRELOAD=/usr/lib/aarch64-linux-gnu/libgomp.so.1

env: LD_PRELOAD=/usr/lib/aarch64-linux-gnu/libgomp.so.1


In [3]:
print('Starting imports')
from sklearn.model_selection import train_test_split
import numpy as np
import os
import pandas as pd
print('Finished imports')

Starting imports
Finished imports


In [4]:
# Load the predictor (input) and target (output) tables from local .csv files
path = '/home/jeani/data/'
input_file = os.path.join(path, 'input_mean_tp1.csv')
output_file = os.path.join(path, 'output_mean_tp1.csv')


def _describe(label, frame):
    """Print a label followed by the frame's row and column counts."""
    print(label)
    print(f'Number of rows: {frame.shape[0]:,}')
    print(f'Number of columns: {frame.shape[1]}')


print('Reading ', input_file)
# NOTE: `input` shadows the built-in of the same name; the name is kept
# because the train/test split cell refers to it.
input = pd.read_csv(input_file)
print('Read')
_describe('Input', input)

print('Reading ', output_file)
# Discard the 'index' column read from the output file
output = pd.read_csv(output_file).drop(columns=['index'])
print('Read')
_describe('Output', output)

Reading  /home/jeani/data/input_mean_tp1.csv
Read
Input
Number of rows: 3,403
Number of columns: 17522
Reading  /home/jeani/data/output_mean_tp1.csv
Read
Output
Number of rows: 3,403
Number of columns: 2


In [5]:
# Hold out 20% of the rows as the test set; rows are shuffled with a fixed
# seed (random_state=0) so the split is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    input,
    output,
    test_size=0.2,
    random_state=0,
    shuffle=True,
)

In [6]:
# Persist each of the four splits to its own .hdf file under `path`
X_train_file = os.path.join(path, 'X_mean_tp1_train.hdf')
X_test_file = os.path.join(path, 'X_mean_tp1_test.hdf')
y_train_file = os.path.join(path, 'y_mean_tp1_train.hdf')
y_test_file = os.path.join(path, 'y_mean_tp1_test.hdf')

# (label, frame, destination, HDF key): features use key 'df', targets use key 'dg'
_splits = [
    ('X_train', X_train, X_train_file, 'df'),
    ('X_test', X_test, X_test_file, 'df'),
    ('y_train', y_train, y_train_file, 'dg'),
    ('y_test', y_test, y_test_file, 'dg'),
]

for _label, _frame, _destination, _hdf_key in _splits:
    print(_label)
    # mode="w" replaces any file already present at the destination.
    # NOTE(review): confirm against the pandas to_hdf documentation that
    # index=False has the effect intended here.
    _frame.to_hdf(_destination, key=_hdf_key, mode="w", index=False)

X_train
X_test
y_train
y_test


In [7]:
# Print a plain-text (truncated) view of the test-set feature table
print(X_test)

        Lichen    N     t2m_0     t2m_1     t2m_2     t2m_3     t2m_4  \
989   0.024091  132  0.971087  0.971371  0.970914  0.968853  0.967404   
686   0.295000    8  0.993326  0.992581  0.991884  0.991502  0.990914   
472   0.175429   35  0.994245  0.994045  0.993928  0.993348  0.992681   
3231  0.152985   67  0.960703  0.961761  0.962833  0.966086  0.972278   
3351  0.220000   88  0.970532  0.973323  0.976747  0.979609  0.981333   
...        ...  ...       ...       ...       ...       ...       ...   
3268  0.042000    5  0.964211  0.965566  0.966683  0.967295  0.969660   
1559  0.040000    2  0.983213  0.981717  0.980603  0.979911  0.979536   
547   0.120000    3  0.996146  0.995740  0.995268  0.994531  0.993795   
3251  0.248549  455  0.964745  0.966638  0.968546  0.970838  0.975005   
228   0.149043  188  0.994862  0.993952  0.993521  0.993340  0.993717   

         t2m_5     t2m_6     t2m_7  ...   tp_8750   tp_8751   tp_8752  \
989   0.965331  0.967756  0.972545  ...  0.101313 

In [8]:
# Print a plain-text (truncated) view of the test-set targets.
# NOTE(review): the recorded output contains NaN targets (e.g. row 547) —
# confirm these rows are handled before the data is used for training.
print(y_test)

      new_Lichen  new_N
989     0.079881  253.0
686     0.092609   46.0
472     0.043143  140.0
3231    0.082727   55.0
3351    0.212667  165.0
...          ...    ...
3268    0.053333   12.0
1559    0.030000    2.0
547          NaN    NaN
3251    0.106951   82.0
228     0.067881  335.0

[681 rows x 2 columns]


In [9]:
# Marker printed when execution reaches the end of the notebook
print('Finished!')

Finished!
