# Feed-forward training

## Imports

In [1]:
import sys
sys.path.append('../')
import thermonets
import torch
import pandas as pd
import datetime
import numpy as np
import matplotlib.pyplot as plt
import pickle


Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


## Loading input data

In [2]:
#I load the data generated via `/scripts/generate_nrlmsise00_db.py` and print the columns
df=pd.read_csv('../dbs/nrlmsise00_db.txt',delimiter=',',skipinitialspace=True)
print(f'Database columns are: {df.columns}')

Database columns are: Index(['day', 'month', 'year', 'hour', 'minute', 'second', 'microsecond',
       'alt [km]', 'lat [deg]', 'lon [deg]', 'f107A', 'f107', 'ap',
       'wind zonal [m/s]', 'wind meridional [m/s]', 'density [kg/m^3]'],
      dtype='object')


In [3]:
#some descriptive statistics:
df.describe()

Unnamed: 0,day,month,year,hour,minute,second,microsecond,alt [km],lat [deg],lon [deg],f107A,f107,ap,wind zonal [m/s],wind meridional [m/s],density [kg/m^3]
count,999800.0,999800.0,999800.0,999800.0,999800.0,999800.0,999800.0,999800.0,999800.0,999800.0,999800.0,999800.0,999800.0,999800.0,999800.0,999800.0
mean,15.754151,6.531806,2015.586617,11.585017,29.184137,29.379876,505858.019204,342.516651,0.001633,179.979632,96.727886,96.581916,7.836767,-2.879626,-0.494954,1.205831e-10
std,8.866384,3.453424,4.008351,6.911738,17.378472,17.449451,287608.595307,135.816963,40.109198,104.967288,26.657874,29.821355,7.925704,73.198546,56.008426,2.411612e-10
min,1.0,1.0,2009.0,0.0,0.0,0.0,85.0,158.489319,-90.0,0.0,67.1,64.0,0.0,-276.078125,-311.510712,2.162184e-15
25%,8.0,4.0,2012.0,6.0,14.0,14.0,255869.0,223.876192,-31.007583,87.272727,72.1,71.9,3.0,-60.091662,-29.850723,5.470582e-13
50%,16.0,7.0,2016.0,12.0,29.0,29.0,507462.0,316.235464,0.578755,178.181818,84.9,85.0,5.0,-7.904188,1.651052,8.84338e-12
75%,23.0,10.0,2019.0,18.0,44.0,45.0,754783.0,446.691766,31.007583,269.090909,119.4,114.5,10.0,54.363447,31.471937,1.026746e-10
max,31.0,12.0,2022.0,23.0,59.0,59.0,999964.0,630.957344,90.0,360.0,161.1,262.0,108.0,316.930359,267.179596,1.720909e-09


## Extracting features of interest

In [4]:
#I now construct the day of the year and seconds in day:
years=df['year'].values
months=df['month'].values
days=df['day'].values
hours=df['hour'].values
minutes=df['minute'].values
seconds=df['second'].values
microseconds=df['microsecond'].values
seconds_in_day=hours*3600+minutes*60+seconds+microseconds/1e6
print('seconds in day min and max:')
print(seconds_in_day.max(), seconds_in_day.min())
doys=np.zeros((len(df),))
for i in range(len(df)):
    #date is a string, so I first convert it to datetime:
    date_=datetime.datetime(year=years[i], 
                            month=months[i], 
                            day=days[i],
                            hour=hours[i],
                            minute=minutes[i],
                            second=seconds[i],
                            microsecond=microseconds[i])
    doys[i]=date_.timetuple().tm_yday
print('day of the year min and max:')
print(doys.max(), doys.min())

seconds in day min and max:
86381.805516 5.198408
day of the year min and max:
365.0 1.0


In [5]:
list(df.columns)

['day',
 'month',
 'year',
 'hour',
 'minute',
 'second',
 'microsecond',
 'alt [km]',
 'lat [deg]',
 'lon [deg]',
 'f107A',
 'f107',
 'ap',
 'wind zonal [m/s]',
 'wind meridional [m/s]',
 'density [kg/m^3]']

In [6]:
#I extract the altitude:
alt=df['alt [km]'].values
#I now extract the longitude and latitude, and convert them to radians:
lon=np.deg2rad(df['lon [deg]'].values)
lat=np.deg2rad(df['lat [deg]'].values)
#now the space weather indices:
f107=df['f107'].values
f107a=df['f107A'].values
ap=df['ap'].values
#let's extract the target density as well:
target_density=df['density [kg/m^3]'].values

## Normalization

In [7]:
#this function normalizes the data to the range [-1,1]
def normalize_min_max(data,min_val,max_val):
    normalized_data = (2 * (data - min_val) / (max_val - min_val)) - 1
    return normalized_data
def unnormalize_min_max(data,min_val,max_val):
    unnormalized_data = 1/2 * (data + 1) * (max_val - min_val) + min_val
    return unnormalized_data
#verify: unnormalize_min_max(normalize_min_max(alt,alt.min(),alt.max()),alt.min(),alt.max())==alt

In [8]:
data_normalized={}
data_normalized['sin_lon'] = np.sin(lon)
data_normalized['cos_lon'] = np.cos(lon)
data_normalized['sin_lat'] = np.sin(lat)
data_normalized['sin_sec_in_day'] = np.sin(2*np.pi*seconds_in_day/86400.)
data_normalized['cos_sec_in_day'] = np.cos(2*np.pi*seconds_in_day/86400.)
data_normalized['sin_doy'] = np.sin(2*np.pi*doys/365.25)
data_normalized['cos_doy'] = np.cos(2*np.pi*doys/365.25)
data_normalized['alt_n'] = normalize_min_max(alt, 150., 650.)
data_normalized['f107_n'] = normalize_min_max(f107, 60., 290.)
data_normalized['f107a_n'] = normalize_min_max(f107a, 50., 190.)
data_normalized['ap_n'] = normalize_min_max(ap, 0., 140.)
#I add the non-normalized density & altitude columns (useful to extract during training):
data_normalized['altitude'] = alt  
data_normalized['density'] = target_density
df_normalized=pd.DataFrame(data_normalized)


In [9]:
description_normalized=df_normalized.describe()
description_normalized.iloc[:,:-2]

Unnamed: 0,sin_lon,cos_lon,sin_lat,sin_sec_in_day,cos_sec_in_day,sin_doy,cos_doy,alt_n,f107_n,f107a_n,ap_n
count,999800.0,999800.0,999800.0,999800.0,999800.0,999800.0,999800.0,999800.0,999800.0,999800.0,999800.0
mean,0.000168,0.009967,2.8e-05,-0.013318,-0.001473,-0.008017,0.001431,-0.229933,-0.681896,-0.332459,-0.888046
std,0.703531,0.710595,0.583207,0.709771,0.704306,0.705969,0.708197,0.543268,0.259316,0.380827,0.113224
min,-0.999874,-0.999497,-1.0,-1.0,-1.0,-0.999999,-0.999979,-0.966043,-0.965217,-0.755714,-1.0
25%,-0.690079,-0.701475,-0.515152,-0.727022,-0.700686,-0.714292,-0.707487,-0.704495,-0.896522,-0.684286,-0.957143
50%,0.0,0.015858,0.010101,-0.014907,-0.018921,-0.006451,-0.011826,-0.335058,-0.782609,-0.501429,-0.928571
75%,0.712694,0.723734,0.515152,0.69434,0.707038,0.694452,0.715044,0.186767,-0.526087,-0.008571,-0.857143
max,0.999874,1.0,1.0,1.0,1.0,0.999986,0.999991,0.923829,0.756522,0.587143,0.542857


In [10]:
#cross check that the max is <=1 and the min is >=-1
print(f"maximum and minimum datapoints of the dataset: {description_normalized.iloc[:,:-2].loc['max'].max()}, {description_normalized.iloc[:,:-2].loc['min'].min()}")
print(f"maximum and minimum of target density: {description_normalized.iloc[:,-1].loc['max'].max()}, {description_normalized.iloc[:,-1].loc['min'].min()}")

maximum and minimum datapoints of the dataset: 1.0, -1.0
maximum and minimum of target density: 1.720909357141654e-09, 2.162184111598853e-15


## NN Training

In [11]:
torch_data = torch.tensor(df_normalized.values,
                          dtype=torch.float32)

In [12]:
# NN hyperparameters
device = torch.device('cpu')
batch_size = 4096
model_path = None #pass a path to a model in case you want to continue training
lr = 0.00001
epochs = 100

In [13]:
# Dataloader creation
dataloader = torch.utils.data.DataLoader(torch_data, 
                                         batch_size=batch_size, 
                                         shuffle=True)


In [14]:
#NN creation
model = thermonets.ffnn(input_dim=len(df_normalized.columns)-2,
                        hidden_layer_dims=[32, 32],
                        output_dim=12,
                        mid_activation=torch.nn.Tanh(),
                        last_activation=torch.nn.Tanh()).to(device)

if model_path is not None:
    model.load_state_dict(torch.load(model_path,
                                     map_location=device.type))

In [15]:
#NN training
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
criterion = torch.nn.MSELoss()

In [16]:
print(f'Total number of model parameters: {sum(p.numel() for p in model.parameters())}')

Total number of model parameters: 1836


We load the global fit (see notebook: `rho_global_fit.ipynb`: this will be the baseline from which we ask the NN to learn corrections)

In [17]:
with open('/Users/ga00693/Develop/thermonets/global_fits/global_fit_nrlmsise00_180.0-1000.0-4.txt','rb') as f:
    best_global_fit=torch.from_numpy(pickle.load(f)).to(device)

In [18]:
def mean_absolute_percentage_error(y_pred, y_true):
    """
    Compute the mean absolute percentage error (MAPE) between true and predicted values.
    
    Args:
        y_true (`torch.tensor`): True values.
        y_pred (`torch.tensor`): Predicted values.
        
    Returns:
        `torch.tensor`: Mean absolute percentage error.
    """
    return torch.mean(torch.abs((y_true - y_pred) / y_true)) * 100

In [20]:
# Training loop
ratio_losses=[]
rmse_per_minibatch_nn=[]
mape_per_minibatch_nn=[]
rmse_per_minibatch_fit=[]
mape_per_minibatch_fit=[]
for epoch in range(epochs):
    model.train(True)  # Set model to training mode
    total_rmse = 0.0
    total_mape = 0.0
    k=0
    for batch_idx,el in enumerate(dataloader):
        minibatch=el[:,:-2]
        altitude=el[:,-2]
        optimizer.zero_grad()  # Clear accumulated gradients    
        minibatch=minibatch.to(device)
        params = model(minibatch).to(device)

        k+=1

        minibatch=minibatch.to(device)
        optimizer.zero_grad()  # Clear accumulated gradients
        delta_params = model(minibatch).to(device)
        #now I construct the inputs for the compute_approximated_density function as corrections from the global fit:
        params = best_global_fit*(1+delta_params)
        rho_nn=thermonets.rho_approximation(h=altitude,
                                                params=params,
                                                backend='torch')
        rho_fit=torch.from_numpy(thermonets.rho_approximation(h=altitude.numpy(),
                                                                params=best_global_fit.numpy()))
        rho_target=el[:,-1].to(device)

        loss = criterion(torch.log10(rho_nn), torch.log10(rho_target))
        loss.backward()
        #I also compute the global fit loss:
        loss_fit =  torch.nn.MSELoss()(torch.log10(rho_fit).squeeze(), torch.log10(rho_target).squeeze())
        #I update the weights:
        optimizer.step()
        #let's store the losses for the NN:
        rmse_per_minibatch_nn.append(loss.item())
        mape_per_minibatch_nn.append(mean_absolute_percentage_error(rho_nn, rho_target).item())
        total_rmse+=rmse_per_minibatch_nn[-1]
        total_mape+=mape_per_minibatch_nn[-1]
        #now the same but for the global fit:
        rmse_per_minibatch_fit.append(loss_fit.item())
        mape_per_minibatch_fit.append(mean_absolute_percentage_error(rho_fit, rho_target).item())

        #ratio of the loss between the NN and the fit (the lower, the more the NN is doing better than a global fit)
        ratio_losses.append(loss.item()/loss_fit.item())
        #I only save the best model:
        if k>1:
            if rmse_per_minibatch_nn[-1]<min(rmse_per_minibatch_nn[:-1]):    
                #updating torch best model:
                torch.save(model.state_dict(), f'best_model.pth')
                best_loss=loss.item()
                #print(f'Saving model - current best loss: {best_loss}\n')
        else:
            best_loss=loss.item()
        #I print every 10 minibatches:
        if k%10:    
            print(f'minibatch: {k}/{len(dataloader)}, ratio: {ratio_losses[-1]:.10f}, best loss till now: {best_loss:.10f}, loss RMSE (log10) & MAPE -----  NN: {loss.item():.10f}, {mape_per_minibatch_nn[-1]:.7f}; fit: {loss_fit.item():.10f}, {mape_per_minibatch_fit[-1]:.7f}', end='\r')
    #I also print at the end of the epoch
    print(f'End of epoch {epoch + 1}/{epochs}, average RMSE (log10) loss: {total_rmse / len(dataloader)}, average MAPE: {total_mape / len(dataloader)}, ')


minibatch: 59/245, ratio: 1.8085699747, best loss till now: 0.1405232847, loss RMSE (log10) & MAPE -----  NN: 0.1488258839, 77.3640289; fit: 0.0822892594, 53.2157058