In [None]:
import numpy as np
import pandas as pd
import time
import scipy.optimize as optimize
from scipy.integrate import odeint, solve_ivp
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

# Import custom functions from other notebooks
from ipynb.fs.full.myfun_nn import *
from ipynb.fs.full.myfun_model_usefulfuns import *
from ipynb.fs.defs.myfun_plot import *

# NN driven model

In [None]:
def TD_ANN_model(t, X, vel):
    """
    Right-hand side of the NN-driven ODE system: dX/dt = vel.

    The velocities are supplied from outside and do not depend on the time or on
    the state, so they are handed back unchanged. `t` and `X` are part of the
    signature only because an ODE solver passes them to its right-hand side.

    Args:
    - t: Current time (unused).
    - X: Current state (unused).
    - vel: Velocities to use as the time derivative of the state.

    Returns:
    - vel, returned as is (the same object that was passed in).
    """

    return vel

## ODE solver for the NN-driven model

In [None]:
def odesolver_ann(x0, vel, t0, tend, deltat=0.05):
    """
    Integrate TD_ANN_model between t0 and tend with scipy's odeint.

    Args:
    - x0: Initial condition of the ODE system.
    - vel: Velocities forwarded to TD_ANN_model as its extra argument.
    - t0: Initial time.
    - tend: End time.
    - deltat: Time step handed to time_discretization (default: 0.05).

    Returns:
    - tspan_ann: Time points produced by time_discretization(t0, tend, deltat).
    - sol_ann: Transposed odeint output, i.e. one row per state component and
      one column per time point.
    """

    # Discrete time grid on which the solution is evaluated.
    time_grid = time_discretization(t0, tend, deltat)

    # tfirst=True because TD_ANN_model takes (t, X, ...) rather than (X, t, ...).
    trajectories = odeint(TD_ANN_model, x0, time_grid, args=(vel,), tfirst=True)

    return time_grid, trajectories.T

In [None]:
def create_data_ann_scene(scn):
    """
    Build the (input, target) pair used to train the ANN on one scene.

    Args:
    - scn: Scene record exposing the keys 'cons_dis', 'Xarr' and 'Tarr'.

    Returns:
    - X_scn: scn['cons_dis'], returned as is (consecutive distances between vehicles).
    - y_scn: Forward-difference velocities (x_(i+1) - x_i) / deltaT computed along
      axis 1 of scn['Xarr'], with the last row dropped (the leader vehicle,
      which is not predicted by the network).
    """

    # Inputs: consecutive distances, taken straight from the scene.
    inputs = scn['cons_dis']

    # Targets: finite-difference velocity on every time interval.
    delta_x = np.diff(scn['Xarr'], axis=1)
    delta_t = np.diff(scn['Tarr'])
    all_velocities = delta_x / delta_t

    # Keep every row but the last one (leader).
    return inputs, all_velocities[:-1]

### Custom training loop

In [None]:
def solve_step(model, scn, v0, it, lists, nn_fun):
    """
    Run one pass over a scene: on every time interval take one gradient step on the
    network, then integrate the ODE system with the velocities it predicted.

    Args:
    - model: The neural network model to be trained and used for prediction.
    - scn: A dictionary or data structure representing a scene.
    - v0: Velocity appended after the predicted ones (leader vehicle's velocity).
    - it: Current iteration (kept for interface compatibility; not used here).
    - lists: [t_list, x_list, v_list], i.e. time points, trajectories and velocities
      accumulated so far; x_list must already hold the initial values.
    - nn_fun: Pair (loss function, optimizer).

    Returns:
    - t_list: Time points, as updated by update_sol_lists.
    - x_list: Trajectories over time, as updated by update_sol_lists.
    - v_list: One entry per time interval with the velocities used on it
      (network predictions followed by v0).
    """

    t_list, x_list, v_list = lists
    X_arr, y_arr = create_data_ann_scene(scn)
    N, tstamps = scn['N. vehicles'], scn['Tarr']

    loss_fn, optimizer = nn_fun

    for i in range(len(tstamps) - 1):

        # STEP 1: Take the data of this interval and train the NN model on it
        X, y = X_arr[:, i], y_arr[:, i]

        # The gradient is requested only once, so a non-persistent tape is enough:
        # its resources are released right after tape.gradient().
        with tf.GradientTape() as tape:
            y_pred = model(X, training=True)  # Forward pass
            loss_value = loss_fn(y_true=y, y_pred=y_pred)  # Loss function

        # Compute gradients and update weights
        trainable_vars = model.trainable_variables
        grads = tape.gradient(loss_value, trainable_vars)
        optimizer.apply_gradients(zip(grads, trainable_vars))

        # STEP 2: Solve the ODE system in this time interval
        x0 = [l[-1] for l in np.vstack(x_list).tolist()]  # Last values computed
        t0, tend = tstamps[i], tstamps[i + 1]
        # y_pred comes from the forward pass above, i.e. before the weight update.
        v_ann = np.append(y_pred.numpy().flatten().tolist(), v0).tolist()
        tspan_ann, sol_ann = odesolver_ann(x0, v_ann, t0, tend, deltat=0.05)

        # STEP 3: Store the information
        x_list, t_list = update_sol_lists(N, tspan_ann, sol_ann, x_list, t_list)
        v_list.append(v_ann)

    return t_list, x_list, v_list

In [None]:
def solve_nn_scn(model, scn, v0, LEARNING_RATE_NN=0.001, LEARNING_RATE_v0=0.5, NUM_ITER=200, info_flag=True):
    """
    Solve a scene's dynamics with custom training of a neural network model and updating v0 using SGD.

    Each iteration simulates the whole scene with solve_step (which also trains the
    network), updates v0 with SGD_v0 and records the MSE between scn['Xarr'] and the
    simulated trajectories. The loop stops after NUM_ITER iterations, or earlier when
    the check done every 50 iterations finds that the MSE changed by at most 1e-6
    with respect to the value recorded 49 iterations before.

    Args:
    - model: The neural network model to be used for prediction (trained in place).
    - scn: A dictionary or data structure representing a scene.
    - v0: Initial velocity (leader vehicle's velocity).
    - LEARNING_RATE_NN: Initial learning rate for the neural network (default: 0.001).
    - LEARNING_RATE_v0: Learning rate for updating v0 using SGD (default: 0.5).
    - NUM_ITER: Maximum number of iterations (default: 200).
    - info_flag: If True, print per-interval and summary information (default: True).

    Returns:
    - t_best: List of time points for the best iteration.
    - x_best: List of trajectories for the best iteration.
    - v_best: List of velocities used on each interval for the best iteration.
    - v0_scn: List with the v0 value used in each iteration.
    - it: Iteration counter on exit, i.e. number of iterations performed plus one.

    Raises:
    - ValueError: If NUM_ITER is smaller than 1 (no iteration would be run, so
      there would be nothing to plot or return).
    """

    if NUM_ITER < 1:
        raise ValueError(f"NUM_ITER must be at least 1, got {NUM_ITER}")

    tstamps, fmt = scn['Tarr'], '{0:.02f}'
    v0_scn = []

    print("--"*50)
    print(f"We have {len(tstamps) - 1} time intervals inside [{fmt.format(tstamps[0])},{fmt.format(tstamps[-1])}]")

    # Setting learning rate for SGD
    lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
                    initial_learning_rate=LEARNING_RATE_NN,
                    decay_steps=int(NUM_ITER/2) + 1,
                    decay_rate=0.9,
                    staircase=True)

    optimizer = tf.keras.optimizers.SGD(learning_rate=lr_schedule)
    loss_fn = tf.keras.losses.MeanSquaredError()

    err_list, err, diff = [], 1e9, 1
    err_list.append(err)

    it = 1
    err_best = err
    t_best = x_best = v_best = None

    while (diff > 1e-6 and it < NUM_ITER + 1):

        ## STEP 1: Simulate the dynamics over a scene with v0
        t_list, x_list, v_list = [scn['Tarr'][0]], [[i] for i in scn['Xarr'][:, 0]] , []
        t_list, x_list, v_list = solve_step(model, scn, v0, it,
                                            lists=[t_list, x_list, v_list],
                                            nn_fun=[loss_fn, optimizer])

        _, sol_ann_matched = match_timestamps_scene(t_list, x_list)

        ## STEP 2: Update v0 with SGD
        v0_upd, _, _, _ = SGD_v0(scn, sol_ann_matched, v0, LEARNING_RATE_v0)
        v0_scn.append(v0)
        v0 = v0_upd

        ## STEP 3: update params according to the update rule
        err = loss_fn(y_true=scn['Xarr'], y_pred=sol_ann_matched).numpy()
        err_list.append(err)

        if err < err_best:
            t_best, x_best, v_best = t_list, x_list, v_list
            err_best = err
        elif t_best is None:
            # Fallback: keep the first simulation even when its error does not beat
            # the initial threshold (e.g. NaN), so the "best" results always exist.
            t_best, x_best, v_best = t_list, x_list, v_list

        # Update diff
        if it % 50 == 0:
             diff = abs(err_list[-1] - err_list[-50])

        it += 1

    ## STEP 4: Print some info
    if info_flag:
        _, y_arr = create_data_ann_scene(scn)
        for i in range(0, len(tstamps) - 1):
            print(f"\
            Interval n.{i}: [{fmt.format(scn['Tarr'][i])}, {fmt.format(scn['Tarr'][i+1])}]\n\
                * y_true: {y_arr[:, i]}\n\
                * v_ann: {v_best[i]}\n")
        
        print(f"\
        Some info:\n\
        * MSE = {err}\n\
        * Learning rate NN = {optimizer.learning_rate.numpy()}\n\
        * diff (update rule for LR NN) = {diff}\n\
        * It = {it-1}")

    # Plot function (shows the trajectories of the last iteration, not of the best one)
    tscale = 1 + (tstamps[-1] - tstamps[0]) / 10000
    # Raw f-string: the "\ " sequences are LaTeX spacing, not Python escapes.
    title = rf"$df\  n.\ {scn['N. file']}\ -\ Scene\ n.\ {scn.name+1},\ at\ it={it - 1}$"
    plot_scn(scn, sol_ann_matched, title, xbal=0.01, ybal=0.05, scale=tscale)

    print("--"*50)

    return t_best, x_best, v_best, v0_scn, it

### Solve the NN-driven model in all the scenes in a df, and get v0 mean for each scene

In [None]:
def lr_finder(model, scn, v0):
    """
    Find the best learning rate for a model using a learning rate range test.

    For every candidate learning rate a copy of `model` is trained with 25 passes
    of solve_step over the scene; the MSE between scn['Xarr'] and the trajectories
    of the last pass decides which learning rate is best. `model` itself is not
    modified.

    Args:
    - model: The neural network model to be used for prediction.
    - scn: A dictionary or data structure representing a scene.
    - v0: Initial velocity (leader vehicle's velocity).

    Returns:
    - err_lr_best: Best error obtained during the learning rate range test
      (1e9 if no candidate produced a smaller error).
    - lr_best: Best learning rate found (None if no candidate improved on 1e9).
    - it_lr_best: Index of the last pass run for the best learning rate; the error
      is evaluated only after the final pass, so this is 24 whenever a best
      learning rate exists, and None otherwise.
    """

    lr_range = [0.01, 0.005, 0.001, 0.0005, 0.0001, 0.00005, 0.00001]
    err_lr_best, lr_best, it_lr_best = 1e9, None, None

    loss_fn = tf.keras.losses.MeanSquaredError()

    # clone_model builds new layers with freshly initialised weights, so the current
    # weights are copied explicitly: every candidate then starts from the same point
    # and the comparison reflects the learning rate only.
    start_weights = model.get_weights()

    for lr in lr_range:

        mmodel = tf.keras.models.clone_model(model)
        mmodel.set_weights(start_weights)
        optimizer = tf.keras.optimizers.SGD(learning_rate=lr)

        for it in range(25):

            t_list, x_list, v_list = [scn['Tarr'][0]], [[i] for i in scn['Xarr'][:,0]], []
            t_list, x_list, v_list = solve_step(mmodel, scn, v0, -1,
                                                lists=[t_list, x_list, v_list],
                                                nn_fun=[loss_fn, optimizer])

        # Only the trajectories of the last pass are scored.
        _, sol_ann_matched = match_timestamps_scene(t_list, x_list)
        err = loss_fn(y_true=scn['Xarr'], y_pred=sol_ann_matched).numpy()

        if err < err_lr_best:
            err_lr_best, lr_best, it_lr_best = err, lr, it

    return err_lr_best, lr_best, it_lr_best

In [None]:
def SGD_v0(scn, x_list_matched, v0, LEARNING_RATE_v0):
    """
    Update v0 with one gradient-descent step driven by the MSE between true and predicted trajectories.

    The gradient is taken with respect to the predicted trajectories (not with
    respect to v0 itself); the step uses the mean of the last row of that gradient,
    skipping its first entry.

    Args:
    - scn: A dictionary or data structure representing a scene (scn['Xarr'] holds the true trajectories).
    - x_list_matched: Predicted trajectories at the matched timestamps, same shape as scn['Xarr'].
    - v0: Initial velocity (leader vehicle's velocity).
    - LEARNING_RATE_v0: Learning rate for updating v0.

    Returns:
    - v0_upd: Updated v0 after the step.
    - loss_val: MSE between true and predicted trajectories.
    - grads: Gradient of the loss with respect to the predicted trajectories.
    - g: Mean of grads[-1][1:], the scalar used in the update.
    """

    loss_objective = tf.keras.losses.MeanSquaredError()

    trajs_true_tensor = tf.convert_to_tensor(scn['Xarr'], dtype=tf.float64)
    trajs_pred_tensor = tf.convert_to_tensor(x_list_matched, dtype=tf.float64)

    # A single gradient is requested, so the tape does not need to be persistent.
    with tf.GradientTape() as tape:
        # Plain tensors are not tracked automatically: watch the prediction.
        tape.watch(trajs_pred_tensor)
        loss_val = loss_objective(y_true=trajs_true_tensor, y_pred=trajs_pred_tensor)

    # Compute gradients
    grads = tape.gradient(loss_val, trajs_pred_tensor)

    # Last row = leader car (presumably, as in the rest of the notebook); the first
    # entry is skipped - NOTE(review): likely because the initial position is given, confirm.
    g = grads[-1].numpy()[1:].mean()
    v0_upd = v0 - LEARNING_RATE_v0 * g

    return v0_upd, loss_val, grads, g


In [None]:
def train_nn_df(df, model, v0, NUM_ITER, LEARNING_RATE_v0):
    """
    Train the neural network model on every scene of a single DataFrame and collect per-scene information.

    For each scene the learning rate is chosen with lr_finder, then the scene is
    solved with solve_nn_scn, always starting from the same initial guess v0.

    Args:
    - df: The DataFrame containing scene data.
    - model: The neural network model (trained in place).
    - v0: Initial velocity guess.
    - NUM_ITER: Number of iterations.
    - LEARNING_RATE_v0: Learning rate for updating v0.

    Returns:
    - info_df: A DataFrame with one row per scene and columns
      't_list', 'x_list', 'v_list', 'v0', 'n_scn', 'iter', 'mse'
      (empty, but with these columns, when df yields no scenes).
    - mse_mean: Mean of the per-scene MSE (NaN when df yields no scenes).
    - model: The model that was passed in.
    """

    scn_list = seq2scn(df)
    loss_fn = tf.keras.losses.MeanSquaredError()

    # One list per output column, filled scene by scene; building the frame from
    # this dict also works when there are no scenes at all.
    info = {'t_list': [], 'x_list': [], 'v_list': [], 'v0': [], 'n_scn': [], 'iter': [], 'mse': []}

    for scnnum, scn in enumerate(scn_list):

        err_lr_best, lr_best, it_lr_best = lr_finder(model, scn, v0)

        print(f"DataFrame n.{scn['N. file']}. Scene n.{scnnum+1}/{len(scn_list)}")

        t_list, x_list, v_list, v0_scn, it = solve_nn_scn(model, scn, v0,
                                                            lr_best,
                                                            LEARNING_RATE_v0,
                                                            NUM_ITER, info_flag=False)

        # Compute MSE for the solution computed
        _, sol_ann_matched = match_timestamps_scene(t_list, x_list)
        mse = loss_fn(y_true=scn['Xarr'], y_pred=sol_ann_matched).numpy()

        info['t_list'].append(t_list)
        info['x_list'].append(x_list)
        info['v_list'].append(v_list)
        info['v0'].append(v0_scn[-1])  # v0 used in the last iteration
        info['n_scn'].append(scn.name)
        info['iter'].append(it - 1)  # solve_nn_scn returns the counter, one past the last iteration
        info['mse'].append(mse)

        print(f"\
        For scene {scnnum+1}/{len(scn_list)}:\n\
        * After LR finder: LR_NN={lr_best} with mse={err_lr_best} at it={it_lr_best}\n\
        * v0 = {v0_scn[-1]}\n\
        * MSE = {mse}\n\
        * iterations = {it-1}")
        print("--"*50)
        print("\n")

    mse_mean = np.mean(info['mse']) if info['mse'] else np.nan

    info_df = pd.DataFrame(info)

    return info_df, mse_mean, model

In [None]:
def test_nn_df(df, model, v0, plot_flag=True):
    """
    Test a neural network model on a DataFrame of scenes and return evaluation information.

    The model is only evaluated (no training). On every time interval the ODE
    system is restarted from the true positions scn['Xarr'][:, i] and integrated
    with the velocities predicted by the model, followed by v0.

    Args:
    - df: DataFrame containing scene data.
    - model: Trained neural network model.
    - v0: Velocity used for the leader vehicle in every scene.
    - plot_flag: If True, plot each scene with plot_scn (default: True).

    Returns:
    - info_df: DataFrame with one row per scene and columns
      't_list', 'x_list', 'v_list', 'n_scn', 'v0', 'mse'
      (empty when df yields no scenes).
    - mse_mean: Mean of the per-scene MSE (NaN when df yields no scenes).
    """

    scn_list = seq2scn(df)
    loss_fn = tf.keras.losses.MeanSquaredError()

    # One list per output column, filled scene by scene.
    t_col, x_col, v_col, name_col, mse_list = [], [], [], [], []

    # Run over scenes
    for scnnum, scn in enumerate(scn_list):

        N, tstamps = scn['N. vehicles'], scn['Tarr']
        X_arr, _ = create_data_ann_scene(scn)

        print(f"DataFrame n.{scn['N. file']}. Scene n.{scnnum + 1}/{len(scn_list)}")

        # Initialize lists
        t_list, x_list, v_list = [scn['Tarr'][0]], [[x] for x in scn['Xarr'][:, 0]], []

        # Solve the ODE
        for i in range(len(tstamps) - 1):

            X = X_arr[:, i]

            # Solve the ODE system in this time interval, starting from the true positions
            x0 = list(scn['Xarr'][:, i])
            t0, tend = tstamps[i], tstamps[i + 1]
            y_pred = model(X)
            v_ann = np.append(y_pred.numpy().flatten().tolist(), v0).tolist()
            tspan_ann, sol_ann = odesolver_ann(x0, v_ann, t0, tend, deltat=0.05)

            # Store the information
            x_list, t_list = update_sol_lists(N, tspan_ann, sol_ann, x_list, t_list)
            v_list.append(v_ann)

        # Evaluate the error
        _, sol_ann_matched = match_timestamps_scene(t_list, x_list)
        mse = loss_fn(y_true=scn['Xarr'], y_pred=sol_ann_matched).numpy()
        mse_list.append(mse)

        t_col.append(t_list)
        x_col.append(x_list)
        v_col.append(v_list)
        name_col.append(scn.name)

        # Plot function
        if plot_flag:
            tscale = 1 + (tstamps[-1] - tstamps[0]) / 10000
            # Raw f-string: the "\ " sequences are LaTeX spacing, not Python escapes.
            title = rf"$df\  n.\ {scn['N. file']}\ -\ Scene\ n.\ {scnnum+1} - Test$"
            plot_scn(scn, sol_ann_matched, title, xbal=0.01, ybal=0.05, scale=tscale)

        print(f"\
        For scene {scnnum + 1}/{len(scn_list)}:\n\
        * MSE = {mse}")
        print("--" * 50)
        print("\n")

    mse_mean = np.mean(mse_list) if mse_list else np.nan

    info_df = pd.DataFrame({
        't_list': t_col,
        'x_list': x_col,
        'v_list': v_col,
        'n_scn': name_col,
        'v0': v0,
        'mse': mse_list
    })

    return info_df, mse_mean

In [None]:
def solve_nn_df(df, model, v0, NUM_ITER, LEARNING_RATE_v0=0.5, test=0.3, plot_flag=True):
    """
    Train and test a neural network model on a DataFrame of scenes.

    The scenes are split with train_test_split (random_state=42); the model is
    trained on the training scenes and then evaluated on the remaining ones, using
    the mean of the v0 values found during training as leader velocity.

    Args:
    - df: DataFrame containing scene data (must have an 'N. file' column).
    - model: Neural network model.
    - v0: Initial velocity guess.
    - NUM_ITER: Number of iterations for training.
    - LEARNING_RATE_v0: Learning rate for v0 optimization (default: 0.5).
    - test: Proportion of the dataset to include in the test split (default: 0.3).
    - plot_flag: Forwarded to test_nn_df to enable/disable plots (default: True).

    Returns:
    - info_df: Concatenation of the training and test information, with the extra
      columns 'type' ("train"/"test"), 'mse_mean' and 'N. file'.
    - model_trained: The model returned by train_nn_df.
    """

    df_train, df_test = train_test_split(df, test_size = test, random_state=42)

    # Positional access: df['N. file'][0] would look up the index *label* 0, which
    # fails when the DataFrame does not carry that label (e.g. after filtering).
    n_file = df['N. file'].iloc[0]

    print(f"In DataFrame n.{n_file} we have {len(df)} scenes.\n\
    To train the model we use {len(df_train)} scenes, the remaining {len(df_test)} to test the model.")

    ## TRAINING
    print(f"\nTraining step. ({len(df_train)} scenes)")
    print("=="*50)
    info_train_df, mse_train, model_trained = train_nn_df(df_train, model, v0, NUM_ITER, LEARNING_RATE_v0)

    print(f"MSE train: {mse_train}")

    print("- -" * 35)

    ## TESTING
    print(f"\nTesting step. ({len(df_test)} scenes)")
    print("=="*50)

    # The test scenes use the average of the v0 values found on the training scenes.
    v0_trained = np.mean(info_train_df['v0'])
    info_test_df, mse_test = test_nn_df(df_test, model_trained, v0_trained, plot_flag)

    print(f"MSE test: {mse_test}\n")

    print("- -" * 35)

    print(f"\n\
    Summing up:\n\
          * MSE train: {mse_train}\n\
          * MSE test: {mse_test}\n\
          ")

    print("- -" * 35)

    print("\n\n\n")

    # Concatenating info_df
    info_train_df['type']="train"
    info_train_df['mse_mean']=mse_train

    info_test_df['type']="test"
    info_test_df['mse_mean']=mse_test

    info_df = pd.concat([info_train_df, info_test_df], sort=False)
    info_df['N. file'] = n_file

    return info_df, model_trained

### Solve NN driven model in each df of a dataset, and get v0 mean for each scn in each df

In [None]:
def solve_nn_dataset(dataset, doe, v0, processed_flag, NUM_ITER, LEARNING_RATE_v0=0.5, test=0.3, plot_flag=True):
    """
    Run solve_nn_df on every DataFrame of a dataset with a network built from `doe`.

    A single model is created with create_model(doe) and passed to solve_nn_df for
    each DataFrame in turn.

    Args:
    - dataset: List of DataFrames containing scene data.
    - doe: Neural network structure (iterable of values, printed joined by '-').
    - v0: Initial velocity guess.
    - processed_flag: Flag indicating whether the dataset has been processed;
      stored in the 'processed' column of the result.
    - NUM_ITER: Number of iterations for training.
    - LEARNING_RATE_v0: Learning rate for v0 optimization (default: 0.5).
    - test: Proportion of each DataFrame used for testing (default: 0.3).
    - plot_flag: Forwarded to solve_nn_df to enable/disable plots (default: True).

    Returns:
    - info_dataset: Concatenated DataFrame with the information of every scene,
      plus the columns 'DOE' and 'processed'.
    - model_trained: The model returned by the last call to solve_nn_df.
    """

    n_dfs = len(dataset)
    structure = '-'.join(str(x) for x in doe)

    print("--"*50)
    print(f"Analyzing {n_dfs} dfs. NN structure: {structure}")
    print("**"*50)

    model = create_model(doe)

    per_df_info = []
    for position, df in enumerate(dataset, start=1):

        print(f"Analyzing {position}/{n_dfs} dfs.")

        info_df, model_trained = solve_nn_df(df, model, v0, NUM_ITER, LEARNING_RATE_v0, test, plot_flag)

        # Tag every row with the NN structure and the processed flag
        n_rows = info_df.shape[0]
        info_df['DOE'] = [doe for _ in range(n_rows)]
        info_df['processed'] = [processed_flag for _ in range(n_rows)]

        per_df_info.append(info_df)

    # Stack the per-DataFrame results into a single DataFrame
    info_dataset = pd.concat(per_df_info, sort=False, ignore_index=True)

    return info_dataset, model_trained

### Solve NN driven model in the whole dataset, looping over DOE

In [None]:
# def solve_nn_dataset_DOE(v0, dataset, processed_flag, NUM_ITER, LEARNING_RATE_v0, test):
        
#     DOE =[[1,2,1], [1,4,1], [1,10,1]] #Design of experiment
#     tmp = []
#     models_trained = []

#     for DOE_struct in DOE:

#         print("\n")
#         print("=="*30)

#         s = '-'.join(str(x) for x in DOE_struct)
#         print(f"NN structure: {s}")

#         info_dataset, model_trained = solve_nn_dataset(dataset, DOE_struct, v0, processed_flag, NUM_ITER,
#                                                        LEARNING_RATE_v0, test, plot_flag=True)

#         # Store info about the NN structure
#         nrow = info_dataset.shape[0]
#         info_dataset['DOE'] = [DOE_struct]*nrow
#         info_dataset['processed'] = [processed_flag]*nrow
#         models_trained.append([model_trained, DOE_struct])

        
#         # Append info_dflist
#         tmp.append(info_dataset)

#         print("=="*30)
    
#     # To better handling info_alldataset
#     info_alldataset = pd.concat(tmp, sort=False);
    
#     return info_alldataset, models_trained