## Demo of MLP Neural Net for Regression and Classification Problems

### Install required NuGet packages

In [9]:
#r "nuget: Tensorflow.NET"
#r "nuget: Tensorflow.Keras"
#r "nuget: SciSharp.TensorFlow.Redist"
#r "nuget: Numpy, 3.11.1.33"
#r "nuget: NumSharp, 0.30.0"

### Used namespaces

In [12]:
using static Tensorflow.Binding;
using static Tensorflow.KerasApi;
using Tensorflow;
using TensorNumpy = Tensorflow.NumPy;
using Numpy;
using System;
using System.IO;
using Numpy.Models;
using OneOf.Types;
using Tensorflow.Keras.Metrics;

### Define global constants and variables

In [30]:
/// <summary>
/// Global constants and settings shared by the notebook cells.
/// </summary>
class Configs
{
    // Directory the notebook process is running in; the data path below is resolved against it.
    public static string WORKING_DIRECTORY = Directory.GetCurrentDirectory();

    // Alternative dataset kept for reference:
    //public const string DATA_FILE_PATH = "../data/heart_disease_dataset.csv";
    // Path segments are passed separately so Path.Combine inserts the platform's own
    // directory separator; a hard-coded '\' only resolves on Windows.
    public static string DATA_FILE_PATH = Path.Combine(WORKING_DIRECTORY, "data", "winequality-red.csv");

    // Alternative delimiter kept for reference:
    //public const char FILE_DELIMITER = ',';
    public const char FILE_DELIMITER = ';';

    // Default for Helpers.ReadCsv(has_header): when true the first line of the file is skipped.
    public const bool IS_USE_HEADER = true;

    // Default for Helpers.ReadCsv(is_classification_task): false means the last column
    // is parsed as a floating-point regression target.
    public const bool IS_CLASSIFICATION_TASK = false;

    // Learning rate handed to the Adam optimizer.
    public const float LEARNING_RATE = 0.001f;

    // Fraction of rows kept for training + validation; the remainder forms the test set.
    public const float TRAIN_VALID_SPLIT_FRACTION = 0.80f;

    // NOTE(review): presumably the share of the train/valid rows held out for validation — confirm against usage.
    public const float VALID_SPLIT_FRACTION = 0.20f;

    // Complement of TRAIN_VALID_SPLIT_FRACTION.
    public const float TEST_SPLIT_FRACTION = 1.0f - TRAIN_VALID_SPLIT_FRACTION;

    // NOTE(review): the settings below are not referenced by the code visible in this notebook;
    // their meaning is inferred from their names only — confirm before relying on them.
    public const bool IS_SHUFFLE_DATA = true;
    public const string NEURAL_NET_TOPOLOGY_FILENAME = "../../resources/classification_neural_net.png";
    public const int RANDOM_SEED = 0;
    public const int EPOCH_REPORT_LIMIT = 20;
    public const float DROPOUT_RATE = 0.2f;
}


### Utility helper class

In [31]:

/// <summary>
/// Data-loading and preprocessing helpers used by the demo.
/// The first three methods work on Numpy.NET arrays (NDarray); the remaining ones work on
/// NumSharp arrays (NumSharp.NDArray).
/// </summary>
public static class Helpers
{
    /// <summary>
    /// Splits features and targets row-wise into a leading train part and a trailing test part.
    /// </summary>
    /// <param name="X">Feature array; it is sliced with a two-axis filter, so it needs two axes.</param>
    /// <param name="y">Target array with one row per row of <paramref name="X"/>; sliced the same way.</param>
    /// <param name="split_fraction">Fraction of the rows that ends up in the test part.</param>
    /// <param name="is_shuffle">When true, the rows are randomly reordered before splitting.</param>
    /// <returns>The tuple (X_train, X_test, y_train, y_test).</returns>
    public static ValueTuple<NDarray, NDarray, NDarray, NDarray> splitData(
        NDarray X,
        NDarray y,
        double split_fraction,
        bool is_shuffle=true
        )
    {
        int n_rows = X.shape[0];
        int n_train_rows = (int) (n_rows * (1.0 - split_fraction));
        string train_filter = $":{n_train_rows},:";
        string test_filter = $"{n_train_rows}:,:";
        if (is_shuffle)
        {
            // A single index permutation is applied to both arrays. Permuting X and y
            // independently would pair every feature row with the target of some other row.
            var row_order = Numpy.np.random.permutation(n_rows);
            X = X[row_order];
            y = y[row_order];
        }

        var X_train = X[train_filter];
        var X_test = X[test_filter];
        var y_train = y[train_filter];
        var y_test = y[test_filter];
        var result = (X_train, X_test, y_train, y_test);
        return result;
    }

    /// <summary>
    /// Computes the minimum and maximum of <paramref name="dataset"/> along axis 0.
    /// </summary>
    /// <param name="dataset">Array to reduce.</param>
    /// <returns>The tuple (min_values, max_values).</returns>
    public static ValueTuple<NDarray, NDarray> computeMinMax(NDarray dataset)
    {
        // `new int[0]` is an EMPTY axis list, not "axis 0"; the axis has to be listed explicitly.
        var reduce_axes = new[] { 0 };
        var min_values = Numpy.np.min(dataset, axis: reduce_axes);
        var max_values = Numpy.np.max(dataset, axis: reduce_axes);
        return (min_values, max_values);
    }

    /// <summary>
    /// Rescales <paramref name="dataset"/> as (dataset - min) / (max - min).
    /// </summary>
    /// <param name="dataset">Array to scale.</param>
    /// <param name="minmax">(min, max) pair, e.g. the result of <see cref="computeMinMax"/>.</param>
    /// <returns>The scaled array.</returns>
    public static NDarray minmaxScaling(NDarray dataset, ValueTuple<NDarray, NDarray> minmax)
    {
        // The denominator is zero wherever max == min; no guard is applied here.
        var dataset_scaled = (dataset - minmax.Item1) / (minmax.Item2 - minmax.Item1);
        return dataset_scaled;
    }

    /// <summary>
    /// Reads a delimited text file whose last column is the target and whose other columns are
    /// numeric features.
    /// </summary>
    /// <param name="path">Path of the file to read.</param>
    /// <param name="delimiter">Column separator.</param>
    /// <param name="has_header">When true, the first line is skipped.</param>
    /// <param name="is_classification_task">
    /// When true, the last column is parsed as an integer label and replaced by the zero-based
    /// index of that label in order of first appearance; otherwise it is parsed as a double.
    /// </param>
    /// <returns>
    /// (X, y): X reshaped to (rows, features) and y reshaped to a single column.
    /// </returns>
    /// <exception cref="InvalidDataException">A row has a different column count than the rows before it.</exception>
    public static (NumSharp.NDArray, NumSharp.NDArray) ReadCsv(
        string path, 
        char delimiter=',', 
        bool has_header=Configs.IS_USE_HEADER,
        bool is_classification_task=Configs.IS_CLASSIFICATION_TASK
        ) 
    {
        // Data files use '.' as the decimal separator regardless of the machine's regional settings.
        var invariant = System.Globalization.CultureInfo.InvariantCulture;

        var feature_values = new List<double>();
        var regression_targets = new List<double>();
        var class_indices = new List<int>();
        // Distinct labels in order of first appearance; a label's position is its class index.
        var labels = new List<int>();

        int n_rows = 0;
        int n_features = 0;
        bool has_skipped_header = false;

        using (StreamReader reader = new StreamReader(path))
        {
            string line = String.Empty;

            // Reading stops at end of file or at the first empty line.
            while (!String.IsNullOrEmpty(line = reader.ReadLine()))
            {
                if (has_header && !has_skipped_header)
                {
                    has_skipped_header = true;
                    continue;
                }

                var tokens = line.Split(delimiter);
                int n_row_features = tokens.Length - 1;
                if (n_rows > 0 && n_row_features != n_features)
                {
                    throw new InvalidDataException(
                        $"Data row {n_rows + 1} has {n_row_features} feature columns, expected {n_features}.");
                }
                n_features = n_row_features;

                feature_values.AddRange(
                    tokens.Take(n_row_features).Select(token => double.Parse(token, invariant)));

                string target_token = tokens[tokens.Length - 1];
                if (is_classification_task)
                {
                    // Typed value comparison: comparing boxed ints with `==` compares references
                    // and never matches a label that was seen on an earlier row.
                    int label = int.Parse(target_token, invariant);
                    int label_index = labels.IndexOf(label);
                    if (label_index < 0)
                    {
                        label_index = labels.Count;
                        labels.Add(label);
                    }
                    class_indices.Add(label_index);
                }
                else
                {
                    regression_targets.Add(double.Parse(target_token, invariant));
                }

                n_rows++;
            }
        }

        var X = NumSharp.np.array(feature_values.ToArray()).reshape(n_rows, n_features);
        if (is_classification_task)
        {
            var y_int = NumSharp.np.array(class_indices.ToArray()).reshape(-1, 1);
            return (X, y_int); 
        }
        else
        {
            var y_double = NumSharp.np.array(regression_targets.ToArray()).reshape(-1, 1);
            return (X, y_double); 
        }        
    }

    /// <summary>
    /// Reorders the rows of <paramref name="X"/> and <paramref name="y"/> with one shared random
    /// index permutation, so row i of both results comes from the same input row.
    /// </summary>
    /// <param name="X">Feature array.</param>
    /// <param name="y">Target array; its first dimension determines the permutation length.</param>
    /// <returns>(X, y) indexed by the permutation.</returns>
    public static (NumSharp.NDArray, NumSharp.NDArray) shuffleDataset(
        NumSharp.NDArray X, 
        NumSharp.NDArray y)
    {
        var perm = NumSharp.np.random.permutation(y.shape[0]);

        NumSharp.np.random.shuffle(perm);
        return (X[perm], y[perm]);
    }

    /// <summary>
    /// Splits features and targets into a leading train/validation part and a trailing test part.
    /// </summary>
    /// <param name="X">Feature array.</param>
    /// <param name="y">Target array, sliced with the same row ranges as <paramref name="X"/>.</param>
    /// <param name="split_factor">Fraction of the rows that goes to the train/validation part.</param>
    /// <returns>(X_train_valid, X_test, y_train_valid, y_test).</returns>
    public static (NumSharp.NDArray, NumSharp.NDArray, NumSharp.NDArray, NumSharp.NDArray) splitDataset(
        NumSharp.NDArray X, 
        NumSharp.NDArray y,
        float split_factor
        )
    {
        var n_rows =  X.shape[0];
        var n_train_valid = (int) (n_rows * split_factor);
        var train_valid_pattern = $":{n_train_valid}";
        var test_pattern = $"{n_train_valid}:";
        var X_train_valid = X[train_valid_pattern];
        var X_test = X[test_pattern];
        var y_train_valid = y[train_valid_pattern];
        var y_test = y[test_pattern];

        return (X_train_valid, X_test, y_train_valid, y_test);
    }

    /// <summary>
    /// Standardizes <paramref name="X"/> as (X - mean) / std, with mean and std taken along axis 0.
    /// </summary>
    /// <param name="X">Array to standardize.</param>
    /// <returns>(X_normalized, mean, std).</returns>
    public static (NumSharp.NDArray, NumSharp.NDArray, NumSharp.NDArray) normalizeData(NumSharp.NDArray X)
    {
        var mean = NumSharp.np.mean(X, axis: 0);
        var std = NumSharp.np.std(X, axis: 0);
        // The divisor is zero wherever std is zero; no guard is applied here.
        var X_normalized =  (X - mean) / std;
        return (X_normalized, mean, std);
    }
}       

### Custom metric - R^2

In [37]:
/// <summary>
/// Custom metric that evaluates R^2 = 1 - SS_res / SS_tot on the tensors handed to
/// <see cref="update_state"/> and remembers the most recent value.
/// </summary>
public class RSquared : IMetricFunc
{
    // Value produced by the latest update_state call; null until update_state has run once.
    Tensor? _latest;

    public RSquared()
    {
        _latest = null;
    }

    /// <summary>Name under which the metric is reported.</summary>
    public string Name => "RSquared";

    /// <summary>
    /// Computes R^2 for the given targets and predictions and stores it as the current result.
    /// </summary>
    /// <param name="y_true">Target values.</param>
    /// <param name="y_pred">Predicted values.</param>
    /// <param name="args">Not used by this metric.</param>
    /// <returns>The R^2 tensor for this call.</returns>
    public Tensor update_state(Tensor y_true, Tensor y_pred, Tensor args)
    {
        // SS_res: sum of squared prediction errors.
        var prediction_error = tf.subtract(y_true, y_pred);
        var ss_res = tf.reduce_sum(tf.square(prediction_error));

        // SS_tot: sum of squared deviations of the targets from their mean.
        var deviation_from_mean = tf.subtract(y_true, tf.reduce_mean(y_true));
        var ss_tot = tf.reduce_sum(tf.square(deviation_from_mean));

        var unexplained_share = tf.divide(ss_res, ss_tot);
        _latest = tf.subtract(tf.constant(1.0f), unexplained_share);
        return _latest;
    }

    /// <summary>Returns the value stored by the last <see cref="update_state"/> call.</summary>
    public Tensor result() => _latest;

    /// <summary>Intentionally a no-op: the stored value is only replaced by <see cref="update_state"/>.</summary>
    public void reset_states()
    {
    }
}


### Demo of the MLP Neural Network

In [43]:
/// <summary>
/// End-to-end demo: reads the dataset, shuffles and splits it, standardizes the training part
/// and fits a small multi-layer perceptron on it.
/// </summary>
public static class Demo
{
    /// <summary>Entry point of the demo.</summary>
    public static void run()
    {
        demoMLP();                    
    }

    /// <summary>
    /// Loads the file configured in <c>Configs</c>, shuffles the rows and splits them.
    /// </summary>
    /// <returns>
    /// (X, y, X_train_valid, X_test, y_train_valid, y_test) where X and y are the arrays as
    /// returned by <c>Helpers.ReadCsv</c> and the remaining four come from the shuffled data.
    /// </returns>
    static (NumSharp.NDArray, NumSharp.NDArray, NumSharp.NDArray, NumSharp.NDArray, NumSharp.NDArray, NumSharp.NDArray) 
    readAndPreprocessData()
    {
        var (X,y) = Helpers.ReadCsv(Configs.DATA_FILE_PATH, Configs.FILE_DELIMITER);
        var (X_shuffled, y_shuffled) = Helpers.shuffleDataset(X, y);
        var (X_train_valid, X_test, y_train_valid, y_test) = Helpers.splitDataset(
            X_shuffled, 
            y_shuffled, 
            split_factor: Configs.TRAIN_VALID_SPLIT_FRACTION);
        return (X, y, X_train_valid, X_test, y_train_valid, y_test);
    }

    /// <summary>
    /// Converts a NumSharp array into a Tensorflow.NET NDArray by way of a multi-dimensional
    /// <c>float</c> array.
    /// </summary>
    static TensorNumpy.NDArray convertToCSharpFloatArrayFromNdarray(NumSharp.NDArray x) 
    {
        var x_array = x.ToMuliDimArray<float>();
        var x_tensor_array = TensorNumpy.np.array(x_array);
        return x_tensor_array;            
    } 

    /// <summary>
    /// Converts a NumSharp array into a Tensorflow.NET NDArray by way of a multi-dimensional
    /// <c>int</c> array.
    /// </summary>
    static TensorNumpy.NDArray convertToCSharpIntArrayFromNdarray(NumSharp.NDArray x) 
    {
        var x_array = x.ToMuliDimArray<int>();
        var x_tensor_array = TensorNumpy.np.array(x_array);
        return x_tensor_array;            
    } 

    /// <summary>
    /// Prepares the data, builds the model and trains it on the standardized train/validation rows.
    /// </summary>
    static void demoMLP()
    {
        // Only the train/validation split is used here; the other tuple elements are discarded.
        var (_, _, X_train_valid, _, y_train_valid, _) = readAndPreprocessData();
        var (X_train_valid_norm, _, _) = Helpers.normalizeData(X_train_valid);
        var (y_train_valid_norm, _, _) = Helpers.normalizeData(y_train_valid);
        var n_features = X_train_valid.shape[1];
        
        var model = createMLPModel(n_features);
        var X_train_valid_tensor_array = convertToCSharpFloatArrayFromNdarray(X_train_valid_norm);
        var y_train_valid_tensor_array = convertToCSharpFloatArrayFromNdarray(y_train_valid_norm);
        model.fit(
            x: X_train_valid_tensor_array, 
            y: y_train_valid_tensor_array,
            // Taken from the shared configuration instead of repeating the literal 0.20f.
            validation_split: Configs.VALID_SPLIT_FRACTION,
            epochs: 10, 
            batch_size: 32);
    }

    /// <summary>
    /// Builds and compiles the network: three Dense(64, ReLU) layers followed by a Dense(1) output,
    /// Adam optimizer, mean-absolute-error loss and the custom <c>RSquared</c> metric.
    /// </summary>
    /// <param name="n_features">Number of input features (size of the input layer).</param>
    /// <returns>The compiled model.</returns>
    private static Tensorflow.Keras.Engine.Sequential createMLPModel(int n_features)
    {
        var model = KerasApi.keras.Sequential();
        model.add(KerasApi.keras.layers.Dense(
            units: 64,
            input_shape: new Tensorflow.Shape(n_features),
            activation: KerasApi.keras.activations.Relu
        ));
        model.add(KerasApi.keras.layers.Dense(
            units: 64,
            activation: KerasApi.keras.activations.Relu
        ));
        model.add(KerasApi.keras.layers.Dense(
            units: 64,
            activation: KerasApi.keras.activations.Relu
        ));
        model.add(KerasApi.keras.layers.Dense(
            units: 1
        ));
        model.summary();
        IMetricFunc metric = new RSquared();
        model.compile(
            optimizer: KerasApi.keras.optimizers.Adam(learning_rate: Configs.LEARNING_RATE),
            loss: KerasApi.keras.losses.MeanAbsoluteError(),
            //metrics: new [] {}
            //metrics: new[] { KerasApi.keras.metrics.Accuracy() }
            metrics: new[] { metric }
        );
        return model;
    }

    /// <summary>
    /// Unfinished overload: it standardizes its inputs but does not build or return a model,
    /// and nothing in this class calls it.
    /// </summary>
    static void createMLPModel(
            NumSharp.NDArray X_train_valid, 
            NumSharp.NDArray y_train_valid)
    {
        // Features are counted along axis 1 (columns), as in demoMLP; axis 0 is the row count.
        var n_features = X_train_valid.shape[1];
        var (X_train_valid_norm, X_mean, X_std) = Helpers.normalizeData(X_train_valid);
        var (y_train_valid_norm, y_mean, y_std) = Helpers.normalizeData(y_train_valid);
                    
    } 
    
}

In [44]:
 // Run the demo: Demo.run() calls demoMLP(), which loads the data, builds the model and fits it.
 Demo.run();

Model: sequential 
_________________________________________________________________ 
Layer (type)                  Output Shape              Param #   
dense (Dense)                 (None, 64)                768       
_________________________________________________________________ 
dense_1 (Dense)               (None, 64)                4160      
_________________________________________________________________ 
dense_2 (Dense)               (None, 64)                4160      
_________________________________________________________________ 
dense_3 (Dense)               (None, 1)                 65        
Total params: 9153 
Trainable params: 9153 
Non-trainable params: 0 
_________________________________________________________________ 
Epoch: 001/010
Epoch: 002/010
Epoch: 003/010
Epoch: 004/010
Epoch: 005/010
Epoch: 006/010
Epoch: 007/010
Epoch: 008/010
Epoch: 009/010
Epoch: 010/010
