In [42]:
import numpy as np
import pandas as pd #csv-files and tables
import tensorflow as tf #neural network
# import keras.backend as K
#create graphics
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import plotly.express as px

# #utils for working with neural network
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import accuracy_score

# #interactive in jupyter notebook
import ipywidgets as widgets
from ipywidgets import interact, interact_manual

In [43]:
import levenberg_marquardt as lm

In [44]:
# activate widgets in terminal
# jupyter nbextension enable --py widgetsnbextension

In [45]:
# Load every dataset and tag each frame with the short name used to select it later.
def _read_named(reader, source, label, **reader_kwargs):
    """Read `source` with `reader` and store `label` on the frame's `name` attribute."""
    frame = reader(source, **reader_kwargs)
    frame.name = label
    return frame


dataset_0 = _read_named(pd.read_csv, 'data/simple.csv', 'simple')
dataset_1 = _read_named(pd.read_csv, 'data/iris_new.csv', 'iris')
dataset_2 = _read_named(pd.read_csv, 'data/cancer.csv', 'cancer')
dataset_3 = _read_named(pd.read_csv, 'data/glass.csv', 'glass')
dataset_4 = _read_named(pd.read_csv, 'data/thyroid.csv', 'thyroid')
dataset_5 = _read_named(pd.read_csv, 'data/vine.csv', 'vine')
dataset_6 = _read_named(pd.read_csv, 'data/train_hackaton.csv', 'hackaton')
dataset_7 = _read_named(
    pd.read_excel, 'data/data_spectrum1.xlsx', 'sensor', sheet_name="Лист2"
)
# Separate workbook holding the external test measurements for the sensor dataset.
dataset_7_test = pd.read_excel('data/data_spectrum_kubinka15_1.xlsx', sheet_name="Лист2")

In [46]:
# External hackathon test file: the first 22 columns are taken as inputs,
# the last 5 columns as outputs (same slicing as the hackathon training set).
dataset_test_hack = pd.read_csv('data/test_hackaton.csv')
input_data_test_hack = dataset_test_hack.iloc[:, slice(0, 22)]
output_data_test_hack = dataset_test_hack.iloc[:, slice(-5, None)]

In [60]:
# Sensor test workbook: columns 10..39 are taken as inputs, the last column as the output
# (same slicing as the sensor training set).
in_dataset_7_test = dataset_7_test.iloc[:, slice(10, 40)]
out_dataset_7_test = dataset_7_test.iloc[:, slice(-1, None)]

In [48]:
# Scale every input column of the sensor test set by that column's own maximum.
# NOTE(review): the training inputs are scaled by the *training* maxima, so the two
# sets do not share one scale — confirm this is intended.
test_column_max = in_dataset_7_test.max().astype(np.float64)
in_dataset_7_test = in_dataset_7_test.div(test_column_max)

In [49]:
# 1-based position of the largest output value in every row of the hackathon test targets.
max_test_hack = [
    tf.math.argmax(target_row).numpy() + 1
    for target_row in output_data_test_hack.values
]

In [61]:
# Data markup: split every dataset into input columns and output columns with
# positional (`iloc`) slicing, and keep the raw numpy arrays of both as `ioN`.
def _markup(frame, input_columns, output_count):
    """Return (inputs, outputs, [inputs.values, outputs.values]) for one dataset.

    `input_columns` is a positional column slice; the outputs are the last
    `output_count` columns of `frame`.
    """
    inputs = frame.iloc[:, input_columns]
    outputs = frame.iloc[:, -output_count:]
    return inputs, outputs, [inputs.values, outputs.values]


input_data_0, output_data_0, io0 = _markup(dataset_0, slice(None, 2), 4)
input_data_1, output_data_1, io1 = _markup(dataset_1, slice(None, 4), 3)
input_data_2, output_data_2, io2 = _markup(dataset_2, slice(None, 9), 2)
input_data_3, output_data_3, io3 = _markup(dataset_3, slice(None, 9), 2)
input_data_4, output_data_4, io4 = _markup(dataset_4, slice(None, 21), 3)
input_data_5, output_data_5, io5 = _markup(dataset_5, slice(None, 13), 3)
input_data_6, output_data_6, io6 = _markup(dataset_6, slice(None, 22), 5)
input_data_7, output_data_7, io7 = _markup(dataset_7, slice(10, 40), 1)

In [62]:
# Scale every sensor input column by its own maximum.
input_data_7 = input_data_7/input_data_7.max().astype(np.float64)
# `io7` was created from the un-normalised `.values` before this cell ran; dividing only
# rebinds `input_data_7`, so rebuild `io7` here. Otherwise the scaled inputs never reach
# `io` / `dict_datasets` and the network trains on raw values.
io7 = [input_data_7.values, output_data_7.values]

In [63]:
# All [inputs, outputs] array pairs, ordered by dataset index.
io = [
    io0, io1, io2, io3,
    io4, io5, io6, io7,
]

In [64]:
# Lookup table: dataset name -> (full DataFrame, [inputs array, outputs array]).
dict_datasets = {
    frame.name: (frame, io_pair)
    for frame, io_pair in zip(
        (dataset_0, dataset_1, dataset_2, dataset_3,
         dataset_4, dataset_5, dataset_6, dataset_7),
        (io0, io1, io2, io3, io4, io5, io6, io7),
    )
}

In [65]:
def mae_easy(y_true, y_pred):
    """Return the mean absolute error between `y_true` and `y_pred`.

    The absolute difference is averaged over all elements, giving a scalar tensor.
    """
    # The `keras.backend as K` import is commented out in this notebook, so calling
    # `K.mean` / `K.abs` raised NameError; the equivalent TensorFlow ops are used instead.
    return tf.reduce_mean(tf.abs(y_true - y_pred))

In [73]:
def _make_optimizer(name, learning_rate):
    """Instantiate the Keras optimizer chosen in the widget with the requested learning rate."""
    optimizer_classes = {
        'adam': tf.keras.optimizers.Adam,
        'sgd': tf.keras.optimizers.SGD,
        'RMSprop': tf.keras.optimizers.RMSprop,
    }
    return optimizer_classes[name](learning_rate=learning_rate)


def _evaluation_set(dataset_name, X_test, Y_test):
    """Return the (inputs, targets) pair used for validation/evaluation of `dataset_name`."""
    if dataset_name == 'sensor':
        features, targets = in_dataset_7_test, out_dataset_7_test
    elif dataset_name == 'hackaton':
        features, targets = input_data_test_hack, output_data_test_hack
    else:
        # No external test file is loaded for the other datasets: use the hold-out split.
        return X_test, Y_test
    return np.asarray(features, dtype=np.float32), np.asarray(targets, dtype=np.float32)


def _build_network(m, p, neurons, activations, l2_factors, dropouts):
    """Build two hidden Dense+Dropout stages followed by a sigmoid output layer of `p` units."""
    return tf.keras.Sequential([
        tf.keras.layers.Dense(
            neurons[0],
            input_dim=m,
            activation=activations[0],
            kernel_regularizer=tf.keras.regularizers.l2(float(l2_factors[0])),
            kernel_initializer='glorot_uniform',  # Xavier initialization
        ),
        tf.keras.layers.Dropout(dropouts[0]),
        tf.keras.layers.Dense(
            neurons[1],
            activation=activations[1],
            kernel_regularizer=tf.keras.regularizers.l2(float(l2_factors[1])),
            kernel_initializer='glorot_uniform',
        ),
        tf.keras.layers.Dropout(dropouts[1]),
        # One output unit per target column (a fixed single unit only fits 1-column targets).
        tf.keras.layers.Dense(
            p,
            activation='sigmoid',
            kernel_initializer='glorot_uniform',
        ),
    ])


def _add_curve(fig, x, y, name, color):
    """Append one line trace to `fig`."""
    fig.add_trace(go.Scatter(x=x, y=y, name=name, line=dict(color=color)))


def _labels_from_scores(scores):
    """Return the 1-based index of the largest score in every row."""
    return (np.argmax(scores, axis=1) + 1).tolist()


def _print_summary(loss, metrics, losses, metric_values):
    """Print mean / max / min of the collected loss and metric values."""
    print(f'MEAN {loss} {sum(losses)/len(losses)} MEAN {metrics} {sum(metric_values)/len(metric_values)}')
    print(f'MAX {loss} {max(losses)} MAX {metrics} {max(metric_values)}')
    print(f'MIN {loss} {min(losses)} MIN {metrics} {min(metric_values)}')


@interact_manual
def learning_datasets(
        dataset_name = dict_datasets.keys(),
        test_size = (0.05,1,0.05),
        batch_size = widgets.IntText(
            value=4,
            disabled=False
        ),
        activation_1_layer = ['tanh', 'sigmoid', 'relu'],
        kernel_reg_1_layer = widgets.FloatText(
            value=0.0,
            disabled=False
        ),
        dropout_1 =  widgets.FloatText(
            value=0.0,
            disabled=False
        ), 
        activation_2_layer = ['tanh', 'sigmoid', 'relu'],
        kernel_reg_2_layer = widgets.FloatText(
            value=0.0,
            disabled=False
        ),
        dropout_2 =  widgets.FloatText(
            value=0.0,
            disabled=False
        ),
        neurons_1_layer = (5,100,5),
        neurons_2_layer = (5,100,5),
        optimizer = ['adam', 'sgd', 'RMSprop'],
        learning_rate = widgets.FloatText(
            value=0.1,
            disabled=False
        ),
        loss = ['mse', 'mae', 'categorical_crossentropy'],
        metrics = ['accuracy', 'mae'],
        epochs = widgets.IntText(
            value=100,
            disabled=False
        )
    ):
    """Train a two-hidden-layer network several times and compare it with Levenberg-Marquardt.

    The selected dataset is split randomly five times; each split trains a fresh
    model with the chosen Keras optimizer, and its loss/metric curves are added to
    two plotly figures. Afterwards five clones of the last model are trained with
    the `lm` (Levenberg-Marquardt) wrapper on the last training split. For both
    methods the mean / max / min evaluation loss and metric are printed.

    Parameters (all supplied by the `interact_manual` widgets):
        dataset_name: key of `dict_datasets`.
        test_size: hold-out fraction passed to `train_test_split`.
            NOTE(review): the slider reaches 1.0, which `train_test_split` is
            expected to reject for a float — confirm and tighten the range if so.
        batch_size: batch size for `fit` / `evaluate` of the Keras model and for
            `evaluate` of the LM wrapper.
        activation_*_layer, kernel_reg_*_layer, dropout_*, neurons_*_layer:
            activation, L2 factor, dropout rate and width of each hidden layer.
        optimizer, learning_rate: optimizer name and its learning rate. The learning
            rate is now passed to the optimizer; it used to be silently ignored.
        loss, metrics: Keras loss / metric identifiers.
        epochs: number of training epochs for both methods.

    Validation and evaluation use the external test file for 'sensor' and
    'hackaton', and the hold-out split for every other dataset.

    Returns:
        None. Results are printed and plotted.
    """
    n_runs = 5

    input_data, output_data = dict_datasets[dataset_name][1]
    Q, m = input_data.shape      # rows, input columns
    p = output_data.shape[1]     # output columns

    fig1 = go.Figure()
    fig2 = go.Figure()

    x_axis = np.linspace(1, epochs, epochs)
    val_metrics = f'val_{metrics}'

    list_loss = []
    list_metric = []

    # --- gradient-based optimizer: a fresh split and a fresh model per run ---
    for run in range(n_runs):
        X_train, X_test, Y_train, Y_test = (
            part.astype(np.float32)
            for part in train_test_split(input_data, output_data, test_size=test_size)
        )
        X_eval, Y_eval = _evaluation_set(dataset_name, X_test, Y_test)

        model = _build_network(
            m,
            p,
            neurons=(neurons_1_layer, neurons_2_layer),
            activations=(activation_1_layer, activation_2_layer),
            l2_factors=(kernel_reg_1_layer, kernel_reg_2_layer),
            dropouts=(dropout_1, dropout_2),
        )
        model.compile(
            # A new optimizer instance per model: optimizer state must not be shared.
            optimizer=_make_optimizer(optimizer, learning_rate),
            loss=[loss],
            metrics=[metrics],
        )

        history = model.fit(
            X_train,
            Y_train,
            validation_data=(X_eval, Y_eval),
            epochs=epochs,
            batch_size=batch_size,
            verbose=0
        )

        _add_curve(fig1, x_axis, history.history['loss'], f'Train sample {run}', 'green')
        _add_curve(fig1, x_axis, history.history['val_loss'], f'Valid sample {run}', 'black')
        _add_curve(fig2, x_axis, history.history[metrics], f'Train sample {run}', 'green')
        _add_curve(fig2, x_axis, history.history[val_metrics], f'Valid sample {run}', 'black')

        eval_step = model.evaluate(X_eval, Y_eval, batch_size=batch_size, verbose=0)
        list_loss.append(eval_step[0])
        list_metric.append(eval_step[1])

    fig1.update_layout(
        title='Loss',
        autosize=False,
        width=800,
        height=500,
        yaxis=dict(title_text=f'{loss}'),
        xaxis=dict(title_text='Epoch')
    )
    fig2.update_layout(
        title='Metrics',
        autosize=False,
        width=800,
        height=500,
        yaxis=dict(title_text=f'{metrics}'),
        xaxis=dict(title_text='Epoch')
    )

    # Sizes refer to the split of the last run.
    print(f'{Q} sample with {m} input columns and {p} output')
    print(f'Size of train sample: {len(X_train)}')
    print(f'Size of test sample: {len(X_test)}')

    fig1.show()
    fig2.show()

    if dataset_name == 'hackaton':
        # Class labels predicted by the model of the last run on the external test file.
        pred_max = _labels_from_scores(model.predict(input_data_test_hack))
        print(f'Prediction labels {pred_max}')
        mae_sklearn_hack = mean_absolute_error(pred_max, max_test_hack)
        print(f'Sklearn test hackaton mae {mae_sklearn_hack}')

    print(f"{optimizer} ")
    _print_summary(loss, metrics, list_loss, list_metric)

    print(f"------------------ LEVENBERG-MARQUARDT -----------------")

    list_loss_lm = []
    list_metric_lm = []

    # --- Levenberg-Marquardt: same architecture (weights re-initialised by the clone),
    #     every run trains on the training split of the last run above ---
    for run in range(n_runs):
        model_wrapper = lm.ModelWrapper(tf.keras.models.clone_model(model))
        model_wrapper.compile(
            optimizer=tf.keras.optimizers.SGD(learning_rate=1),
            loss=lm.MeanSquaredError(),
            metrics = [metrics]
        )
        model_wrapper.fit(
            X_train,
            Y_train,
            epochs=epochs,
            verbose=0
        )

        if dataset_name == 'hackaton':
            pred_max_lm = _labels_from_scores(model_wrapper.predict(input_data_test_hack))
            print(f'Prediction labels lm {pred_max_lm}')
            mae_sklearn_hack_lm = mean_absolute_error(pred_max_lm, max_test_hack)
            print(f'Sklearn test hackaton mae lm {mae_sklearn_hack_lm}')

        ev_lm = model_wrapper.evaluate(X_eval, Y_eval, batch_size=batch_size, verbose=0)
        list_loss_lm.append(ev_lm[0])
        list_metric_lm.append(ev_lm[1])

    print(f"LEVENBERG-MARQUARDT")
    _print_summary(loss, metrics, list_loss_lm, list_metric_lm)

interactive(children=(Dropdown(description='dataset_name', options=('simple', 'iris', 'cancer', 'glass', 'thyr…