In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import BatchNormalization, Conv1D, MaxPooling1D, Flatten, Dropout, Dense, Activation

import tensorflow as tf

In [None]:
from lib.reproduction import major_oxides
import pandas as pd
from sklearn.metrics import mean_squared_error
from lib import full_flow_dataloader

from lib.norms import Norm1Scaler, Norm3Scaler

# Load the processed train/test frames through the project's full-flow loader.
# NOTE(review): `load_cache_if_exits` is the keyword exactly as the loader is
# called here (presumably "if exists"); it has to match the loader's own
# signature, so it is left untouched. Passing False presumably bypasses any
# on-disk cache — confirm against lib.full_flow_dataloader.
train_processed, test_processed = full_flow_dataloader.load_full_flow_data(
    load_cache_if_exits=False,
    average_shots=True,
)


In [None]:
# Show the processed training frame as this cell's output for a quick visual check.
train_processed

In [None]:
# Seed TensorFlow before any layer is created. The model is built inside this
# cell (model.summary() below needs built weights), so the initial weights are
# drawn here; seeding only in a later cell would leave them unreproducible.
tf.random.set_seed(42)

# Penalty factor applied to every convolution kernel.
# NOTE(review): the previous code passed the bare float 0.001 as
# `kernel_regularizer`, which Keras rejects ("Could not interpret regularizer
# identifier"). An L2 penalty is assumed to be what was intended — confirm.
CONV_WEIGHT_DECAY = 0.001


def _conv1d(filters):
    """Return a width-5, stride-2 ReLU Conv1D layer with an L2 kernel penalty.

    Args:
        filters: Number of output channels of the convolution.

    Returns:
        A fresh, unbuilt ``Conv1D`` layer.
    """
    return Conv1D(
        filters=filters,
        kernel_size=5,
        strides=2,
        activation='relu',
        kernel_regularizer=tf.keras.regularizers.l2(CONV_WEIGHT_DECAY),
    )


model = Sequential([
    # Normalise the raw input; each sample is a 6144-point, single-channel sequence.
    BatchNormalization(input_shape=(6144, 1)),

    # Two convolution + pooling stages.
    _conv1d(8),
    MaxPooling1D(pool_size=2),
    _conv1d(16),
    MaxPooling1D(pool_size=2),

    # Three further convolutions without pooling.
    _conv1d(32),
    _conv1d(64),
    _conv1d(128),

    # Flatten the feature maps so they can feed the dense head.
    Flatten(),

    # Dropout for regularisation before the dense head.
    Dropout(0.5),

    # Dense head, halving the width at every step.
    Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(16, activation='relu'),

    # Linear regression output: one unit per target column in `major_oxides`
    # (previously hard-coded to 8).
    Dense(len(major_oxides), activation='linear'),
])
model.summary()

In [None]:
# Fix TensorFlow's global random seed so that random operations executed after
# this cell start from the same state on every run.
tf.random.set_seed(42)

In [None]:
# Configure training: minimise mean squared error with Adam and additionally
# report mean absolute error each epoch.
model.compile(
    loss='mean_squared_error',
    optimizer='adam',
    metrics=['mean_absolute_error'],
)

In [None]:
# Columns that are not model inputs: the regression targets themselves plus
# the "ID" and "Sample Name" columns.
drop_cols = major_oxides + ["ID", "Sample Name"]

# For each split, the features are everything left after removing `drop_cols`
# and the targets are the `major_oxides` columns, in that order.
X_train, y_train = train_processed.drop(columns=drop_cols), train_processed[major_oxides]
X_test, y_test = test_processed.drop(columns=drop_cols), test_processed[major_oxides]

In [None]:
# The network's first layer is declared with input_shape=(6144, 1), so each
# feature row is reshaped into a 6144-step sequence with one channel; -1 lets
# NumPy infer the number of samples.
spectrum_shape = (-1, 6144, 1)
X_train_reshaped, X_test_reshaped = (
    split.to_numpy().reshape(spectrum_shape) for split in (X_train, X_test)
)

In [None]:
#import tensforflow as tf
import tensorflow as tf

# Early stopping on the validation loss with a patience of 6 epochs; the
# weights from the best epoch are restored when training halts.
callback = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=6,
    restore_best_weights=True,
)

# Train with a 0.2 validation split taken from the training data. The epoch
# limit of 1000 is only an upper bound; the callback above normally ends
# training earlier.
history = model.fit(
    X_train_reshaped,
    y_train,
    validation_split=0.2,
    batch_size=10,
    epochs=1000,
    callbacks=[callback],
)

In [None]:
# Predict all targets for the held-out test samples, then compute a single
# mean squared error over the whole prediction matrix.
predictions = model.predict(X_test_reshaped)
mse = mean_squared_error(y_true=y_test, y_pred=predictions)

In [None]:
# Report the test RMSE separately for every oxide.
# Column i of `predictions` is paired with major_oxides[i], the same order
# used to select the target columns for y_test.
for i, oxide in enumerate(major_oxides):
    y_test_oxide = y_test[oxide]
    predictions_oxide = predictions[:, i]
    # `mean_squared_error(..., squared=False)` was deprecated in scikit-learn
    # 1.4 and removed in 1.6; taking the square root of the plain MSE gives the
    # same RMSE on every scikit-learn version.
    rmse = mean_squared_error(y_test_oxide, predictions_oxide) ** 0.5
    print(f"RMSE for {oxide}: {rmse}")