In [1]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.impute import SimpleImputer as SI
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import RandomizedSearchCV
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, LSTM
from tensorflow.keras.optimizers import Adam, RMSprop
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
import warnings
import pickle

# NOTE: this silences every warning for the rest of the run.
warnings.filterwarnings('ignore')

# Data Preparation
t_data = []
t_labels = []

folder = 'measurements'

# Delimiters tried, in order, for every measurement file.
DELIMITERS = (',', '\t', ' ')

# (first_row, last_row, label) with 1-based, inclusive row numbers.
# Rows outside every range (102-103, 205, 309, 412, 515 and everything
# after 618) are not used as samples.
LABELLED_ROW_RANGES = (
    (1, 101, 1),
    (104, 204, 1),
    (206, 308, 1),
    (310, 411, 0),
    (413, 514, 0),
    (516, 618, 0),
)


def label_for_row(row_number):
    """Return the class label (1 or 0) for a 1-based row number.

    Returns None when the row lies outside every range listed in
    LABELLED_ROW_RANGES and must therefore be skipped.
    """
    for first, last, label in LABELLED_ROW_RANGES:
        if first <= row_number <= last:
            return label
    return None


def read_measurement_file(path):
    """Load one three-column measurement file as a float array of shape (rows, 3).

    Each delimiter in DELIMITERS is tried in turn. Reading with the wrong
    delimiter usually does NOT raise: pandas puts the whole line into the
    first column and fills the others with NaN. A parse is therefore only
    accepted when every column contains at least one finite number.

    Non-numeric and non-finite cells are replaced by the mean of the finite
    values in their column.

    Raises:
        ValueError: no delimiter produced three numeric columns (this also
            covers pandas' EmptyDataError, which is a ValueError subclass).
        OSError: the file cannot be opened.
    """
    for delimiter in DELIMITERS:
        try:
            frame = pd.read_csv(path, header=None, delimiter=delimiter, names=[0, 1, 2])
        except pd.errors.ParserError:
            continue

        raw = frame.to_numpy()
        values = np.array(
            pd.to_numeric(raw.flatten(), errors='coerce'), dtype=float
        ).reshape(raw.shape)
        # Treat +/-inf exactly like a missing value.
        values = np.where(np.isfinite(values), values, np.nan)

        # Require data in every column; an all-NaN column signals a wrong
        # delimiter (or a file that does not hold three numeric columns).
        if values.size and np.isfinite(values).any(axis=0).all():
            break
    else:
        raise ValueError("no delimiter produced three numeric columns")

    missing = np.isnan(values)
    if missing.any():
        # Column-mean imputation; every column is known to have finite data,
        # so the array keeps its three columns.
        values = np.where(missing, np.nanmean(values, axis=0), values)
    return values


for i in range(1, 91):
    f = os.path.join(folder, f'T_{i}.csv')

    try:
        df = read_measurement_file(f)
    except (OSError, ValueError) as e:
        # Report and skip unreadable files instead of aborting the whole run.
        print(f"Error reading {f}: {e}")
        continue

    for row_number, row in enumerate(df, start=1):
        label = label_for_row(row_number)
        if label is not None:
            t_labels.append(label)
            t_data.append(row)

t_data = np.array(t_data)
t_labels = np.array(t_labels)

# Standardise each feature column with a scaler fitted on the full data set
scaler = StandardScaler()
t_data = scaler.fit_transform(t_data)

# Persist the fitted scaler so the identical transform can be re-applied later
with open('scaler.pkl', 'wb') as scaler_file:
    pickle.dump(scaler, scaler_file)

# Insert a length-1 time axis for the RNN: (samples, features) -> (samples, 1, features)
t_data = np.expand_dims(t_data, axis=1)

# Define the RNN Model
def create_model(optimizer='adam', init='he_uniform', dropout_rate=0.5, neurons=100, learning_rate=0.001):
    """Build and compile the binary classifier used by the hyperparameter search.

    Architecture: LSTM(neurons) -> BatchNorm -> Dropout -> Dense(neurons // 2)
    -> BatchNorm -> Dropout -> Dense(1, sigmoid).

    Args:
        optimizer: 'adam' or 'rmsprop' builds that optimizer with
            ``learning_rate``; any other value is handed to ``compile``
            unchanged (``learning_rate`` is then not applied).
        init: kernel initializer used by every layer that takes one.
        dropout_rate: rate of both Dropout layers.
        neurons: LSTM width; the hidden Dense layer gets ``neurons // 2`` units.
        learning_rate: learning rate for the 'adam' / 'rmsprop' optimizers.

    Returns:
        The compiled Sequential model (binary cross-entropy loss, accuracy metric).

    Note:
        The input shape is read from the module-level ``t_data``, which must
        already be 3-D (samples, 1, features) when this function is called.
    """
    known_optimizers = {'adam': Adam, 'rmsprop': RMSprop}
    optimizer_cls = known_optimizers.get(optimizer) if isinstance(optimizer, str) else None
    if optimizer_cls is not None:
        optimizer = optimizer_cls(learning_rate=learning_rate)

    feature_count = t_data.shape[2]

    # Layers are created in the same order as before so auto-generated names match.
    model = Sequential()
    model.add(LSTM(neurons, activation='relu', kernel_initializer=init, input_shape=(1, feature_count)))
    model.add(BatchNormalization())
    model.add(Dropout(dropout_rate))
    model.add(Dense(neurons // 2, activation='relu', kernel_initializer=init))
    model.add(BatchNormalization())
    model.add(Dropout(dropout_rate))
    model.add(Dense(1, activation='sigmoid', kernel_initializer=init))

    model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
    return model

# Wrap the Keras builder so scikit-learn's search utilities can drive it
model = KerasClassifier(build_fn=create_model, verbose=0)

# Hyperparameter tuning: candidate values sampled by the randomized search
param_dist = dict(
    batch_size=[16, 32, 64],
    epochs=[50, 100],
    optimizer=['adam', 'rmsprop'],
    init=['he_uniform', 'glorot_uniform'],
    dropout_rate=[0.3, 0.5],
    neurons=[32, 64, 128],
    learning_rate=[0.001, 0.01],
)

# 10 sampled configurations, 3-fold cross-validation, all available cores
random_search = RandomizedSearchCV(
    estimator=model,
    param_distributions=param_dist,
    n_iter=10,
    cv=3,
    n_jobs=-1,
)
random_result = random_search.fit(t_data, t_labels)

# Print best parameters
print(f"Best parameters found: {random_result.best_params_}")
print(f"Best accuracy: {random_result.best_score_}")

# The search has already refit the best configuration on the full data set;
# pull the underlying Keras model out of the scikit-learn wrapper.
best_model = random_result.best_estimator_.model

# Save the trained model in TensorFlow SavedModel format
model_file = 'rnn_model'
best_model.save(model_file, save_format='tf')

print(f"Model trained and saved as {model_file}")


Best parameters found: {'optimizer': 'rmsprop', 'neurons': 128, 'learning_rate': 0.001, 'init': 'glorot_uniform', 'epochs': 100, 'dropout_rate': 0.5, 'batch_size': 32}
Best accuracy: 0.9786855578422546
INFO:tensorflow:Assets written to: rnn_model\assets
Model trained and saved as rnn_model
