In [None]:
import tensorflow as tf
# Print the installed TensorFlow version so the notebook output records
# which runtime the cells below were executed with.
print(tf.__version__)


2.17.1


In [None]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout, Bidirectional, LSTM, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.decomposition import PCA
import scipy.stats
from google.colab import drive
import zipfile

# Make the user's Google Drive visible under /content/drive (Colab only).
drive.mount('/content/drive')

# Locations of the training archive, its extraction target, and the folder
# trained models are written to.
zip_file_path = '/content/drive/MyDrive/Pattern_recognition_Assignment2/pr_project_train_data.zip'
extract_folder = '/content/drive/MyDrive/Pattern_recognition_Assignment2/pr_project_train_data'
model_save_folder = '/content/drive/MyDrive/Pattern_recognition_Assignment2/models3'

# Unpack the training archive once; an existing target folder is reused as-is.
if not os.path.exists(extract_folder):
    with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
        zip_ref.extractall(extract_folder)

# Handle nested directories: while the current folder holds exactly one
# entry and that entry is a directory, descend into it.
data_dir = extract_folder
while True:
    entries = os.listdir(data_dir)
    if len(entries) != 1:
        break
    only_child = os.path.join(data_dir, entries[0])
    if not os.path.isdir(only_child):
        break
    data_dir = only_child

# Load dataset
def load_data(data_dir):
    """Load every ``.txt`` sample in ``data_dir`` together with its label.

    Each file is parsed with ``np.loadtxt``. The class label is read from the
    file name: the first character of the last ``_``-separated token is
    converted to ``int`` (e.g. ``"walk_3.txt"`` -> ``3``).

    Args:
        data_dir: Directory containing the sample files. Entries that do not
            end in ``.txt`` are ignored.

    Returns:
        A ``(X, y)`` tuple of NumPy arrays: the stacked file contents and the
        matching integer labels, ordered by file name.

    Raises:
        ValueError: If a file name's label character is not a digit.
    """
    X, y = [], []
    # os.listdir returns entries in arbitrary order; sorting makes the sample
    # order (and therefore any seeded split downstream) reproducible.
    for filename in sorted(os.listdir(data_dir)):
        if not filename.endswith(".txt"):
            continue
        X.append(np.loadtxt(os.path.join(data_dir, filename)))
        # Extract class label from filename.
        y.append(int(filename.split('_')[-1][0]))
    return np.array(X), np.array(y)

# extract features
def extract_features(X):
    """Compute a fixed-length statistical feature vector for every sample.

    Every statistic is taken along axis 0 of a sample, i.e. once per column.
    The vector concatenates, in this order:

    * time domain: mean, std, min, max, median, mean of absolute values,
      skewness, kurtosis, sum of absolute values;
    * frequency domain (magnitude of ``np.fft.rfft`` along axis 0): mean,
      std, the five largest magnitudes per column (row-major flattened),
      spectral centroid (in bin-index units), spectral entropy, and total
      power.

    Args:
        X: Iterable of samples. Each sample is a 2-D array; a 1-D sample is
            treated as a single column.

    Returns:
        2-D array with one feature row per sample.
    """
    features = []
    for sample in X:
        sample = np.asarray(sample)
        if sample.ndim == 1:
            # Promote a single-channel signal to one column so the per-column
            # broadcasting below (notably the centroid) stays well-defined.
            sample = sample[:, None]

        mean = np.mean(sample, axis=0)
        std = np.std(sample, axis=0)
        minimum = np.min(sample, axis=0)
        maximum = np.max(sample, axis=0)
        median = np.median(sample, axis=0)
        abs_mean = np.mean(np.abs(sample), axis=0)
        skewness = scipy.stats.skew(sample, axis=0)
        kurtosis = scipy.stats.kurtosis(sample, axis=0)
        # SciPy may report NaN for (near-)constant columns; map those to 0 so
        # one flat channel cannot poison scaling/PCA downstream.
        skewness = np.where(np.isnan(skewness), 0.0, skewness)
        kurtosis = np.where(np.isnan(kurtosis), 0.0, kurtosis)
        sma = np.sum(np.abs(sample), axis=0)

        time_features = np.hstack([
            mean, std, minimum, maximum, median, abs_mean, skewness, kurtosis, sma
        ])

        fft_magnitude = np.abs(np.fft.rfft(sample, axis=0))
        fft_mean = np.mean(fft_magnitude, axis=0)
        fft_std = np.std(fft_magnitude, axis=0)
        fft_top_n = np.sort(fft_magnitude, axis=0)[-5:].flatten()

        # An all-zero column has zero total magnitude; dividing by it would
        # yield NaN. Substituting 1 leaves non-degenerate columns untouched
        # and gives centroid = entropy = 0 for the silent ones.
        total_magnitude = np.sum(fft_magnitude, axis=0)
        safe_total = np.where(total_magnitude > 0, total_magnitude, 1.0)
        normalized = fft_magnitude / safe_total

        bin_index = np.arange(fft_magnitude.shape[0])[:, None]
        freq_centroid = np.sum(bin_index * fft_magnitude, axis=0) / safe_total
        # The 1e-10 offset keeps log() finite for empty bins.
        freq_entropy = -np.sum(normalized * np.log(normalized + 1e-10), axis=0)
        power_spectrum = np.sum(fft_magnitude ** 2, axis=0)

        freq_features = np.hstack([
            fft_mean, fft_std, fft_top_n, freq_centroid, freq_entropy, power_spectrum
        ])

        features.append(np.hstack([time_features, freq_features]))
    return np.array(features)

# Load and preprocess data
X_raw, y_raw = load_data(data_dir)
X_features = extract_features(X_raw)

encoder = OneHotEncoder(sparse_output=False)
y_encoded = encoder.fit_transform(y_raw.reshape(-1, 1))

# Split BEFORE fitting any transformer: fitting the scaler/PCA on the full
# dataset would leak validation statistics into training and inflate the
# reported validation accuracy.
X_train_features, X_val_features, y_train, y_val = train_test_split(
    X_features, y_encoded, test_size=0.2, random_state=42
)

# Standardize using statistics of the training rows only.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_features)
X_val_scaled = scaler.transform(X_val_features)

# PCA for dimensionality reduction (at most 150 components, and never more
# than the training matrix can support), again fitted on training rows only.
pca = PCA(n_components=min(150, *X_train_scaled.shape))
X_train = pca.fit_transform(X_train_scaled)
X_val = pca.transform(X_val_scaled)

# Full-dataset views produced with the train-fitted transformers; kept
# because code further down reads X_pca.shape to size the models.
X_scaled = scaler.transform(X_features)
X_pca = pca.transform(X_scaled)

# CNN model
def build_cnn_model(input_shape, num_classes):
    """Build and compile a 1-D CNN classifier.

    Args:
        input_shape: Number of features per sample (an int). The network
            consumes inputs shaped ``(input_shape, 1)``.
        num_classes: Width of the softmax output layer.

    Returns:
        A compiled ``Sequential`` model (Adam with learning rate 0.001,
        categorical cross-entropy loss, accuracy metric).
    """
    model = Sequential([
        # Declare the input explicitly: passing `input_shape=` to the first
        # layer is deprecated in Keras 3 and emits a UserWarning.
        tf.keras.Input(shape=(input_shape, 1)),
        Conv1D(64, kernel_size=3, activation='relu'),
        MaxPooling1D(pool_size=2),
        Conv1D(128, kernel_size=3, activation='relu'),
        MaxPooling1D(pool_size=2),
        Flatten(),
        Dense(256, activation='relu'),
        Dropout(0.4),
        Dense(num_classes, activation='softmax')
    ])
    model.compile(optimizer=Adam(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])
    return model

# BiLSTM model
def build_bilstm_model(input_shape, num_classes):
    """Build and compile a two-layer bidirectional LSTM classifier.

    Args:
        input_shape: Shape of one sample as ``(timesteps, channels)``. A bare
            int ``n`` is accepted as shorthand for ``(n, 1)``, matching how
            ``build_cnn_model`` takes its size. ``None`` leaves the input
            undeclared, so the model is built lazily on first use.
        num_classes: Width of the softmax output layer.

    Returns:
        A compiled ``Sequential`` model (Adam with learning rate 0.001,
        categorical cross-entropy loss, accuracy metric).
    """
    layers = []
    if input_shape is not None:
        # Previously this argument was silently ignored; honouring it builds
        # the model eagerly so it can be inspected before training.
        if np.ndim(input_shape) == 0:
            input_shape = (int(input_shape), 1)
        layers.append(tf.keras.Input(shape=tuple(input_shape)))
    layers.extend([
        Bidirectional(LSTM(256, return_sequences=True)),
        Dropout(0.4),
        BatchNormalization(),
        Bidirectional(LSTM(128)),
        Dense(256, activation='relu'),
        Dropout(0.4),
        Dense(num_classes, activation='softmax')
    ])
    model = Sequential(layers)
    model.compile(optimizer=Adam(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])
    return model

# Train and evaluate function
def train_and_evaluate(model, model_name):
    """Train ``model`` on the module-level split, save it, and score it.

    Reads the module-level ``X_train``, ``X_val``, ``y_train``, ``y_val`` and
    ``model_save_folder``. Inputs get a trailing axis of size 1 before being
    passed to the model. Training runs for 50 epochs with batch size 32 and
    a ``ReduceLROnPlateau`` callback on ``val_loss``.

    Args:
        model: Compiled Keras model to train.
        model_name: Either ``"cnn_model"`` or ``"bilstm_model"``; also used as
            the stem of the saved ``.keras`` file.

    Returns:
        ``(accuracy, model_path)``: validation accuracy of the trained model
        and the path it was saved to.

    Raises:
        ValueError: If ``model_name`` is not one of the supported names.
    """
    supported_names = ("cnn_model", "bilstm_model")
    # Validate up front: an unknown name used to surface only after the
    # branches were skipped, as an unbound-variable error.
    if model_name not in supported_names:
        raise ValueError(
            f"Unknown model_name {model_name!r}; expected one of {supported_names}"
        )

    # Create the target folder before training so a path problem fails fast
    # instead of after 50 epochs.
    os.makedirs(model_save_folder, exist_ok=True)
    model_path = os.path.join(model_save_folder, f"{model_name}.keras")

    callbacks = [
        ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, verbose=1)
    ]

    # Both architectures consume (samples, features, 1) inputs.
    X_train_reshaped = np.expand_dims(X_train, axis=-1)
    X_val_reshaped = np.expand_dims(X_val, axis=-1)
    model.fit(X_train_reshaped, y_train, validation_data=(X_val_reshaped, y_val), epochs=50, batch_size=32, callbacks=callbacks)

    model.save(model_path)
    _, accuracy = model.evaluate(X_val_reshaped, y_val, verbose=0)
    return accuracy, model_path

# Train models
cnn_model = build_cnn_model(X_pca.shape[1], y_encoded.shape[1])
cnn_acc, cnn_model_path = train_and_evaluate(cnn_model, "cnn_model")

bilstm_model = build_bilstm_model((X_pca.shape[1], 1), y_encoded.shape[1])
bilstm_acc, bilstm_model_path = train_and_evaluate(bilstm_model, "bilstm_model")

# Ensemble results
# Soft voting: average the two models' class probabilities and score the
# combined prediction. (Averaging the two accuracy numbers, as before, does
# not measure how an ensemble of the models would actually perform.)
X_val_ensemble = np.expand_dims(X_val, axis=-1)
ensemble_probs = (
    cnn_model.predict(X_val_ensemble, verbose=0)
    + bilstm_model.predict(X_val_ensemble, verbose=0)
) / 2
ensemble_acc = np.mean(
    np.argmax(ensemble_probs, axis=1) == np.argmax(y_val, axis=1)
)
print('\n-------|  Ensemble Results  |-------')
print(f'Ensemble Accuracy: {ensemble_acc:.2%}')

# Compare results and print
# The CNN wins only when strictly better; ties go to the BiLSTM.
if cnn_acc > bilstm_acc:
    final_name, final_acc, final_model_path = "CNN", cnn_acc, cnn_model_path
else:
    final_name, final_acc, final_model_path = "BiLSTM", bilstm_acc, bilstm_model_path

print('\n-------|  Final Result  |----------')
print('CNN Accuracy     : {:.2%}'.format(cnn_acc))
print('BiLSTM Accuracy  : {:.2%}'.format(bilstm_acc))
print(f"\n{final_name} model is superior. Selected as the final model.")
print('Final Accuracy   : {:.2%}'.format(final_acc))
print(f"\n{final_name} model saved at {final_model_path}.")


Mounted at /content/drive


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
175/175 ━━━━━━━━━━━━━━━━━━━━ 7s 17ms/step - accuracy: 0.5229 - loss: 1.1043 - val_accuracy: 0.7912 - val_loss: 0.5527 - learning_rate: 0.0010
Epoch 2/50
175/175 ━━━━━━━━━━━━━━━━━━━━ 0s 2ms/step - accuracy: 0.7791 - loss: 0.5439 - val_accuracy: 0.8085 - val_loss: 0.4444 - learning_rate: 0.0010
Epoch 3/50
175/175 ━━━━━━━━━━━━━━━━━━━━ 0s 2ms/step - accuracy: 0.8273 - loss: 0.4282 - val_accuracy: 0.8451 - val_loss: 0.3923 - learning_rate: 0.0010
Epoch 4/50
175/175 ━━━━━━━━━━━━━━━━━━━━ 0s 2ms/step - accuracy: 0.8430 - loss: 0.3753 - val_accuracy: 0.8379 - val_loss: 0.3850 - learning_rate: 0.0010
Epoch 5/50
175/175 ━━━━━━━━━━━━━━━━━━━━ 0s 2ms/step - accuracy: 0.8560 - loss: 0.3463 - val_accuracy: 0.8501 - val_loss: 0.3652 - learning_rate: 0.0010
Epoch 6/50
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1