In [7]:
import os
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, LSTM, Dense, TimeDistributed, Dropout
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from PIL import UnidentifiedImageError

# Constants
IMG_HEIGHT, IMG_WIDTH = 64, 64
SEQUENCE_LENGTH = 10  # Number of frames in a sequence
BATCH_SIZE = 32
EPOCHS = 10

# Paths to Ped1 and Ped2 train and test frames (Using raw strings to avoid Unicode escape errors)
ped1_train_path = r'C:\Users\abhishik chebrolu\Downloads\AINN pro\UCSD_Anomaly_Dataset\UCSD_Anomaly_Dataset.v1p2\UCSDped1\Train'
ped1_test_path = r'C:\Users\abhishik chebrolu\Downloads\AINN pro\UCSD_Anomaly_Dataset\UCSD_Anomaly_Dataset.v1p2\UCSDped1\Test'
ped2_train_path = r'C:\Users\abhishik chebrolu\Downloads\AINN pro\UCSD_Anomaly_Dataset\UCSD_Anomaly_Dataset.v1p2\UCSDped2\Train'
ped2_test_path = r'C:\Users\abhishik chebrolu\Downloads\AINN pro\UCSD_Anomaly_Dataset\UCSD_Anomaly_Dataset.v1p2\UCSDped2\Test'

# Function to load frames and form sequences from each directory
def load_frames(data_path, label, sequence_length=SEQUENCE_LENGTH):
    sequences = []
    labels = []
    for folder in os.listdir(data_path):
        folder_path = os.path.join(data_path, folder)
        
        # Skip if it's not a directory (like .DS_Store or other files)
        if not os.path.isdir(folder_path):
            continue
        
        frames = []
        for frame in sorted(os.listdir(folder_path)):  # Ensure frames are loaded in correct order
            frame_path = os.path.join(folder_path, frame)
            
            # Only proceed if the file has a valid image extension
            if frame_path.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.tif')):
                try:
                    # Try to load the image, and handle cases where the file is not a valid image
                    image = load_img(frame_path, target_size=(IMG_HEIGHT, IMG_WIDTH))
                    image_array = img_to_array(image) / 255.0  # Normalize images
                    frames.append(image_array)
                except UnidentifiedImageError:
                    print(f"Skipping invalid image: {frame_path}")
                    continue
                except OSError as e:
                    print(f"Skipping unsupported image format: {frame_path}. Error: {e}")
                    continue
            
            # When we have enough frames for a sequence, we add it
            if len(frames) == sequence_length:
                sequences.append(np.array(frames))
                labels.append(label)
                frames = []  # Reset frames for the next sequence
    
    # Debugging print statements to check if any data was loaded
    print(f"Loaded {len(sequences)} sequences from {data_path}")
    return np.array(sequences), np.array(labels)

# Load data for Ped1 and Ped2 training and testing
def load_ucsd_data():
    ped1_train_sequences, ped1_train_labels = load_frames(ped1_train_path, label=0)  # Normal label
    ped1_test_sequences, ped1_test_labels = load_frames(ped1_test_path, label=1)    # Anomalous label
    ped2_train_sequences, ped2_train_labels = load_frames(ped2_train_path, label=0)
    ped2_test_sequences, ped2_test_labels = load_frames(ped2_test_path, label=1)

    # Combine all sequences for training and testing
    X_train = np.concatenate([ped1_train_sequences, ped2_train_sequences], axis=0)
    y_train = np.concatenate([ped1_train_labels, ped2_train_labels], axis=0)
    X_test = np.concatenate([ped1_test_sequences, ped2_test_sequences], axis=0)
    y_test = np.concatenate([ped1_test_labels, ped2_test_labels], axis=0)

    # Debugging print statements to check the shape of the data
    print(f"X_train shape: {X_train.shape}, y_train shape: {y_train.shape}")
    print(f"X_test shape: {X_test.shape}, y_test shape: {y_test.shape}")
    
    return X_train, y_train, X_test, y_test

# Build the hybrid CNN-LSTM model
def build_hybrid_model():
    model = Sequential()

    # CNN layers
    model.add(TimeDistributed(Conv2D(32, (3, 3), activation='relu', padding='same'), input_shape=(SEQUENCE_LENGTH, IMG_HEIGHT, IMG_WIDTH, 3)))
    model.add(TimeDistributed(MaxPooling2D((2, 2))))
    model.add(TimeDistributed(Conv2D(64, (3, 3), activation='relu', padding='same')))
    model.add(TimeDistributed(MaxPooling2D((2, 2))))
    model.add(TimeDistributed(Flatten()))

    # LSTM layer
    model.add(LSTM(64, return_sequences=False))

    # Fully connected layers
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(1, activation='sigmoid'))

    # Compile the model
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    return model

# Load UCSD data
X_train, y_train, X_test, y_test = load_ucsd_data()

# Check if any data was loaded
if len(X_train) == 0 or len(y_train) == 0:
    raise ValueError("No training data was loaded. Please check the dataset paths or formats.")

# Split training data into train and validation sets
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

# Build the model
model = build_hybrid_model()
model.summary()

# Train the model
history = model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=EPOCHS, batch_size=BATCH_SIZE)

# Evaluate on test set
y_pred = (model.predict(X_test) > 0.5).astype("int32")
test_accuracy = accuracy_score(y_test, y_pred)
print(f'Testing Accuracy: {test_accuracy * 100:.2f}%')

# Evaluate training accuracy
train_predictions = (model.predict(X_train) > 0.5).astype("int32")
train_accuracy = accuracy_score(y_train, train_predictions)
print(f'Training Accuracy: {train_accuracy * 100:.2f}%')


Loaded 680 sequences from C:\Users\abhishik chebrolu\Downloads\AINN pro\UCSD_Anomaly_Dataset\UCSD_Anomaly_Dataset.v1p2\UCSDped1\Train
Skipping unsupported image format: C:\Users\abhishik chebrolu\Downloads\AINN pro\UCSD_Anomaly_Dataset\UCSD_Anomaly_Dataset.v1p2\UCSDped1\Test\Test017\142.tif. Error: -2
Loaded 919 sequences from C:\Users\abhishik chebrolu\Downloads\AINN pro\UCSD_Anomaly_Dataset\UCSD_Anomaly_Dataset.v1p2\UCSDped1\Test
Loaded 255 sequences from C:\Users\abhishik chebrolu\Downloads\AINN pro\UCSD_Anomaly_Dataset\UCSD_Anomaly_Dataset.v1p2\UCSDped2\Train
Loaded 402 sequences from C:\Users\abhishik chebrolu\Downloads\AINN pro\UCSD_Anomaly_Dataset\UCSD_Anomaly_Dataset.v1p2\UCSDped2\Test
X_train shape: (935, 10, 64, 64, 3), y_train shape: (935,)
X_test shape: (1321, 10, 64, 64, 3), y_test shape: (1321,)


  super().__init__(**kwargs)


Epoch 1/10
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 465ms/step - accuracy: 0.9666 - loss: 0.1273 - val_accuracy: 1.0000 - val_loss: 4.4504e-04
Epoch 2/10
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 424ms/step - accuracy: 1.0000 - loss: 7.4683e-04 - val_accuracy: 1.0000 - val_loss: 1.2555e-04
Epoch 3/10
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 424ms/step - accuracy: 1.0000 - loss: 3.4310e-04 - val_accuracy: 1.0000 - val_loss: 7.4559e-05
Epoch 4/10
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 421ms/step - accuracy: 1.0000 - loss: 2.7192e-04 - val_accuracy: 1.0000 - val_loss: 4.8678e-05
Epoch 5/10
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 433ms/step - accuracy: 1.0000 - loss: 1.6546e-04 - val_accuracy: 1.0000 - val_loss: 3.4173e-05
Epoch 6/10
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 439ms/step - accuracy: 1.0000 - loss: 1.0373e-04 - val_accuracy: 1.0000 -