In [None]:
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Conv3D, MaxPooling3D, UpSampling3D, Input
import pandas as pd

# ---- Configuration ----------------------------------------------------------
# Root folder that is searched (recursively) for video files.
videos_folder = 'AlgonautsVideos268_All_30fpsmax'

# Upper bound on how many video files are processed.
MAX_FILES = 1000
# Every frame is resized to FRAME_WIDTH x FRAME_HEIGHT pixels.
FRAME_HEIGHT, FRAME_WIDTH = 64, 64
# Number of leading frames kept from each video.
FRAMES_PER_VIDEO = 16

# Accumulators filled by the loading loop: one entry per kept video.
all_videos, labels = [], []

def get_label(filename):
    """Return the binary class label derived from a video filename.

    The check is case-insensitive: a name containing the substring
    'inanimate' maps to 0, every other name maps to 1.
    """
    return 0 if 'inanimate' in filename.lower() else 1

# Walk the video folder and collect the first FRAMES_PER_VIDEO frames of each
# .mp4 file (resized to FRAME_WIDTH x FRAME_HEIGHT) together with its label.
#
# - A file that yields fewer than FRAMES_PER_VIDEO frames is dropped, but it
#   still counts toward MAX_FILES.
# - A file whose processing raises is reported and does not count.
file_counter = 0
for root, dirs, files in os.walk(videos_folder):
    # Sorting `dirs` in place fixes the order in which os.walk descends, and
    # sorting `files` fixes the order within a folder, so the MAX_FILES subset
    # no longer depends on filesystem enumeration order.
    dirs.sort()
    for file in sorted(files):
        # Accept the extension in any letter case (".mp4", ".MP4", ...).
        if not file.lower().endswith(".mp4"):
            continue
        file_path = os.path.join(root, file)
        try:
            cap = cv2.VideoCapture(file_path)
            try:
                frames = []
                # Read at most FRAMES_PER_VIDEO frames; stop early when the
                # capture reports no further frame.
                for _ in range(FRAMES_PER_VIDEO):
                    success, frame = cap.read()
                    if not success:
                        break
                    frames.append(cv2.resize(frame, (FRAME_WIDTH, FRAME_HEIGHT)))
            finally:
                # Always free the capture handle, even if reading/resizing failed.
                cap.release()

            # Keep only complete clips so every sample has the same shape.
            if len(frames) == FRAMES_PER_VIDEO:
                all_videos.append(frames)
                labels.append(get_label(file))

            file_counter += 1
            if file_counter >= MAX_FILES:
                break
        except Exception as e:
            print(f"Error processing file {file_path}: {e}")
    # The inner `break` only leaves the per-folder loop; stop the walk as well.
    if file_counter >= MAX_FILES:
        break

# Stack the collected clips into one float32 array, divide pixel values by 255,
# and force the 5-D layout (clip, frame, height, width, channel) that the
# 3D autoencoder takes as input.
clip_layout = (-1, FRAMES_PER_VIDEO, FRAME_HEIGHT, FRAME_WIDTH, 3)
all_videos = (np.array(all_videos).astype('float32') / 255.0).reshape(clip_layout)

# One integer label per kept clip.
labels = np.array(labels)

print(f"Processed {len(labels)} videos.")

# 3D convolutional autoencoder built with the Keras functional API.
# Shared layer settings are named once so encoder and decoder stay in sync.
kernel = (3, 3, 3)
pool = (2, 2, 2)
relu_same = dict(activation='relu', padding='same')

input_vid = Input(shape=(FRAMES_PER_VIDEO, FRAME_HEIGHT, FRAME_WIDTH, 3))

# Encoder: conv(32) -> pool -> conv(64) -> pool.
x = Conv3D(32, kernel, **relu_same)(input_vid)
x = MaxPooling3D(pool, padding='same')(x)
x = Conv3D(64, kernel, **relu_same)(x)
encoded = MaxPooling3D(pool, padding='same')(x)

# Decoder: mirrors the encoder, with upsampling in place of pooling.
x = Conv3D(64, kernel, **relu_same)(encoded)
x = UpSampling3D(pool)(x)
x = Conv3D(32, kernel, **relu_same)(x)
x = UpSampling3D(pool)(x)
# Sigmoid output with 3 channels, matching the input's channel count.
decoded = Conv3D(3, kernel, activation='sigmoid', padding='same')(x)

# Full input -> reconstruction model.
autoencoder = Model(input_vid, decoded)
autoencoder.compile(optimizer='adam', loss='binary_crossentropy')
autoencoder.summary()

# Fit the autoencoder to reconstruct its own input; Keras holds out 20% of the
# samples for validation.
autoencoder.fit(
    x=all_videos,
    y=all_videos,
    epochs=10,
    batch_size=2,
    shuffle=True,
    validation_split=0.2,
)

# Model that stops at the bottleneck and shares the autoencoder's encoder layers.
encoder = Model(inputs=input_vid, outputs=encoded)

# Encode every clip, then flatten each bottleneck volume to one feature vector.
latent_maps = encoder.predict(all_videos)
encoded_features = latent_maps.reshape((latent_maps.shape[0], -1))

print(f"Encoded features shape: {encoded_features.shape}")

# Hold out 20% of the encoded clips for testing (fixed seed for a repeatable split).
# NOTE(review): the split is not stratified - confirm both classes are present
# in each partition when the labels are imbalanced.
x_train, x_test, y_train, y_test = train_test_split(
    encoded_features,
    labels,
    test_size=0.2,
    random_state=42,
)

# Logistic regression on the flattened bottleneck features.
classifier = LogisticRegression(max_iter=1000).fit(x_train, y_train)

# Report accuracy on the held-out clips.
y_pred = classifier.predict(x_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Test Accuracy: {accuracy * 100:.2f}%')

# Export one row per clip: the flattened encoded features followed by a
# trailing 'label' column, written without the index.
encoded_features_df = pd.DataFrame(encoded_features).assign(label=labels)
encoded_features_df.to_csv('encoded_features_with_labels.csv', index=False)


Processed 1000 videos.
Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 16, 64, 64, 3)]   0         
                                                                 
 conv3d (Conv3D)             (None, 16, 64, 64, 32)    2624      
                                                                 
 max_pooling3d (MaxPooling3  (None, 8, 32, 32, 32)     0         
 D)                                                              
                                                                 
 conv3d_1 (Conv3D)           (None, 8, 32, 32, 64)     55360     
                                                                 
 max_pooling3d_1 (MaxPoolin  (None, 4, 16, 16, 64)     0         
 g3D)                                                            
                                                                 
 conv3d_2 (Conv3D)           (None, 4,