In [4]:
import pandas as pd
import numpy as np

# Load the training data (You might have to change the file path based on how you're working)
train_df = pd.read_csv('/kaggle/input/beginners-hypothesis-25/BH25/Training_Data/train.csv')

print(train_df.head(10))

# For this notebook, we'll only train for 1000 video samples

train_df = train_df[:5000]

   video_id element    motion power  speed          video_summary
0         1    Erde    linear   rot    9.6     (3.8147, 31.94809)
1         2   Feuer       shm  grin    9.6   (26.70288, -4.29153)
2         3   Feuer    random  geld    9.6      (3.8147, 8.58307)
3         4    Erde    zigzag  lila    6.9  (-24.79553, -0.95367)
4         5    Erde    linear  lila    9.6    (7.62939, 22.88818)
5         6    Erde  circular  blau    9.6   (4.76837, -26.70288)
6         7   Boden    linear  btau    9.6  (-22.88818, 24.79553)
7         8   Boden    random   rut    6.9  (18.59665, -15.25879)
8         9    Erde    random   rot    6.9    (5.72205, -0.59605)
9        10    Erde    linear  helb    6.9    (8.10623, -5.24521)


In [5]:
import cv2 

def process_video(video_path):
    cap = cv2.VideoCapture(video_path)
    frames = []
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        frame = cv2.resize(frame, (64, 64))
        frames.append(frame)
    cap.release()
    frames = np.array(frames)  # Shape: (20, 64, 64, 3)
    return frames  # Flatten to 1D array
    
video_features = []
for idx, row in train_df.iterrows():
    video_path = f"/kaggle/input/beginners-hypothesis-25/BH25/Training_Data/Train_Videos/{row['video_id']}.mp4"
    video_features.append(process_video(video_path))

video_features = np.array(video_features)  # Shape: (num_samples, 20*64*64*3)

In [6]:
from sklearn.preprocessing import LabelEncoder

# Encode categorical attributes using the Label Encoder
def encode_categorical_columns(df, columns):
    label_encoders = {}
    for col in columns:
        le = LabelEncoder()
        df[col] = le.fit_transform(df[col])
        label_encoders[col] = le
    return df, label_encoders

categorical_columns = ['element', 'motion', 'power']
train_df, label_encoders = encode_categorical_columns(train_df, categorical_columns)
for col_name, le in label_encoders.items():
    print(f"\nClasses for column '{col_name}':")
    # This will print the mapping of numbers to original categories
    print(f"Number to Category Mapping: {dict(enumerate(le.classes_))}")


Classes for column 'element':
Number to Category Mapping: {0: 'Boden', 1: 'Erde', 2: 'Feuer', 3: 'Luft', 4: 'Wasser'}

Classes for column 'motion':
Number to Category Mapping: {0: 'circular', 1: 'linear', 2: 'random', 3: 'shm', 4: 'zigzag'}

Classes for column 'power':
Number to Category Mapping: {0: 'baau', 1: 'blau', 2: 'blru', 3: 'brun', 4: 'btau', 5: 'gaun', 6: 'gela', 7: 'gelb', 8: 'geld', 9: 'gele', 10: 'gelt', 11: 'got', 12: 'grin', 13: 'grun', 14: 'grus', 15: 'helb', 16: 'lela', 17: 'liga', 18: 'lila', 19: 'lili', 20: 'lisa', 21: 'lula', 22: 'not', 23: 'plau', 24: 'rat', 25: 'rod', 26: 'rot', 27: 'rut', 28: 'slau', 29: 'trun'}


In [63]:
from sklearn.model_selection import train_test_split


y_speed = train_df[['speed']]
y_summary = train_df['video_summary'].apply(lambda x: eval(x))  # Convert string tuples to actual tuples

y_categorical = train_df[categorical_columns]

# Split data for training and validation
X_train_speed, X_val_speed, y_train_speed, y_val_speed = train_test_split(video_features, y_speed, test_size=0.2, random_state=42)
X_train_cat, X_val_cat, y_train_cat, y_val_cat = train_test_split(video_features, y_categorical, test_size=0.2, random_state=42)
X_train_summary, X_val_summary, y_train_summary, y_val_summary = train_test_split(video_features, y_summary, test_size=0.2, random_state=42)

In [64]:
X_train_cat.shape

(4000, 20, 64, 64, 3)

In [61]:
y_train_speed=np.array(y_train_speed)
y_train_summary=np.array(y_train_summary)
y_val_speed=np.array(y_val_speed)
y_val_summary=np.array(y_val_summary)
y_val_cat=np.array(y_val_cat)
y_train_cat=np.array(y_train_cat)

In [72]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import (
    Conv3D,
    MaxPooling3D,
    Flatten,
    Dense,
    Dropout,
    BatchNormalization,
)

# Define input shape (e.g., a video with dimensions: frames x height x width x channels)
input_shape = (20, 64, 64, 3)  # 16 frames of 128x128 images with 3 color channels (RGB)

# Create the 3D CNN model
def build_3d_cnn(input_shape, num_classes):
    model = Sequential()

    # First 3D Convolutional Layer
    model.add(Conv3D(32, kernel_size=(3, 3, 3), activation='relu', input_shape=input_shape))
    model.add(BatchNormalization())
    model.add(MaxPooling3D(pool_size=(1, 2, 2)))

    # Second 3D Convolutional Layer
    model.add(Conv3D(64, kernel_size=(3, 3, 3), activation='relu'))
    model.add(BatchNormalization())
    model.add(MaxPooling3D(pool_size=(1, 2, 2)))

    # Third 3D Convolutional Layer
    model.add(Conv3D(128, kernel_size=(3, 3, 3), activation='relu'))
    model.add(BatchNormalization())
    model.add(MaxPooling3D(pool_size=(1, 2, 2)))

    # Flatten and Fully Connected Layers
    model.add(Flatten())
    model.add(Dense(256, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.5))

    # Output Layer
    model.add(Dense(num_classes, activation='softmax'))  # Use 'sigmoid' for binary classification

    return model

# Instantiate the model
model={}
for col in categorical_columns:
    model[col] = build_3d_cnn(input_shape, len(label_encoders[col].classes_))

# Compile the model
    model[col].compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',  # Use 'binary_crossentropy' for binary classification
              metrics=['accuracy'])

# Summary of the model
    model[col].summary()


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [65]:
y_train_cat=np.array(y_train_cat)
y_val_cat=np.array(y_val_cat)

In [71]:
X_train_cat=np.array(X_train_cat)
X_val_cat=np.array(X_val_cat)

In [75]:
X_train_cat.shape

(4000, 20, 64, 64, 3)

In [76]:
model["motion"].fit(X_train_cat, y_train_cat[:,1], validation_data=(X_val_cat, y_val_cat[:,1]), epochs=10, batch_size=32)

Epoch 1/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 174ms/step - accuracy: 0.9922 - loss: 0.2371 - val_accuracy: 1.0000 - val_loss: 0.0000e+00
Epoch 2/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 154ms/step - accuracy: 0.9945 - loss: 0.1770 - val_accuracy: 1.0000 - val_loss: 0.0000e+00
Epoch 3/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 155ms/step - accuracy: 0.9928 - loss: 0.3014 - val_accuracy: 1.0000 - val_loss: 7.8683e-06
Epoch 4/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 162ms/step - accuracy: 0.9948 - loss: 0.1306 - val_accuracy: 1.0000 - val_loss: 2.9891e-04
Epoch 5/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 163ms/step - accuracy: 0.9948 - loss: 0.1240 - val_accuracy: 1.0000 - val_loss: 0.0000e+00
Epoch 6/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 164ms/step - accuracy: 0.9976 - loss: 0.1126 - val_accuracy: 1.0000 - val_los

<keras.src.callbacks.history.History at 0x7e31368f9630>

In [10]:
import numpy as np

# Assuming X_train_cat and X_val_cat have the shape (800, 20, 64, 64, 3)
# We want to extract the first frame (index 0) from each sample

# Extract the first frame (index 0) from each sample in the training and validation sets
X_train_single_frame = X_train_cat[:, 0, :, :, :]  # Shape becomes (800, 64, 64, 3)
X_val_single_frame = X_val_cat[:, 0, :, :, :]  # Shape becomes (200, 64, 64, 3)

# Check the new shapes
print(f"X_train_single_frame shape: {X_train_single_frame.shape}")
print(f"X_val_single_frame shape: {X_val_single_frame.shape}")


X_train_single_frame shape: (4000, 64, 64, 3)
X_val_single_frame shape: (1000, 64, 64, 3)


In [11]:
import tensorflow as tf
from tensorflow.keras import layers, models

# Define the model architecture
model_element = models.Sequential()

# Convolutional Layers (for feature extraction from images)
model_element.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(64, 64, 3)))  # 64x64 RGB images
model_element.add(layers.MaxPooling2D((2, 2)))

model_element.add(layers.Conv2D(64, (3, 3), activation='relu'))
model_element.add(layers.MaxPooling2D((2, 2)))

model_element.add(layers.Conv2D(128, (3, 3), activation='relu'))
model_element.add(layers.MaxPooling2D((2, 2)))

# Flatten the output for the fully connected layers
model_element.add(layers.Flatten())

# Fully connected layers
model_element.add(layers.Dense(128, activation='relu'))  # Hidden dense layer
model_element.add(layers.Dense(len(set(y_train_cat[:,0])), activation='softmax'))  # Output layer with softmax for classification

# Compile the model
model_element.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',  # Since y_train is label-encoded (integer labels)
              metrics=['accuracy'])

# Summary of the model to check its structure
model_element.summary()

# Train the model
history = model_element.fit(
    X_train_single_frame,  # Training images
    y_train_cat[:,0],  # Training labels (assuming the element column contains label-encoded integers)
    validation_data=(X_val_single_frame, y_val_cat[:,0]),  # Validation images and labels
    epochs=10,  # Set the number of epochs
    batch_size=32  # Batch size for training
)

# Optionally, evaluate the model on the validation set
val_loss, val_accuracy = model_element.evaluate(X_val_single_frame, y_val_cat[:,0])
print(f"Validation Accuracy: {val_accuracy:.4f}")


Epoch 1/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 18ms/step - accuracy: 0.3848 - loss: 5.6054 - val_accuracy: 0.8120 - val_loss: 0.5617
Epoch 2/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.8762 - loss: 0.3801 - val_accuracy: 0.9060 - val_loss: 0.2368
Epoch 3/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.9528 - loss: 0.1375 - val_accuracy: 0.9600 - val_loss: 0.1142
Epoch 4/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.9831 - loss: 0.0651 - val_accuracy: 0.9710 - val_loss: 0.0700
Epoch 5/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.9927 - loss: 0.0243 - val_accuracy: 0.9630 - val_loss: 0.0962
Epoch 6/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.9828 - loss: 0.0722 - val_accuracy: 0.9820 - val_loss: 0.0537
Epoch 7/10
[1m125/125[0m 

In [100]:


color_classes = [
    ("Red", (0, 5)),
    ("Red-Orange", (6, 20)),
    ("Orange", (21, 35)),
    ("Yellow-Orange", (36, 50)),
    ("Yellow", (51, 65)),
    ("Yellow-Green", (66, 80)),
    ("Green", (81, 100)),
    ("Green-Blue", (101, 120)),
    ("Blue", (121, 150)),
    ("Blue-Purple", (151, 170)),
    ("Purple", (171, 190)),
    ("Magenta", (191, 210)),
    ("Pink", (211, 230)),
    ("Violet", (231, 250)),
    ("Lavender", (251, 270)),
    ("Plum", (271, 285)),
    ("Peach", (286, 300)),
    ("Burgundy", (301, 315)),
    ("Maroon", (316, 330)),
    ("Chocolate", (331, 340)),
    ("Bronze", (341, 350)),
    ("Teal", (351, 355)),
    ("Turquoise", (356, 365)),
    ("Lime", (366, 375)),
    ("Emerald", (376, 385)),
    ("Olive", (386, 395)),
    ("Forest", (396, 405)),
    ("Sky", (406, 420)),
    ("Aqua", (421, 430)),
    ("Cyan", (431, 440)),
    ("Ice Blue", (441, 450)),
    ("Silver", (451, 460)),
]

def convert_to_hsv(image):
    """Convert an image from BGR to HSV color space."""
    return cv2.cvtColor(image, cv2.COLOR_BGR2HSV)

def extract_dominant_color(hsv_image):
    """Extract the dominant color from the HSV image."""
    # Split the HSV channels
    h, s, v = cv2.split(hsv_image)
    
    # Calculate the average hue value
    avg_hue = np.mean(h)
    
    # Map the average hue to one of the 30 color classes
    for color_name, (h_min, h_max) in color_classes:
        if h_min <= avg_hue <= h_max:
            return color_name
    
    return "Unknown"  # If no match found

def get_dominant_colors(images):
    """Process a batch of images and return their dominant colors."""
    dominant_colors = []
    for image in images:
        hsv_image = convert_to_hsv(image)
        color = extract_dominant_color(hsv_image)
        dominant_colors.append(color)
    return dominant_colors

# Assuming X_train_single_frame and X_val_single_frame are loaded
# Get dominant colors for training and validation datasets
train_dominant_colors = get_dominant_colors(X_train_single_frame)
val_dominant_colors = get_dominant_colors(X_val_single_frame)

# Encode the labels using LabelEncoder (since you already have your labels encoded)
label_encoder = LabelEncoder()
train_labels = label_encoder.fit_transform(train_dominant_colors)
val_labels = label_encoder.transform(val_dominant_colors)

# Build the model for color classification
def build_color_based_classification_model(input_shape=(64, 64, 3), num_classes=30):
    model = models.Sequential()
    
    # First Convolutional Block
    model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=input_shape))
    model.add(layers.MaxPooling2D((2, 2)))
    
    # Second Convolutional Block
    model.add(layers.Conv2D(64, (3, 3), activation='relu'))
    model.add(layers.MaxPooling2D((2, 2)))
    
    # Third Convolutional Block
    model.add(layers.Conv2D(128, (3, 3), activation='relu'))
    model.add(layers.MaxPooling2D((2, 2)))
    
    # Flatten and Fully Connected Layers
    model.add(layers.Flatten())
    model.add(layers.Dense(128, activation='relu'))
    model.add(layers.Dropout(0.5))  # Dropout for regularization
    
    # Output Layer (softmax for classification)
    model.add(layers.Dense(num_classes, activation='softmax'))  # 30 classes for color prediction
    
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    
    return model

# Number of color classes (30 in your case)
num_classes = len(color_classes)

# Build the model
color_model = build_color_based_classification_model(input_shape=(64, 64, 3), num_classes=num_classes)

# Print model summary
color_model.summary()

# Train the model
color_model.fit(X_train_single_frame, train_labels, validation_data=(X_val_single_frame, val_labels), epochs=20, batch_size=32)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/20
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 14ms/step - accuracy: 0.6846 - loss: 4.2286 - val_accuracy: 0.8090 - val_loss: 0.5969
Epoch 2/20
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.7990 - loss: 0.6462 - val_accuracy: 0.8070 - val_loss: 0.6095
Epoch 3/20
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.8066 - loss: 0.5918 - val_accuracy: 0.8120 - val_loss: 0.5813
Epoch 4/20
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.8078 - loss: 0.6051 - val_accuracy: 0.7980 - val_loss: 0.5875
Epoch 5/20
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.8037 - loss: 0.5945 - val_accuracy: 0.8160 - val_loss: 0.5612
Epoch 6/20
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.7983 - loss: 0.5843 - val_accuracy: 0.8210 - val_loss: 0.5761
Epoch 7/20
[1m125/125[0m 

<keras.src.callbacks.history.History at 0x7e3127e05570>

In [13]:

# Optionally, evaluate the model on the validation set
val_loss, val_accuracy = color_model.evaluate(X_val_single_frame, val_labels)
print(f"Validation Accuracy: {val_accuracy:.4f}")

[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8402 - loss: 0.5295
Validation Accuracy: 0.8400


In [14]:
import tensorflow as tf
from tensorflow.keras import layers, models
import numpy as np

# Assuming y_train_summary and y_val_summary are lists of (x, y) pairs
x_values, y_values = zip(*y_train_summary)
x_val_values, y_val_values = zip(*y_val_summary)

# Convert x_values and y_values into numpy arrays if they aren't already
x_values = np.array(x_values)
y_values = np.array(y_values)
x_val_values = np.array(x_val_values)
y_val_values = np.array(y_val_values)

def build_3d_model_for_x_and_y(input_shape=(20, 64, 64, 3)):
    inputs = layers.Input(shape=input_shape)

    # Shared 3D CNN layers (to process video data)
    x = layers.Conv3D(32, (3, 3, 3), activation='relu', padding='same')(inputs)  # Padding set to 'same' to keep dimensions
    x = layers.MaxPooling3D((1, 2, 2), padding='same')(x)  # Use 'same' padding
    x = layers.Conv3D(64, (3, 3, 3), activation='relu', padding='same')(x)
    x = layers.MaxPooling3D((1, 2, 2), padding='same')(x)
    x = layers.Conv3D(128, (3, 3, 3), activation='relu', padding='same')(x)
    x = layers.MaxPooling3D((1, 2, 2), padding='same')(x)

    # Flatten the output to pass it to fully connected layers
    x = layers.Flatten()(x)

    # Separate outputs for x and y
    output_x = layers.Dense(1, name='x_output')(x)  # For x value prediction
    output_y = layers.Dense(1, name='y_output')(x)  # For y value prediction
    
    # Create the model
    model = models.Model(inputs=inputs, outputs=[output_x, output_y])

    # Compile the model with optimizer, loss, and separate metrics for each output
    model.compile(optimizer='adam', 
                  loss='mean_squared_error',
                  metrics={'x_output': 'mae', 'y_output': 'mae'})  # Separate metrics for each output

    return model

# Build the 3D CNN model
model_for_x_and_y_3d = build_3d_model_for_x_and_y(input_shape=(20, 64, 64, 3))

# Print model summary to inspect the architecture
model_for_x_and_y_3d.summary()

# Train the model
model_for_x_and_y_3d.fit(X_train_summary, [x_values, y_values], 
                         validation_data=(X_val_summary, [x_val_values, y_val_values]), 
                         epochs=10, batch_size=32)


Epoch 1/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 218ms/step - loss: 15570.2158 - x_output_loss: 12314.4229 - x_output_mae: 34.2785 - y_output_loss: 3255.7917 - y_output_mae: 22.4846 - val_loss: 464.6028 - val_x_output_loss: 251.1288 - val_x_output_mae: 12.6056 - val_y_output_loss: 228.4463 - val_y_output_mae: 11.5415
Epoch 2/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 194ms/step - loss: 482.8817 - x_output_loss: 267.3982 - x_output_mae: 12.9317 - y_output_loss: 215.4835 - y_output_mae: 11.6418 - val_loss: 464.6168 - val_x_output_loss: 251.1538 - val_x_output_mae: 12.6059 - val_y_output_loss: 228.4416 - val_y_output_mae: 11.5415
Epoch 3/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 200ms/step - loss: 484.6746 - x_output_loss: 272.7360 - x_output_mae: 13.0505 - y_output_loss: 211.9387 - y_output_mae: 11.6262 - val_loss: 464.5964 - val_x_output_loss: 250.9446 - val_x_output_mae: 12.6035 - val_y_output_loss: 228

<keras.src.callbacks.history.History at 0x7e2fb019d630>

In [43]:
X_train_speed_flattened = X_train_speed.reshape(4000, -1)


In [48]:
print(X_train_speed_flattened.shape)
print(y_train_speed.shape)


(4000, 245760)
(4000, 1)


In [49]:
from sklearn.linear_model import LinearRegression

model_continuous = LinearRegression()
model_continuous.fit(X_train_speed_flattened, y_train_speed)

# Now you can use the model for predictions, etc.


In [57]:
import os

test_folder = '/kaggle/input/beginners-hypothesis-25/BH25/Testing_Data'

test_features = []
video_ids = []

# The below function finds all the video ids in the test folder and sorts them, 
# and then stores their features in test_features

for video_id in sorted(
    [f for f in os.listdir(test_folder) if f.endswith(".mp4")], key=lambda x: int(x.split('.')[0])):
    video_path = os.path.join(test_folder, video_id)
    video_ids.append(video_id.split('.')[0]) 
    test_features.append(process_video(video_path))

test_features = np.array(test_features)  # Shape: (num_test_samples, 20*64*64*3)

In [78]:
test_features_single_frame=test_features[:, 0, :, :, :]

In [88]:
test_features.shape

(3000, 20, 64, 64, 3)

In [89]:

test_features_flatten = test_features.reshape(3000, -1)
test_features_flatten.shape

(3000, 245760)

In [90]:
y_test_prediction=model_continuous.predict(test_features_flatten)


In [80]:
y_test_summary_x_y = model_for_x_and_y_3d.predict(test_features)
y_test_power=color_model.predict(test_features_single_frame)
y_test_element=model_element.predict(test_features_single_frame)
y_test_motion=model["motion"].predict(test_features)

[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 79ms/step
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 58ms/step


In [93]:
y_categorical = {}
['element', 'motion', 'power']
y_categorical['element']=y_test_element
y_categorical['motion']=y_test_motion
y_categorical['power']=y_test_power

for col, encoder in label_encoders.items():
    print(f"Classes for column {col}: {encoder.classes_}")
print("keys in y_categorical",y_categorical.keys())

Classes for column element: ['Boden' 'Erde' 'Feuer' 'Luft' 'Wasser']
Classes for column motion: ['circular' 'linear' 'random' 'shm' 'zigzag']
Classes for column power: ['baau' 'blau' 'blru' 'brun' 'btau' 'gaun' 'gela' 'gelb' 'geld' 'gele'
 'gelt' 'got' 'grin' 'grun' 'grus' 'helb' 'lela' 'liga' 'lila' 'lili'
 'lisa' 'lula' 'not' 'plau' 'rat' 'rod' 'rot' 'rut' 'slau' 'trun']
keys in y_categorical dict_keys(['element', 'motion', 'power'])


In [94]:
decoded_categorical = {}

# Iterate over each column in y_categorical (representing each class)
for col_name, predictions in y_categorical.items():
    # Get the predicted class (argmax gives the index of the max value in softmax output)
    predicted_classes = np.argmax(predictions, axis=1)
    
    # Convert the predicted class index to actual class label using the LabelEncoder
    decoded_categorical[col_name] = label_encoders[col_name].inverse_transform(predicted_classes)

# Now decoded_categorical will contain the class labels for each column


In [95]:
speed_predictions = y_test_prediction.flatten()  # Ensure the shape is (num_samples,)
summary_x_predictions = list(y_test_summary_x_y[0])
summary_y_predictions = list(y_test_summary_x_y[1])

In [96]:
x_prediction=[]
y_prediction=[]
for i in range(len(summary_x_predictions)):
    x_prediction.append(summary_x_predictions[i].item())
    y_prediction.append(summary_y_predictions[i].item())

In [97]:
print(len(video_ids))
print(len(decoded_categorical['element']))
print(len(decoded_categorical['motion']))
print(len(decoded_categorical['power']))
print(len(speed_predictions))
print(len( list(zip(x_prediction, y_prediction)) ))


3000
3000
3000
3000
3000
3000


In [98]:
output_df = pd.DataFrame({
    'video_id': video_ids,                      
    'element': decoded_categorical['element'],  
    'motion': decoded_categorical['motion'],
    'power': decoded_categorical['power'],
    'speed': speed_predictions,               
    'video_summary': list(zip(x_prediction, y_prediction)) 
})

In [99]:
output_df.to_csv("submission.csv", index=False)

# This is the final submission file to be submitted