In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the read-only input tree and echo the full path of every file in it.
for folder, _subfolders, names in os.walk('/kaggle/input'):
    full_paths = (os.path.join(folder, name) for name in names)
    for full_path in full_paths:
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import os
import json
import numpy as np
import pandas as pd
import cv2
import torch
import torch.nn as nn
from sklearn.model_selection import train_test_split

# Define preprocessing function
def preprocess(df, step=5):
    """Strip metadata columns, drop incomplete rows and downsample a recording.

    The input frame is left untouched; a new DataFrame is returned.

    Args:
        df: Raw per-recording DataFrame. It must contain every column listed
            in ``drop_list`` below.
        step: Keep every ``step``-th row of the cleaned frame (default 5).

    Returns:
        A new DataFrame without the metadata/confidence columns, without any
        row containing NaN, downsampled by ``step`` and re-indexed from 0.

    Raises:
        KeyError: If a column named in ``drop_list`` is missing from ``df``.
    """
    # Column names (including the "sholder" spelling) must match the input
    # data exactly, so they are kept verbatim.
    drop_list = ["ados_preTest_communication", "ados_preTest_interaction", "ados_preTest_module", "ados_preTest_play",
                 "ados_preTest_protocol", "ados_preTest_socialCommunicationQuestionnaire", "ados_preTest_stereotype",
                 "condition", "frame_rate", "participant_gender", "participant_id", "skeleton_elbow_left_confidence",
                 "skeleton_elbow_right_confidence", "skeleton_hand_left_confidence", "skeleton_hand_right_confidence",
                 "skeleton_head_confidence", "skeleton_sholder_center_confidence", "skeleton_sholder_left_confidence",
                 "skeleton_sholder_right_confidence", "skeleton_wrist_left_confidence", "skeleton_wrist_right_confidence",
                 "task_ability", "task_difficultyLevel", "task_end", "task_index", "task_start", "time"]
    # Non-mutating chain: the caller's frame keeps all of its columns and rows,
    # so re-running a cell on the same raw frame gives the same result.
    cleaned = df.drop(columns=drop_list).dropna(axis=0)
    return cleaned.iloc[::step].reset_index(drop=True)

# Define function to generate image from DataFrame
def getImage(df, image_size=30):
    """Produce one blank single-channel frame per row of ``df``.

    Skeleton rendering is still a placeholder, so every returned frame is an
    all-zero ``uint8`` array of shape ``(image_size, image_size, 1)``.

    Args:
        df: DataFrame with one row per frame; only its row count is used.
        image_size: Height and width of each square frame.

    Returns:
        A list with ``len(df)`` independent NumPy arrays.
    """
    frame_shape = (image_size, image_size, 1)
    # TODO: draw circles and lines for the skeleton joints onto each frame.
    return [np.zeros(frame_shape, dtype=np.uint8) for _ in range(len(df))]

# Define function to open JSON files
def open_json(path):
    """Read and parse a JSON file.

    Args:
        path: Path of the JSON file to read.

    Returns:
        The decoded JSON document (whatever ``json.load`` produces for it).
    """
    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open(path, encoding="utf-8") as f:
        return json.load(f)

# Define function to load data
def load_data(directory, per_user=10):
    """Load up to ``per_user`` preprocessed recordings for every user folder.

    Each file found under ``directory/<user_dir>`` is parsed with
    ``open_json``, turned into a DataFrame and passed through ``preprocess``.
    Recordings that are empty after preprocessing are skipped and do not count
    towards the per-user quota.

    Args:
        directory: Folder whose entries are the per-user folders.
        per_user: Maximum number of non-empty recordings to keep per user.

    Returns:
        ``(videos, y)`` where ``videos`` is a list of feature DataFrames (the
        ``ados_preTest_total`` column removed) and ``y`` is the list of
        matching labels, taken from the first row of ``ados_preTest_total``.
    """
    videos = []
    y = []
    # os.listdir / os.walk return entries in arbitrary order; sorting makes the
    # selected files (and therefore the later train/test split) reproducible.
    for user_dir in sorted(os.listdir(directory)):
        user_count = 0
        for dirpath, dirnames, filenames in os.walk(os.path.join(directory, user_dir)):
            dirnames.sort()  # in-place sort steers os.walk's traversal order
            for filename in sorted(filenames):
                if user_count >= per_user:
                    break
                # Use the directory os.walk is currently in, so files in
                # nested sub-folders are opened at their real location.
                data = open_json(os.path.join(dirpath, filename))
                df = preprocess(pd.DataFrame(data))
                if df.shape[0] == 0:
                    continue
                y.append(df["ados_preTest_total"].iloc[0])
                videos.append(df.drop(["ados_preTest_total"], axis=1))
                user_count += 1
            if user_count >= per_user:
                # Quota reached: stop walking this user's remaining folders.
                break
    return videos, y

# Load data
dataset_directory = "/kaggle/input/dream-dataset-part1/Part 1 Users"
videos, y = load_data(dataset_directory)

# Process video data
image_size = 30
frame_stacks = [np.asarray(getImage(video, image_size), dtype=np.uint8) for video in videos]

# Recordings may contain different numbers of frames, and np.array() cannot
# build a regular array from ragged sequences. Zero-pad every recording at the
# end to the longest one so X has shape
# (n_videos, max_frames, image_size, image_size, 1).
max_frames = max((len(stack) for stack in frame_stacks), default=0)
X = np.zeros((len(frame_stacks), max_frames, image_size, image_size, 1), dtype=np.uint8)
for video_idx, stack in enumerate(frame_stacks):
    if len(stack):
        X[video_idx, :len(stack)] = stack

# Split data into train and test sets
X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the neural network model
class MyModel(nn.Module):
    """CNN + bidirectional LSTM + attention regressor over frame sequences.

    Every frame is embedded by a three-stage convolutional stack, the
    per-frame embeddings are run through a bidirectional LSTM, the LSTM
    outputs are combined with learned attention weights over time, and a
    linear head maps the result to a single value per sequence.

    The last convolutional stage ends in a global max-pool, so each frame
    always yields a 64-dimensional embedding. The previous fixed
    ``MaxPool2d(4)`` reduced the 3x3 map obtained from 30x30 frames to size 0
    and its output did not match the hard-coded LSTM ``input_size=192``.
    Frames must be at least 8x8 so the first two pooling stages leave a
    non-empty feature map.

    Args:
        hidden_size: Hidden size of each LSTM direction (default 128).
    """

    def __init__(self, hidden_size=128):
        super(MyModel, self).__init__()
        # Convolutional block
        self.conv2Dblock = nn.Sequential(
            # 1st convolutional block
            nn.Conv2d(in_channels=1,
                      out_channels=16,
                      kernel_size=3,
                      stride=1,
                      padding=1),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Dropout(p=0.2),
            # 2nd convolutional block
            nn.Conv2d(in_channels=16,
                      out_channels=32,
                      kernel_size=3,
                      stride=1,
                      padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=4, stride=4),
            nn.Dropout(p=0.2),
            # 3rd convolutional block
            nn.Conv2d(in_channels=32,
                      out_channels=64,
                      kernel_size=3,
                      stride=1,
                      padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            # Global max-pool: output is (N, 64, 1, 1) for any input size.
            nn.AdaptiveMaxPool2d(1),
            nn.Dropout(p=0.2)
        )
        conv_features = 64  # channels of the last conv stage after global pooling
        # LSTM block
        self.lstm = nn.LSTM(input_size=conv_features, hidden_size=hidden_size, bidirectional=True, batch_first=True)
        self.dropout_lstm = nn.Dropout(p=0.3)
        self.attention_linear = nn.Linear(2 * hidden_size, 1)  # 2 * hidden_size for the 2 outputs of bidirectional LSTM
        # Linear regression head: one scalar per sequence
        self.out_linear = nn.Linear(2 * hidden_size, 1)

    def forward(self, x):
        """Predict one value per sequence.

        Args:
            x: Float tensor of shape ``(batch, time, height, width)`` or
                ``(batch, time, height, width, 1)`` (trailing channel axis).

        Returns:
            Tensor of shape ``(batch, 1)``.

        Raises:
            ValueError: If ``x`` has neither of the accepted shapes.
        """
        if x.dim() == 5:
            if x.size(-1) != 1:
                raise ValueError(f"expected a single trailing channel, got shape {tuple(x.shape)}")
            x = x.squeeze(-1)
        if x.dim() != 4:
            raise ValueError(f"expected input of shape (B, T, H, W) or (B, T, H, W, 1), got {tuple(x.shape)}")

        batch_size, time_steps, height, width = x.size()
        # Fold time into the batch axis so every frame goes through the CNN.
        x = x.reshape(batch_size * time_steps, 1, height, width)
        conv_embedding = self.conv2Dblock(x)  # (B*T, 64, 1, 1)
        # Restore the (batch, time) axes and flatten everything else.
        conv_embedding = conv_embedding.reshape(batch_size, time_steps, -1)
        lstm_embedding, _ = self.lstm(conv_embedding)
        lstm_embedding = self.dropout_lstm(lstm_embedding)  # (B, T, 2*hidden_size)
        # One attention score per time step, computed for all steps at once.
        attention_scores = self.attention_linear(lstm_embedding)  # (B, T, 1)
        attention_weights_norm = nn.functional.softmax(attention_scores.transpose(1, 2), dim=-1)  # (B, 1, T)
        attention = torch.bmm(attention_weights_norm, lstm_embedding)  # (B,1,T)*(B,T,2*hidden_size)=(B,1,2*hidden_size)
        attention = torch.squeeze(attention, 1)
        prediction = self.out_linear(attention)

        return prediction

# Initialize the model, optimizer, and loss function
torch.manual_seed(42)  # reproducible weight initialisation and dropout masks
model = MyModel()
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
loss_fn = nn.MSELoss()

# Convert data to PyTorch tensors
# Targets are reshaped to (N, 1) to match the model's (N, 1) output. With a
# flat (N,) target, MSELoss would broadcast the pair to (N, N) and average
# over every prediction/target combination instead of matching pairs.
X_train_tensor = torch.tensor(X_train, dtype=torch.float32).to(device)
Y_train_tensor = torch.tensor(Y_train, dtype=torch.float32).reshape(-1, 1).to(device)

# Train the model (full-batch gradient descent)
epochs = 1000
model.train()
for epoch in range(epochs):
    optimizer.zero_grad()
    outputs = model(X_train_tensor)
    loss = loss_fn(outputs, Y_train_tensor)
    loss.backward()
    optimizer.step()
    if epoch % 100 == 0:
        print(f"Epoch {epoch}: Loss {loss.item()}")

# Evaluate the model on test data
X_test_tensor = torch.tensor(X_test, dtype=torch.float32).to(device)
Y_test_tensor = torch.tensor(Y_test, dtype=torch.float32).reshape(-1, 1).to(device)
model.eval()
with torch.no_grad():
    test_outputs = model(X_test_tensor)
    test_loss = loss_fn(test_outputs, Y_test_tensor)
    print(f"Test Loss: {test_loss.item()}")


In [None]:
# `df` only exists as a local variable inside the helper functions, so it is
# undefined here on a fresh kernel. Inspect the feature columns of the first
# preprocessed recording instead.
videos[0].columns