**LSTM Implementation**

**Prepare Dataset**

In [2]:
import cv2
import torch
import numpy as np

# Load every frame of one video into memory as a normalised float32 tensor.
input_video_path = "./data_repository/segmented_video"

cap = cv2.VideoCapture(input_video_path)
if not cap.isOpened():
    # Fail early with a clear message; otherwise the read loop below would
    # silently collect zero frames and the permute further down would raise
    # an unrelated-looking shape error.
    cap.release()
    raise IOError(f"Could not open video: {input_video_path}")

# Basic stream metadata as reported by OpenCV (truncated to int).
fps = int(cap.get(cv2.CAP_PROP_FPS))
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

frames = []
labels = []

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # No more frames (or a read failure): stop collecting.
            break

        # OpenCV decodes to BGR; convert to RGB channel order.
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        # Scale to [0, 1] directly in float32 so that no float64 copy of
        # each frame is kept around until the final array is built.
        frames.append(frame.astype(np.float32) / 255.0)

        # TODO: populate `labels` here once the ground-truth (GT) data
        # format is known; it is intentionally left empty for now.
finally:
    # Always free the capture handle, even if decoding raised.
    cap.release()

if not frames:
    raise ValueError(f"No frames could be read from video: {input_video_path}")

video_np = np.stack(frames).astype(np.float32, copy=False)  # Shape: [T, H, W, C]
# NOTE(review): ViscosityEstimator.forward documents its input as
# (batch_size, sequence_length, channels, height, width); the layout built
# here is [C, T, H, W] and would need re-ordering plus a batch dimension
# before being passed to it -- confirm the intended layout.
video_tensor = torch.from_numpy(video_np).permute(3, 0, 1, 2)  # Shape: [C, T, H, W]

print("Video tensor shape:", video_tensor.shape)

**ResNet-LSTM Architecture**

In [None]:
import torch
import torch.nn as nn
from torchvision import models

class ViscosityEstimator(nn.Module):
    """CNN + LSTM regressor producing one scalar per input clip.

    Every frame of a clip is encoded by a torchvision backbone whose last
    child module has been removed. The resulting per-frame feature vectors
    are run through an LSTM, and the LSTM output at the final time step is
    projected to a single value by a linear layer.

    Args:
        cnn_model: Name of a model constructor in ``torchvision.models``
            (resolved with ``getattr``), e.g. ``'resnet18'``.
        lstm_hidden_size: Hidden state size of the LSTM.
        lstm_layers: Number of stacked LSTM layers.
    """

    def __init__(self, cnn_model='resnet18', lstm_hidden_size=128, lstm_layers=2):
        super(ViscosityEstimator, self).__init__()

        # Pre-trained CNN backbone.
        # NOTE(review): recent torchvision releases prefer the `weights=`
        # argument over `pretrained=` -- confirm against the installed version.
        backbone = getattr(models, cnn_model)(pretrained=True)

        # Feature width = input width of the backbone's classification head.
        # Reading it from the model keeps the LSTM input size correct for
        # backbones whose features are not 512-wide; 512 is only a fallback
        # for backbones that expose no `fc.in_features`.
        head = getattr(backbone, 'fc', None)
        self.cnn_out_features = getattr(head, 'in_features', 512)

        # The LAST child is the classification layer > remove it.
        self.cnn = nn.Sequential(*list(backbone.children())[:-1])

        # LSTM over the sequence of per-frame feature vectors.
        self.lstm = nn.LSTM(input_size=self.cnn_out_features,
                            hidden_size=lstm_hidden_size,
                            num_layers=lstm_layers,
                            batch_first=True)

        # Fully connected regression head: one scalar per clip.
        self.fc = nn.Linear(lstm_hidden_size, 1)

    def forward(self, x):
        """Predict one value per clip.

        Args:
            x: Tensor of shape
                (batch_size, sequence_length, channels, height, width).

        Returns:
            Tensor of shape (batch_size, 1).

        Raises:
            ValueError: If ``x`` is not 5-dimensional.
        """
        if x.dim() != 5:
            raise ValueError(
                "expected input of shape (batch_size, sequence_length, "
                f"channels, height, width), got {tuple(x.shape)}"
            )

        # Encode frame by frame (one backbone call per time step) and flatten
        # each result to (batch_size, features).
        cnn_features = [
            self.cnn(frame).flatten(start_dim=1)
            for frame in x.unbind(dim=1)
        ]

        # (batch_size, sequence_length, features)
        cnn_features = torch.stack(cnn_features, dim=1)
        lstm_out, _ = self.lstm(cnn_features)
        # Keep only the LSTM output at the final time step.
        lstm_last_out = lstm_out[:, -1, :]

        viscosity = self.fc(lstm_last_out)  # (batch_size, 1)

        return viscosity


**Training Code**

In [None]:
import torch.optim as optim

# Loss: mean-squared error between the model output and the target.
criterion = nn.MSELoss()

# Network: resnet18 frame encoder feeding a 2-layer LSTM with 128 hidden units.
model = ViscosityEstimator(
    cnn_model='resnet18',
    lstm_hidden_size=128,
    lstm_layers=2,
)

# Optimiser: Adam over all model parameters.
optimizer = optim.Adam(model.parameters(), lr=1e-4)

In [None]:
num_epochs = 1
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# The batches are moved to `device` below, so the model parameters must live
# there too; otherwise the forward pass fails with a device mismatch on CUDA.
model.to(device)

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    num_batches = 0

    for inputs, targets in train_loader:
        inputs, targets = inputs.to(device), targets.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)  # (batch_size, 1)

        # A 1-D target of shape (batch_size,) would broadcast against the
        # (batch_size, 1) output inside MSELoss and give a wrong loss, so add
        # the trailing dimension explicitly.
        if targets.dim() == 1:
            targets = targets.unsqueeze(1)

        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        num_batches += 1

    # Report the mean loss over the epoch rather than only the last batch;
    # an empty loader yields NaN instead of a NameError on `loss`.
    epoch_loss = running_loss / num_batches if num_batches else float('nan')
    print(f"Epoch {epoch + 1}, Loss: {epoch_loss}")