**LSTM Implementation**

**Prepare Dataset**

In [1]:
import cv2
import torch
import numpy as np
from torch.utils.data import TensorDataset, DataLoader
import torch.utils.data as data_utils
import torch.nn as nn
from torchvision import models
import os
import system

ModuleNotFoundError: No module named 'cv2'

In [2]:
# Directory holding the segmented input clips (machine-specific absolute path).
folder_path = "C:/Users/juhye/Desktop/Current Files/플젝/SlurryViscNet/data_repository/segmented_video"

# os.listdir() returns entries in arbitrary order; sort them so the clip order
# (and therefore any index-aligned labels) is identical on every run.
video_files = sorted(f for f in os.listdir(folder_path) if f.endswith('.mp4'))

print(len(video_files))

# Full path of every clip, in the same (sorted) order as `video_files`.
video_paths = [os.path.join(folder_path, f) for f in video_files]

2


In [None]:
# Path of one specific clip, kept from the original cell. The loop below reads
# every entry of `video_paths` rather than re-opening this file each time.
input_video_path = "C:/Users/juhye/Desktop/Current Files/플젝/SlurryViscNet/data_repository/segmented_video/output_flag_input2.mp4"

videos = []

for path in video_paths:
    # Open the clip belonging to this iteration (previously the loop variable
    # was ignored and `input_video_path` was decoded once per entry).
    cap = cv2.VideoCapture(path)
    if not cap.isOpened():
        cap.release()
        raise IOError(f"Could not open video file: {path}")

    # Clip metadata as reported by OpenCV (values of the last clip remain
    # available after the loop).
    fps = int(cap.get(cv2.CAP_PROP_FPS))
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    frames = []
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                # No more frames could be read from this clip.
                break

            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            # Scale to [0, 1] directly in float32 so frames are not held as
            # float64 before the final conversion.
            frames.append(frame.astype(np.float32) / 255.0)
    finally:
        # Release the decoder even if reading or conversion raised.
        cap.release()

    video_np = np.array(frames, dtype=np.float32)  # Shape: [T, H, W, C]

    videos.append(video_np)

# Stacking requires every clip to have the same frame count and resolution;
# np.stack raises ValueError otherwise (and when no clip was loaded).
video_datas_np = np.stack(videos)  # Shape: [N, T, H, W, C]

# Build the tensor from the stacked 5-D array. The previous code passed the
# last clip's 4-D `video_np`, which cannot be permuted with five axes.
video_tensor = torch.tensor(video_datas_np).permute(0, 1, 4, 2, 3)  # Shape: [N, T, C, H, W]

# Random placeholder targets, one value per clip, so the target shape matches
# the (batch_size, 1) output of the model and MSELoss does not broadcast.
labels_tensor = torch.rand(video_tensor.shape[0], 1)

print("Video tensor shape:", video_tensor.shape)

In [18]:
# Pair each clip with its label so both are retrieved by the same index.
train_dataset = TensorDataset(video_tensor, labels_tensor)

# Serve the dataset in shuffled mini-batches of four clips.
train_loader = DataLoader(
    train_dataset,
    batch_size=4,
    shuffle=True,
)

**ResNet-LSTM Architecture**

In [19]:

class ViscosityEstimator(nn.Module):
    """CNN + LSTM regressor producing one scalar prediction per video clip.

    Every frame is encoded by a torchvision backbone whose last child module
    has been removed, the per-frame feature vectors are passed through an
    LSTM, and the LSTM output at the final time step is projected to a single
    value by a linear layer.

    Args:
        cnn_model: Name of a constructor in ``torchvision.models``.
        lstm_hidden_size: Hidden size of the LSTM.
        lstm_layers: Number of stacked LSTM layers.
    """

    def __init__(self, cnn_model='resnet18', lstm_hidden_size=128, lstm_layers=2):
        super(ViscosityEstimator, self).__init__()

        # Pre-trained CNN.
        # NOTE(review): recent torchvision releases deprecate `pretrained=` in
        # favour of `weights=`; kept as-is because the installed version is
        # not visible from this notebook.
        backbone = getattr(models, cnn_model)(pretrained=True)

        # Feature width is read from the backbone's classifier instead of
        # being fixed at 512, so wider ResNets (e.g. resnet50) produce a
        # correctly sized LSTM input. Backbones without a linear `fc`
        # attribute fall back to the previous value of 512.
        classifier = getattr(backbone, 'fc', None)
        if isinstance(classifier, nn.Linear):
            self.cnn_out_features = classifier.in_features
        else:
            self.cnn_out_features = 512

        # Drop the LAST child module (the classification layer on ResNets)
        # and keep the rest as the frame feature extractor.
        self.cnn = nn.Sequential(*list(backbone.children())[:-1])

        # LSTM over the sequence of per-frame feature vectors.
        self.lstm = nn.LSTM(input_size=self.cnn_out_features,
                            hidden_size=lstm_hidden_size,
                            num_layers=lstm_layers,
                            batch_first=True)

        # Fully connected head mapping the last LSTM output to one value.
        self.fc = nn.Linear(lstm_hidden_size, 1)

    def forward(self, x):
        """Predict one value per clip.

        Args:
            x: Tensor of shape (batch_size, sequence_length, channels,
                height, width).

        Returns:
            Tensor of shape (batch_size, 1).

        Raises:
            ValueError: If ``x`` is not 5-dimensional.
        """
        if x.dim() != 5:
            raise ValueError(
                "expected input of shape (batch_size, sequence_length, "
                f"channels, height, width), got {tuple(x.shape)}"
            )
        batch_size, seq_len = x.shape[:2]

        cnn_features = []

        # Frames are encoded one time step at a time, so the CNN sees a batch
        # of `batch_size` images on every call.
        for t in range(seq_len):
            frame = x[:, t, :, :, :]
            frame_features = self.cnn(frame)
            # Collapse the trailing dimensions into one vector per sample.
            frame_features = frame_features.reshape(batch_size, -1)
            cnn_features.append(frame_features)

        # (batch_size, sequence_length, cnn_out_features)
        cnn_features = torch.stack(cnn_features, dim=1)
        lstm_out, _ = self.lstm(cnn_features)
        # Keep only the output of the final time step.
        lstm_last_out = lstm_out[:, -1, :]

        viscosity = self.fc(lstm_last_out)  # (batch_size, 1)

        return viscosity


**Training Code**

In [None]:
import torch.optim as optim

# Network to train: ResNet-18 frame encoder feeding a 2-layer LSTM with
# 128 hidden units.
model = ViscosityEstimator(
    cnn_model='resnet18',
    lstm_hidden_size=128,
    lstm_layers=2,
)

# Mean-squared-error objective for the regression target.
criterion = nn.MSELoss()

# Adam over all model parameters with a learning rate of 1e-4.
optimizer = optim.Adam(params=model.parameters(), lr=1e-4)

In [None]:
num_epochs = 1
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# The batches are moved to `device` below, so the model has to live there
# too; otherwise a CUDA run fails with a device-mismatch error.
model.to(device)

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    num_batches = 0

    for inputs, targets in train_loader:
        inputs, targets = inputs.to(device), targets.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        num_batches += 1

    # Report the mean loss over the epoch rather than only the last batch;
    # an empty loader yields NaN instead of a NameError on `loss`.
    epoch_loss = running_loss / num_batches if num_batches else float('nan')
    print(f"Epoch {epoch + 1}, Loss: {epoch_loss}")