In [None]:
!pip install -q ultralytics
!pip install -q deep_sort_realtime
import cv2
import torch
import time
import numpy as np
from deep_sort_realtime.deepsort_tracker import DeepSort
from ultralytics import YOLO
from collections import defaultdict

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

# Dummy dataset (replace with real movement trajectories and labels).
# The global RNG is seeded first so the synthetic data -- and everything
# downstream that draws from the same RNG -- is reproducible after a
# kernel restart.
SEED = 42
torch.manual_seed(SEED)

X = torch.randn(100, 20, 2)  # 100 sequences, 20 points, (x,y)
y = torch.randint(0, 3, (100,))  # 3 classes: 0=Walking, 1=Standing, 2=Loitering

# Pair each trajectory with its label and serve shuffled mini-batches of 16.
train_dataset = TensorDataset(X, y)
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)

# Define model
class BehaviorClassifier(nn.Module):
    """LSTM sequence classifier for movement trajectories.

    A single-layer LSTM reads the trajectory and a linear layer maps the
    final hidden state to class logits.

    Args:
        input_size: Number of features per time step (default 2, i.e. x and y).
        hidden_size: Width of the LSTM hidden state (default 64).
        num_classes: Number of output classes (default 3).

    The defaults reproduce the original fixed 2 -> 64 -> 3 architecture, so
    ``BehaviorClassifier()`` yields the same parameter names and shapes as
    before.
    """

    def __init__(self, input_size=2, hidden_size=64, num_classes=3):
        super().__init__()
        # batch_first=True: inputs are laid out as (batch, seq_len, input_size).
        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return class logits of shape (batch, num_classes) for ``x``."""
        # Only the final hidden state is needed; per-step outputs and the
        # cell state are discarded.
        _, (h, _) = self.lstm(x)
        # h is (num_layers, batch, hidden_size); h[-1] selects the last layer.
        return self.fc(h[-1])

model = BehaviorClassifier()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Train loop
NUM_EPOCHS = 20
model.train()  # make the training mode explicit
for epoch in range(NUM_EPOCHS):
    running_loss = 0.0
    samples_seen = 0
    for batch_x, batch_y in train_loader:
        optimizer.zero_grad()
        output = model(batch_x)
        loss = criterion(output, batch_y)
        loss.backward()
        optimizer.step()

        # criterion averages over the batch, so weight by batch size: the
        # last mini-batch of an epoch can be smaller than the others.
        batch_size = batch_y.size(0)
        running_loss += loss.item() * batch_size
        samples_seen += batch_size

    # Report the mean loss over the whole epoch rather than the loss of
    # whichever mini-batch happened to come last.
    epoch_loss = running_loss / max(samples_seen, 1)
    print(f"Epoch {epoch+1}, Loss: {epoch_loss:.4f}")

# Save the model (weights only, relative to the current working directory)
torch.save(model.state_dict(), "lstm_model.pth")


Epoch 1, Loss: 1.0796
Epoch 2, Loss: 1.1081
Epoch 3, Loss: 1.0891
Epoch 4, Loss: 1.1829
Epoch 5, Loss: 1.0553
Epoch 6, Loss: 0.9994
Epoch 7, Loss: 0.8939
Epoch 8, Loss: 0.9265
Epoch 9, Loss: 1.1100
Epoch 10, Loss: 0.9026
Epoch 11, Loss: 1.0359
Epoch 12, Loss: 1.0942
Epoch 13, Loss: 1.2864
Epoch 14, Loss: 1.2961
Epoch 15, Loss: 0.9847
Epoch 16, Loss: 1.2295
Epoch 17, Loss: 0.9949
Epoch 18, Loss: 0.9752
Epoch 19, Loss: 1.0454
Epoch 20, Loss: 1.0828


In [None]:
# Object detector: weights file handed to the ultralytics YOLO constructor.
YOLO_WEIGHTS = "yolov8n.pt"
yolo_model = YOLO(YOLO_WEIGHTS)

# LSTM behavior classification model
class BehaviorClassifier(torch.nn.Module):
    """Trajectory classifier: one LSTM layer followed by a linear head.

    Consumes batches of (x, y) point sequences and emits three logits per
    sequence. The attribute names ``lstm`` and ``fc`` determine the
    state_dict keys, so they must match the checkpoint being loaded.
    """

    def __init__(self):
        super().__init__()
        # 2 input features per step, 64 hidden units, batch-major input.
        self.lstm = torch.nn.LSTM(2, 64, batch_first=True)
        self.fc = torch.nn.Linear(in_features=64, out_features=3)

    def forward(self, x):
        """Map ``x`` of shape (batch, seq_len, 2) to logits of shape (batch, 3)."""
        _step_outputs, (hidden, _cell) = self.lstm(x)
        final_hidden = hidden[-1]  # last layer's hidden state after the final step
        logits = self.fc(final_hidden)
        return logits

# Rebuild the classifier and restore the trained weights.
# The checkpoint is read from the same relative path the training cell writes
# ("lstm_model.pth"), so the notebook works both on Kaggle (where the working
# directory is /kaggle/working) and on a local machine.
behavior_model = BehaviorClassifier()
behavior_model.load_state_dict(torch.load("lstm_model.pth", map_location=torch.device('cpu')))
behavior_model.eval()  # inference mode for the layers

# Index i of both lists corresponds to class i predicted by the classifier.
behavior_labels = ["Walking", "Standing", "Loitering"]
# Drawing colors passed to OpenCV, which interprets tuples as (B, G, R).
colors = [(0,255,0), (255,255,0), (0,0,255)]

In [None]:
# Multi-object tracker shared by every frame of the processing loop.
# NOTE(review): max_age is presumably the number of consecutive missed frames
# tolerated before a track is dropped -- confirm against deep_sort_realtime docs.
TRACK_MAX_AGE = 30
deep_sort = DeepSort(max_age=TRACK_MAX_AGE)

In [None]:
# How long (in seconds) a track must stay classified as loitering before alerting.
LOITER_THRESHOLD = 5

# Per-track bookkeeping, keyed by track_id.
loiter_timer = dict()  # track_id -> [start_time, is_triggered]
track_history = defaultdict(list)  # track_id -> recent center points fed to the LSTM

# Every frame is resized to this width/height before detection.
resize_width = 640
resize_height = 360

In [None]:
def blend_frames(rgb_frame, thermal_frame, alpha=0.5):
    """Blend an RGB frame with a thermal frame as a weighted sum.

    The RGB frame is first resized to the thermal frame's width and height,
    then combined as ``alpha * rgb + (1 - alpha) * thermal``.

    Args:
        rgb_frame: Image array from the RGB source.
        thermal_frame: Image array from the thermal source; its size defines
            the size of the result.
        alpha: Weight given to the RGB frame; the thermal frame gets ``1 - alpha``.

    Returns:
        The blended image produced by ``cv2.addWeighted``.
    """
    thermal_height, thermal_width = thermal_frame.shape[:2]
    # cv2.resize takes the target size as (width, height).
    rgb_matched = cv2.resize(rgb_frame, (thermal_width, thermal_height))
    thermal_weight = 1 - alpha
    return cv2.addWeighted(rgb_matched, alpha, thermal_frame, thermal_weight, 0)

# ========================== Video Input ==============================
# Choose the capture source: recorded RGB + thermal files, or the live webcam.
use_webcam = True
if not use_webcam:
    # File mode: paired RGB and thermal recordings.
    cap = cv2.VideoCapture("/kaggle/input/input-video/rgb.mp4")
    thermal_cap = cv2.VideoCapture("/kaggle/input/thermal-video/thermal.mp4")
else:
    # Webcam mode: device 0 only; no thermal stream, so frames are not blended.
    cap = cv2.VideoCapture(0)
    thermal_cap = None

# Tunables for the processing loop.
PERSON_CLASS_ID = 0          # class index treated as "person" in the detector output
MIN_DETECTION_SCORE = 0.5    # detections at or below this confidence are ignored
HISTORY_LENGTH = 20          # max center points kept per track
MIN_POINTS_FOR_PREDICTION = 10  # classify only once this many points are available

frame_count = 0
while True:
    # -------------------- Grab the next frame(s) --------------------
    ret_rgb, rgb_frame = cap.read()
    if thermal_cap:
        ret_thermal, thermal_frame = thermal_cap.read()
        # Stop as soon as either stream runs out.
        if not ret_rgb or not ret_thermal:
            break
        frame = blend_frames(rgb_frame, thermal_frame)
    else:
        if not ret_rgb:
            break
        frame = rgb_frame

    # Resize
    frame = cv2.resize(frame, (resize_width, resize_height))

    # ==================== YOLO Detection ==========================
    results = yolo_model(frame)[0]
    detections = []
    for box in results.boxes.data.tolist():
        x1, y1, x2, y2, score, cls = box
        if int(cls) == PERSON_CLASS_ID and score > MIN_DETECTION_SCORE:  # Person class
            # The tracker is fed ([left, top, width, height], score, label).
            detections.append(([x1, y1, x2 - x1, y2 - y1], score, "person"))

    # ==================== DeepSORT Tracking ==========================
    tracks = deep_sort.update_tracks(detections, frame=frame)

    # Forget per-track state for tracks the tracker no longer reports;
    # otherwise both dicts grow for as long as the loop runs.
    live_ids = {track.track_id for track in tracks}
    for state in (track_history, loiter_timer):
        for stale_id in [tid for tid in state if tid not in live_ids]:
            del state[stale_id]

    for track in tracks:
        if not track.is_confirmed():
            continue

        track_id = track.track_id
        # to_ltrb() yields (left, top, right, bottom) -- the last two values
        # are already corner coordinates, not width/height.
        l, t, r, b = track.to_ltrb()
        center = [int((l + r) / 2), int((t + b) / 2)]

        # Update movement history (keep only the most recent points)
        track_history[track_id].append(center)
        if len(track_history[track_id]) > HISTORY_LENGTH:
            track_history[track_id] = track_history[track_id][-HISTORY_LENGTH:]

        # Defaults used until enough history exists to classify the track.
        label = "Unknown"
        color = (200, 200, 200)

        if track_id not in loiter_timer:
            loiter_timer[track_id] = [None, False]

        # ================== LSTM Behavior Classification ==================
        if len(track_history[track_id]) >= MIN_POINTS_FOR_PREDICTION:
            # Shape (1, n_points, 2): a batch holding this one trajectory.
            traj = torch.tensor(track_history[track_id], dtype=torch.float32).unsqueeze(0)
            # Inference only: skip autograd bookkeeping.
            with torch.no_grad():
                pred = behavior_model(traj)
            pred_idx = pred.argmax(dim=1).item()
            label = behavior_labels[pred_idx]
            color = colors[pred_idx]

            # Loitering Alert
            if label == "Loitering":
                if loiter_timer[track_id][0] is None:
                    # First loitering prediction: start the clock.
                    loiter_timer[track_id][0] = time.time()
                else:
                    elapsed = time.time() - loiter_timer[track_id][0]
                    if elapsed > LOITER_THRESHOLD and not loiter_timer[track_id][1]:
                        # Log once per loitering episode.
                        print(f"\u2757 ALERT: Track {track_id} LOITERING > {int(elapsed)}s")
                        loiter_timer[track_id][1] = True

                # Keep the overlay visible for as long as the alert is active
                # rather than on the single frame where it fired. ASCII only:
                # OpenCV's Hershey fonts render other characters as '?'.
                if loiter_timer[track_id][1]:
                    cv2.putText(frame, "ALERT", (int(l), int(b) + 20),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 255), 3)
            else:
                # Any other prediction ends the loitering episode.
                loiter_timer[track_id] = [None, False]

        # ================== Draw Boxes and Labels ==========================
        cv2.rectangle(frame, (int(l), int(t)), (int(r), int(b)), color, 2)
        cv2.putText(frame, f"{label} ID:{track_id}", (int(l), int(t) - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

    frame_count += 1
    if frame_count % 10 == 0:
        print(f"Processed frame {frame_count}")

    # Show the annotated frame; press 'q' in the window to stop.
    cv2.imshow("Surveillance", frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

In [None]:
# Release every capture that was opened (the thermal one may be absent),
# then close any OpenCV display windows.
_open_captures = [cap] + ([thermal_cap] if thermal_cap else [])
for _capture in _open_captures:
    _capture.release()
cv2.destroyAllWindows()