<a href="https://colab.research.google.com/github/asserhan/Human_Recognition/blob/main/Copie_de_TensorFlow_with_GPU.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# TensorFlow with GPU

This notebook provides an introduction to computing on a [GPU](https://cloud.google.com/gpu) in Colab. In this notebook you will connect to a GPU, and then run some basic TensorFlow operations on both the CPU and a GPU, observing the speedup provided by using the GPU.


## Enabling and testing the GPU

First, you'll need to enable GPUs for the notebook:

- Navigate to Edit→Notebook Settings
- Select GPU from the Hardware Accelerator drop-down

Next, we'll confirm that we can connect to the GPU with TensorFlow:

In [None]:
import tensorflow as tf
# tf.test.gpu_device_name() yields the name of the first visible GPU; anything
# other than '/device:GPU:0' means the runtime has no usable GPU attached.
device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
    raise SystemError('GPU device not found')
# Only reached when the GPU is really there (the original condition was
# inverted and printed "Found GPU" precisely when no GPU was found).
print('Found GPU at: {}'.format(device_name))

## Observe TensorFlow speedup on GPU relative to CPU

This example constructs a typical convolutional neural network layer over a
random image and manually places the resulting ops on either the CPU or the GPU
to compare execution speed.

In [None]:
import tensorflow as tf
import timeit

# Stop the benchmark early when no GPU is attached: the comparison below
# places ops explicitly on '/device:GPU:0'.
device_name = tf.test.gpu_device_name()
gpu_missing = device_name != '/device:GPU:0'
if gpu_missing:
  setup_hint = (
      '\n\nThis error most likely means that this notebook is not '
      'configured to use a GPU.  Change this in Notebook Settings via the '
      'command palette (cmd/ctrl-shift-P) or the Edit menu.\n\n')
  print(setup_hint)
  raise SystemError('GPU device not found')

def _conv_sum_on(device):
  """Run one Conv2D layer over a random image batch on `device`.

  Builds a (100, 100, 100, 3) random-normal tensor, applies a freshly created
  Conv2D layer with 32 filters and kernel size 7, and returns the sum of the
  layer output. Everything is created inside the `tf.device` scope so the
  ops are placed on the requested device.
  """
  with tf.device(device):
    random_image = tf.random.normal((100, 100, 100, 3))
    net = tf.keras.layers.Conv2D(32, 7)(random_image)
    return tf.math.reduce_sum(net)

def cpu():
  """Benchmark workload placed on the CPU."""
  return _conv_sum_on('/cpu:0')

def gpu():
  """Benchmark workload placed on the first GPU."""
  return _conv_sum_on('/device:GPU:0')

# Warm-up pass: the first call on each device carries one-off setup cost that
# should not be timed; see: https://stackoverflow.com/a/45067900
cpu()
gpu()

# Timed passes: ten calls per device, reported as the total elapsed seconds.
print('Time (s) to convolve 32x7x7x3 filter over random 100x100x100x3 images '
      '(batch x height x width x channel). Sum of ten runs.')

print('CPU (s):')
cpu_time = timeit.timeit(cpu, number=10)
print(cpu_time)

print('GPU (s):')
gpu_time = timeit.timeit(gpu, number=10)
print(gpu_time)

# The ratio is truncated to an integer, as in the reported "21x".
print(f'GPU speedup over CPU: {int(cpu_time/gpu_time)}x')

Time (s) to convolve 32x7x7x3 filter over random 100x100x100x3 images (batch x height x width x channel). Sum of ten runs.
CPU (s):
4.834257801000149
GPU (s):
0.2202789739999389
GPU speedup over CPU: 21x


# New section: list the GPUs visible to TensorFlow


In [None]:
import tensorflow as tf
# Ask TensorFlow which physical GPU devices it can see and show the list.
visible_gpus = tf.config.list_physical_devices('GPU')
print(visible_gpus)


[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


# Data Collection

In [None]:
!pip install mediapipe opencv-python




In [None]:
!pip install -q roboflow

from roboflow import Roboflow

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/86.9 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m86.9/86.9 kB[0m [31m4.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m66.8/66.8 kB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m49.9/49.9 MB[0m [31m19.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.2/4.2 MB[0m [31m87.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.9/4.9 MB[0m [31m89.8 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
# Use existing datasets - no need to collect your own
datasets = [
    "Human Action Recognition Dataset",  # Already contains running
    "Roboflow Fall Detection Dataset",   # Contains fall examples
    "UCF-Crime Dataset",                # Contains suspicious activities
    "Video Surveillance Dataset"        # Contains normal activities
]

# Download datasets
!pip install datasets --upgrade

from datasets import load_dataset

# Get human action recognition data
# Load from Hugging Face Hub directly
har_dataset = load_dataset("Bingsu/Human_Action_Recognition")
print(har_dataset)


# Get fall detection data from Roboflow
ROBOFLOW_API_KEY = "mQMjkZ68pG6tCNHpWPZ1"
rf = Roboflow(api_key=ROBOFLOW_API_KEY)
fall_project = rf.workspace("roboflow-universe-projects").project("fall-detection-ca3o8")
fall_dataset = fall_project.version(1).download("yolov8")

DatasetDict({
    train: Dataset({
        features: ['image', 'labels'],
        num_rows: 12600
    })
    test: Dataset({
        features: ['image', 'labels'],
        num_rows: 5400
    })
})
loading Roboflow workspace...
loading Roboflow project...


Downloading Dataset Version Zip in Fall-Detection-1 to yolov8:: 100%|██████████| 142831/142831 [00:02<00:00, 67146.19it/s]





Extracting Dataset Version Zip to Fall-Detection-1 in yolov8:: 100%|██████████| 9006/9006 [00:01<00:00, 8408.97it/s]


In [None]:
# Show the schema of the training split (column names and label classes).
train_features = har_dataset['train'].features
print(train_features)

{'image': Image(mode=None, decode=True), 'labels': ClassLabel(names=['calling', 'clapping', 'cycling', 'dancing', 'drinking', 'eating', 'fighting', 'hugging', 'laughing', 'listening_to_music', 'running', 'sitting', 'sleeping', 'texting', 'using_laptop'])}


# Data Preprocessing

In [None]:
# Class-name lookup: position i holds the string name of integer label i.
label_names = har_dataset['train'].features['labels'].names

# Activities treated as "normal" behaviour (binary target 0).
NORMAL_KEEP = {'sitting', 'using_laptop', 'texting', 'listening_to_music', 'hugging', 'laughing'}

# Despite the name, this list holds the image objects themselves, not paths.
har_image_paths = []
har_labels = []

for example in har_dataset['train']:
    activity = label_names[example['labels']]   # integer index -> class name

    if activity == 'running':
        target = 1                               # running = 1
    elif activity in NORMAL_KEEP:
        target = 0                               # normal = 0
    else:
        continue                                 # every other activity is dropped

    har_image_paths.append(example['image'])
    har_labels.append(target)


In [None]:
# Summarize the filtered subset: total size and how many samples per target.
num_images = len(har_image_paths)
label_counts = {target: har_labels.count(target) for target in (0, 1)}
print("Number of images:", num_images)
print("Labels count:", label_counts)


Number of images: 5880
Labels count: {0: 5040, 1: 840}


In [None]:
from torchvision import transforms

# `Dataset` was never imported anywhere in the notebook, so this class only
# worked thanks to leftover kernel state; import it next to its use.
from torch.utils.data import Dataset


class HARImageDataset(Dataset):
    """Map-style dataset that turns single images into fake video clips.

    Each image is transformed to a (C, H, W) tensor and then repeated along a
    new time axis so that one sample has shape (C, T, H, W) with T = `frames`.

    Args:
        images: sequence of images; the notebook passes PIL images.
        labels: sequence of integer labels, aligned with `images`.
        transform: optional callable applied to each image. It must return a
            (C, H, W) tensor. When it is None the images themselves must
            already be tensors, because `unsqueeze` is called on the result.
        frames: number of times the image is repeated along the time axis.
    """

    def __init__(self, images, labels, transform=None, frames=16):
        self.images = images        # list of PIL Images
        self.labels = labels        # list of ints (0 or 1)
        self.transform = transform
        self.frames = frames        # number of frames to simulate video

    def __len__(self):
        """Return the number of samples."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return `(clip, label)` for sample `idx`; clip is (C, T, H, W)."""
        img = self.images[idx]
        if self.transform:
            img = self.transform(img)  # (C,H,W), tensor

        # Repeat img along the time dimension to simulate video clip
        img = img.unsqueeze(1).repeat(1, self.frames, 1, 1)  # (C, T, H, W)

        label = self.labels[idx]
        return img, label

# Preprocessing applied to every PIL image before it becomes a clip:
# resize to 224x224, convert to a tensor, then normalize per channel.
# NOTE(review): the mean/std values look like the usual ImageNet statistics,
# presumably chosen to match the pretrained backbone; confirm.
_resize_step = transforms.Resize((224, 224))
_to_tensor_step = transforms.ToTensor()
_normalize_step = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                       std=[0.229, 0.224, 0.225])
transform = transforms.Compose([_resize_step, _to_tensor_step, _normalize_step])


In [None]:
dataset = HARImageDataset(images=har_image_paths, labels=har_labels, transform=transform)

# Pull the first sample and inspect it as a quick sanity check.
first_sample = dataset[0]
x, y = first_sample
print(x.shape)  # should print: torch.Size([3, 16, 224, 224])
print(y)        # label: 0 or 1


torch.Size([3, 16, 224, 224])
0


# Model Architecture

In [None]:
import torch.nn as nn
from torchvision.models.video import r3d_18

class BankSecurityModel(nn.Module):
    """Video classifier: an `r3d_18` backbone with a replaced final layer.

    Args:
        num_classes: output size of the new final linear layer.
    """

    def __init__(self, num_classes=3):
        super().__init__()

        # Load the backbone with its pretrained weights, then swap the final
        # fully connected layer for one producing `num_classes` outputs.
        backbone = r3d_18(pretrained=True)
        in_features = backbone.fc.in_features
        backbone.fc = nn.Linear(in_features, num_classes)
        self.backbone = backbone

    def forward(self, x):
        """Return class logits for `x` of shape (batch, channels, frames, height, width)."""
        logits = self.backbone(x)
        return logits

# Initialize model with three output logits. Constructing it calls
# r3d_18(pretrained=True), which fetches the pretrained weights when needed.
model = BankSecurityModel(num_classes=3)


Downloading: "https://download.pytorch.org/models/r3d_18-b3b3357e.pth" to /root/.cache/torch/hub/checkpoints/r3d_18-b3b3357e.pth
100%|██████████| 127M/127M [00:00<00:00, 187MB/s]


In [None]:
import torch  # not imported earlier in the notebook; needed on a fresh kernel

# Example input: batch of 2 videos
dummy_input = torch.randn(2, 3, 16, 224, 224)  # (batch_size, channels, frames, height, width)

# Shape check only: use eval mode and disable autograd so random noise neither
# updates normalization statistics nor builds an unused computation graph.
model.eval()
with torch.no_grad():
    output = model(dummy_input)
print(output.shape)  # should be (2, 3) for 3 classes


torch.Size([2, 3])


# Training setup

In [None]:
# These names were used here before the later cell that imports them, so the
# cell failed on a fresh kernel; import them where they are first needed.
import torch
import torch.optim as optim
from torch.nn import CrossEntropyLoss

# 1. Define model
model = BankSecurityModel(num_classes=3)

# 2. Send to GPU if available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)

# 3. Define loss, optimizer, scheduler
criterion = CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
# StepLR multiplies the learning rate by gamma every step_size scheduler steps.
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)


In [None]:
import torch.optim as optim
from torch.nn import CrossEntropyLoss

# Loss, optimizer and learning-rate schedule for the current global `model`.
criterion = CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)
scheduler = optim.lr_scheduler.StepLR(optimizer=optimizer, step_size=10, gamma=0.1)

def train_model(model, dataloader, device, epochs=20, save_path="trained_model.pth",
                criterion=None, optimizer=None, scheduler=None):
    """Train `model` on `dataloader` and save its state_dict to `save_path`.

    The loss, optimizer and scheduler used to be read from module-level
    globals, which silently trained the wrong parameters whenever `model` was
    not the object the global optimizer had been built from. They are now
    optional arguments whose defaults are created for the model passed in.

    Args:
        model: the network to train; moved to `device`.
        dataloader: sized iterable yielding `(data, target)` tensor batches.
        device: device the model and each batch are moved to.
        epochs: number of passes over `dataloader`.
        save_path: where the trained state_dict is written.
        criterion: loss callable; defaults to `CrossEntropyLoss()`.
        optimizer: defaults to `Adam(model.parameters(), lr=0.001)`.
        scheduler: stepped once per epoch; defaults to
            `StepLR(optimizer, step_size=10, gamma=0.1)`.

    Returns:
        The same `model` object, after training.

    Raises:
        ValueError: if `dataloader` has no batches (the per-epoch averages
            would otherwise divide by zero).
    """
    import torch  # local import: the notebook only imports torch in a later cell

    num_batches = len(dataloader)
    if num_batches == 0:
        raise ValueError("dataloader is empty; nothing to train on")

    # Move the model first so the default optimizer is built from the
    # parameters that are actually trained.
    model.to(device)

    if criterion is None:
        criterion = CrossEntropyLoss()
    if optimizer is None:
        optimizer = optim.Adam(model.parameters(), lr=0.001)
    if scheduler is None:
        scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0

        for batch_idx, (data, target) in enumerate(dataloader):
            data, target = data.to(device), target.to(device)

            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            # Index of the largest logit per sample is the predicted class.
            _, predicted = torch.max(output, 1)
            total += target.size(0)
            correct += (predicted == target).sum().item()

            if batch_idx % 10 == 0:
                print(f'Epoch [{epoch+1}/{epochs}], Batch [{batch_idx}/{num_batches}], Loss: {loss.item():.4f}')

        epoch_loss = running_loss / num_batches
        # Guard against batches that carried no samples at all.
        accuracy = 100 * correct / total if total else 0.0
        print(f'✅ Epoch [{epoch+1}/{epochs}] -> Loss: {epoch_loss:.4f}, Accuracy: {accuracy:.2f}%')

        scheduler.step()

    # ✅ Save the trained model (parameters only, not the module object)
    torch.save(model.state_dict(), save_path)
    print(f"✅ Model saved to {save_path}")

    return model


In [None]:
import torch
from torch.utils.data import DataLoader

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# `train_loader` was never defined (hence the NameError below this cell):
# build it from the HARImageDataset instance created earlier as `dataset`.
# NOTE(review): batch size is a conservative guess for 16-frame 224x224 clips;
# tune it to the available GPU memory.
BATCH_SIZE = 4
train_loader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)

# Train model
trained_model = train_model(model, train_loader, device, epochs=10, save_path="trained_model.pth")


NameError: name 'train_loader' is not defined

In [None]:
import cv2
import numpy as np
import torch
from collections import deque
from torchvision import transforms

class RealTimeDetector:
    """Classify a live camera feed over a sliding window of 16 frames.

    Args:
        model_path: file produced by `torch.save`, holding either a state_dict
            (what `train_model` writes) or a whole pickled `nn.Module`.
        device: torch device used for the model and the input clips.
    """

    def __init__(self, model_path, device):
        self.device = device
        self.classes = ['Normal', 'Running', 'Fall']

        # SECURITY: torch.load unpickles the file; only load checkpoints from
        # a trusted source.
        loaded = torch.load(model_path, map_location=self.device)
        if isinstance(loaded, torch.nn.Module):
            self.model = loaded
        else:
            # train_model saves `model.state_dict()`, which has no `.eval()`:
            # rebuild the architecture and load the weights into it.
            self.model = BankSecurityModel(num_classes=len(self.classes))
            self.model.load_state_dict(loaded)
        self.model.to(self.device)
        self.model.eval()

        # Sliding window: appending a 17th frame drops the oldest one.
        self.frame_buffer = deque(maxlen=16)

        # Normalization transform (same as training)
        self.transform = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                              std=[0.229, 0.224, 0.225])

    def preprocess_frames(self, frames):
        """Turn a stack of frames into a model-ready clip tensor.

        Args:
            frames: array of shape (T, H, W, 3) with values in 0..255; the
                camera loop passes (16, 224, 224, 3) uint8 RGB frames.

        Returns:
            Float tensor of shape (1, 3, T, H, W) on `self.device`.
        """
        # Scale to [0, 1] and put channels right before the spatial axes, so
        # the per-channel normalization broadcasts over every frame.
        clip = torch.as_tensor(np.asarray(frames), dtype=torch.float32) / 255.0
        clip = clip.permute(0, 3, 1, 2)          # (T, C, H, W)

        # The original normalized `frames[c]`, a (T, H, W) slice, which cannot
        # broadcast against a 3-element mean/std.
        clip = self.transform(clip)

        clip = clip.permute(1, 0, 2, 3)          # (C, T, H, W)

        # Add batch dimension
        return clip.unsqueeze(0).to(self.device)  # (1, 3, T, H, W)

    def detect_from_camera(self, camera_index=0):
        """Read frames from a camera, overlay predictions, stop on 'q'.

        Args:
            camera_index: index handed to `cv2.VideoCapture`.
        """
        cap = cv2.VideoCapture(camera_index)

        try:
            while True:
                ret, frame = cap.read()
                if not ret:
                    break

                frame_resized = cv2.resize(frame, (224, 224))
                # OpenCV delivers BGR; the training pipeline normalizes
                # channels in RGB order, so convert before buffering.
                frame_rgb = cv2.cvtColor(frame_resized, cv2.COLOR_BGR2RGB)
                self.frame_buffer.append(frame_rgb)

                # Predict only once the window is full.
                if len(self.frame_buffer) == self.frame_buffer.maxlen:
                    frames_tensor = self.preprocess_frames(np.array(self.frame_buffer))

                    with torch.no_grad():
                        prediction = self.model(frames_tensor)
                        predicted_class = torch.argmax(prediction, dim=1).item()
                        confidence = torch.softmax(prediction, dim=1)[0][predicted_class].item()

                    label = f"{self.classes[predicted_class]}: {confidence:.2f}"
                    # Green for class 0 ('Normal'), red otherwise (BGR order).
                    color = (0, 255, 0) if predicted_class == 0 else (0, 0, 255)

                    cv2.putText(frame, label, (10, 30), cv2.FONT_HERSHEY_SIMPLEX,
                                1, color, 2)

                    if predicted_class != 0 and confidence > 0.7:
                        cv2.putText(frame, "ALERT!", (10, 70), cv2.FONT_HERSHEY_SIMPLEX,
                                    1, (0, 0, 255), 2)

                cv2.imshow('Bank Security Monitor', frame)

                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
        finally:
            # Release the camera and windows even if inference raises.
            cap.release()
            cv2.destroyAllWindows()

# Usage: load the checkpoint written by training and start the camera loop.
# The checkpoint file must already exist, i.e. training has to have run.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
detector = RealTimeDetector(model_path='trained_model.pth', device=device)
detector.detect_from_camera()


FileNotFoundError: [Errno 2] No such file or directory: 'trained_model.pth'