In [1]:
# Colab-only dependency. Pinned to the version this notebook was last run with
# (see the captured install log) so a fresh runtime builds the same package.
# `%pip` installs into the running kernel's environment; `-q` keeps the
# progress-bar noise out of the saved output.
%pip install -q pycuda==2024.1

Collecting pycuda
  Downloading pycuda-2024.1.tar.gz (1.7 MB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.7 MB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.1/1.7 MB[0m [31m3.7 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━[0m [32m1.0/1.7 MB[0m [31m15.2 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m [31m18.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting pytools>=2011.2 (from pycuda)
  Downloading pytools-2024.1.3-py2.py3-none-any.whl (87 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m87.4/87.4 kB[0m [31m12.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting appdirs>=1.4.0 (f

In [2]:
# Mount Google Drive at /content/drive; the dataset paths used further down in
# this notebook all live under that mount point. This only works inside Colab.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# CUDA C source for the `normalize_image` kernel (kept as a string and compiled
# in the next cell with SourceModule).
# Each thread maps to one (x, y) position of a row-major `width` x `height`
# float buffer and rescales that element in place:
#     img[idx] = (img[idx] - min_val) / (max_val - min_val)
# Threads outside the width/height bounds do nothing, so only the first
# width * height floats of `img` are ever touched. The kernel does not guard
# against max_val == min_val (division by zero).
cuda_code = """
__global__ void normalize_image(float *img, int width, int height, float min_val, float max_val) {
    int x = blockIdx.x * blockDim.x + threadIdx.x;
    int y = blockIdx.y * blockDim.y + threadIdx.y;

    if (x < width && y < height) {
        int idx = y * width + x;
        img[idx] = (img[idx] - min_val) / (max_val - min_val);
    }
}
"""

In [4]:
import pycuda.driver as cuda
import pycuda.autoinit
from pycuda.compiler import SourceModule
import numpy as np

# Compile the CUDA code
# NOTE: `cuda_code` and `mod` are both reassigned by the ReLU cells further
# down. If this cell is re-run after those cells, `cuda_code` holds the ReLU
# source and the "normalize_image" lookup below will fail; re-run the cell that
# defines the normalize kernel source first.
mod = SourceModule(cuda_code)

# Get the kernel function
# Handle used by normalize_image_gpu() to launch the kernel.
normalize_image = mod.get_function("normalize_image")
def normalize_image_gpu(images, min_val, max_val, batch_size=2, max_threads_per_block=512):
    """Min-max normalize a stack of images on the GPU.

    Every value ``v`` of ``images`` is replaced by
    ``(v - min_val) / (max_val - min_val)`` using the compiled
    ``normalize_image`` kernel. Images are uploaded, processed and downloaded
    ``batch_size`` at a time.

    Args:
        images: Array with four axes ``(num_images, height, width, channels)``.
            Each batch is converted to float32 before upload.
        min_val: Value mapped to 0.
        max_val: Value mapped to 1.
        batch_size: Number of images per host/device round trip (>= 1).
        max_threads_per_block: Upper bound on threads per CUDA block (>= 1).

    Returns:
        A float32 array with the same shape as ``images``.

    Raises:
        ValueError: If ``batch_size`` or ``max_threads_per_block`` is not
            positive, or if ``min_val`` and ``max_val`` are equal once cast to
            float32 (the kernel would divide by zero).
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")
    if max_threads_per_block < 1:
        raise ValueError(f"max_threads_per_block must be >= 1, got {max_threads_per_block}")
    # Compare as float32 because that is the precision the kernel receives.
    if np.float32(max_val) == np.float32(min_val):
        raise ValueError("max_val must differ from min_val (the kernel divides by max_val - min_val)")

    num_images, height, width, channels = images.shape
    normalized_images = np.empty_like(images, dtype=np.float32)

    values_per_image = height * width * channels
    if num_images == 0 or values_per_image == 0:
        # Nothing to upload; also avoids a zero-sized allocation / launch.
        return normalized_images

    # The kernel is purely element-wise over a row-major "width x height"
    # buffer. The flattened batch is therefore presented as one row per image
    # (row length = height * width * channels) so that *every* value is covered.
    # Launching with the raw image width/height would only touch the first
    # width * height floats and leave the remaining channels/images untouched.
    block_x = min(max_threads_per_block, values_per_image)
    block_y_limit = max(1, max_threads_per_block // block_x)
    grid_x = (values_per_image + block_x - 1) // block_x

    for i in range(0, num_images, batch_size):
        batch_images = images[i:i + batch_size].astype(np.float32).reshape(-1)
        images_in_batch = batch_images.size // values_per_image  # last batch may be short

        # Page-locked staging buffer for the host <-> device copies.
        batch_images_pinned = cuda.pagelocked_empty_like(batch_images)
        np.copyto(batch_images_pinned, batch_images)

        block_y = min(block_y_limit, images_in_batch)
        block_size = (block_x, block_y, 1)
        grid_size = (grid_x, (images_in_batch + block_y - 1) // block_y, 1)

        d_img = cuda.mem_alloc(batch_images_pinned.nbytes)
        try:
            cuda.memcpy_htod(d_img, batch_images_pinned)
            normalize_image(
                d_img,
                np.int32(values_per_image),
                np.int32(images_in_batch),
                np.float32(min_val),
                np.float32(max_val),
                block=block_size,
                grid=grid_size,
            )
            cuda.memcpy_dtoh(batch_images_pinned, d_img)
        finally:
            # Release device memory even if a copy or the launch raises.
            d_img.free()

        normalized_images[i:i + batch_size] = batch_images_pinned.reshape(-1, height, width, channels)

    return normalized_images



# Example usage (uncomment to run):
# images = np.random.rand(10, 128, 128, 3).astype(np.float32)
# min_val, max_val = 0.0, 1.0
# normalized_images = normalize_image_gpu(images, min_val, max_val)


In [5]:
# CUDA C source for the element-wise ReLU kernel. It is kept as a Python string
# (nothing is written to disk) and compiled in the next cell.
# Each thread handles one index of a flat float array of length `size` and
# writes max(0, input[idx]) to output[idx].
# NOTE: this reassigns `cuda_code`, replacing the normalize_image source that
# an earlier cell stored under the same name.
cuda_code = """
__global__ void relu_forward(float* input, float* output, int size) {
    int idx = blockIdx.x * blockDim.x + threadIdx.x;

    if (idx < size) {
        output[idx] = max(0.0f, input[idx]);
    }
}
"""

In [6]:
import pycuda.driver as cuda
import pycuda.autoinit
from pycuda.compiler import SourceModule
import numpy as np

# Compile the CUDA code
# NOTE: this rebinds `mod`, which an earlier cell used for the normalize_image
# module. `cuda_code` must currently hold the ReLU source for the lookup below
# to succeed.
mod = SourceModule(cuda_code)

# Get the kernel function
# Handle used by custom_relu() to launch the kernel.
relu_layer = mod.get_function("relu_forward")

def custom_relu(input_data):
    """Apply ReLU (``max(0, x)``) element-wise on the GPU.

    Args:
        input_data: Array-like of numbers. It is converted to a C-contiguous
            float32 array before upload, because the ``relu_forward`` kernel
            reads and writes 32-bit floats; uploading another dtype as-is would
            be misinterpreted on the device.

    Returns:
        A float32 NumPy array with the same shape as ``input_data``.
    """
    host_input = np.asarray(input_data, dtype=np.float32, order="C")
    output_data = np.empty_like(host_input)

    size = host_input.size
    if size == 0:
        # Nothing to compute; also avoids a zero-byte allocation and a launch
        # with zero blocks.
        return output_data

    block_size = 256
    num_blocks = (size + block_size - 1) // block_size  # ceil(size / block_size)

    d_input = cuda.mem_alloc(host_input.nbytes)
    try:
        d_output = cuda.mem_alloc(output_data.nbytes)
        try:
            cuda.memcpy_htod(d_input, host_input)
            relu_layer(d_input, d_output, np.int32(size), block=(block_size, 1, 1), grid=(num_blocks, 1))
            cuda.memcpy_dtoh(output_data, d_output)
        finally:
            d_output.free()
    finally:
        # Free device buffers even if a copy or the launch raises.
        d_input.free()

    return output_data

# Example usage
# input_data = np.random.rand(1000).astype(np.float32)
# output_data = custom_relu(input_data)
# print(output_data)


In [17]:
import tensorflow as tf
import pandas as pd
import os
import numpy as np
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Dropout, Flatten, Input
from tensorflow.keras.optimizers import Adam

# Read the driver image index and expand each file name to its full path
# (<train dir>/<classname>/<img>).
df = pd.read_csv('/content/drive/My Drive/Acads/MEngAI/CS 239/Datasets/Distracted Driving/driver_imgs_list.csv')
df['img'] = [
    os.path.join('/content/drive/My Drive/Acads/MEngAI/CS 239/Datasets/Distracted Driving/imgs/train', class_name, file_name)
    for class_name, file_name in zip(df['classname'], df['img'])
]

# Pipeline inputs: one-hot class labels and the matching image paths.
# NOTE(review): the dtype of the one-hot matrix is whatever pandas' get_dummies
# defaults to in the installed version - confirm it is what the loss/metrics expect.
labels = pd.get_dummies(df['classname']).values
file_paths = df['img'].values

# Pair each path with its label row in a tf.data dataset (CSV order preserved).
dataset = tf.data.Dataset.from_tensor_slices((file_paths, labels))

def load_and_preprocess_image_with_label(path, label):
    """Load one JPEG and return it, resized and rescaled, together with its label.

    Args:
        path: Location of the JPEG file to read.
        label: Label passed through unchanged.

    Returns:
        ``(image, label)`` where ``image`` is the 3-channel decode of the file,
        resized to 224x224 and divided by 255.0.

    NOTE(review): the model built later in this notebook uses VGG16 with
    ImageNet weights; Keras documents ``vgg16.preprocess_input`` as the
    matching preprocessing - confirm that plain 1/255 scaling is intentional.
    """
    raw_bytes = tf.io.read_file(path)
    decoded_rgb = tf.image.decode_jpeg(raw_bytes, channels=3)
    resized = tf.image.resize(decoded_rgb, [224, 224])
    return resized / 255.0, label

# Keep a handle on the raw (path, label) pairs. Splitting and shuffling are done
# on these cheap elements, before any image is read or decoded.
path_label_dataset = dataset

# Map the function to the dataset (full, CSV-ordered, decoded view)
dataset = path_label_dataset.map(load_and_preprocess_image_with_label, num_parallel_calls=tf.data.experimental.AUTOTUNE)

# Determine the number of samples in the dataset
dataset_size = len(file_paths)
train_size = int(0.8 * dataset_size)
val_size = dataset_size - train_size

# Split the dataset
# The split is positional: the first 80% of CSV rows train, the rest validate.
# NOTE(review): if the CSV is grouped by driver/class, the validation set will
# not have the same class mix as the training set - confirm this is intended.
#
# Training rows are shuffled (and reshuffled every epoch) because Keras does not
# shuffle tf.data inputs itself; without it every epoch would replay the rows in
# CSV order. Shuffling happens on (path, label) pairs, so the full-size buffer
# only holds strings and label rows, not decoded images.
train_dataset = (
    path_label_dataset
    .take(train_size)
    .shuffle(buffer_size=max(train_size, 1), reshuffle_each_iteration=True)
    .map(load_and_preprocess_image_with_label, num_parallel_calls=tf.data.experimental.AUTOTUNE)
)
# skip() runs before map() so the skipped training rows are not read and decoded
# only to be discarded on every validation pass.
val_dataset = (
    path_label_dataset
    .skip(train_size)
    .map(load_and_preprocess_image_with_label, num_parallel_calls=tf.data.experimental.AUTOTUNE)
)

# Verify the sizes of the datasets
# cardinality() reads the size from the pipeline definition instead of iterating
# (and therefore loading) every image just to count it.
train_count = int(train_dataset.cardinality().numpy())
val_count = int(val_dataset.cardinality().numpy())

print(f"Train dataset size: {train_count}")
print(f"Validation dataset size: {val_count}")

# Batch and prefetch the datasets
batch_size = 32

train_dataset = train_dataset.batch(batch_size)
val_dataset = val_dataset.batch(batch_size)

train_dataset = train_dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
val_dataset = val_dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)

# Load VGG16 model without top layers
base_model = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Freeze the base model layers so only the new classification head is trained
base_model.trainable = False

# Add custom layers
x = base_model.output
x = Flatten()(x)
x = Dense(256, activation='relu')(x)
x = Dropout(0.5)(x)
# One softmax unit per distinct class name in the CSV.
predictions = Dense(len(np.unique(df['classname'])), activation='softmax')(x)

# Create the model
model = Model(inputs=base_model.input, outputs=predictions)

# Compile the model
model.compile(optimizer=Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

# Train the model
history = model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=10
)


Train dataset size: 17939
Validation dataset size: 4485
Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 112, 112, 128)     147584    
                                                                 
 bl