<a href="https://colab.research.google.com/github/dyllanesl/AI-EDGE-Project/blob/main/ASL_Project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Restart Runtime
A quick way to restart the runtime.

In [None]:
import os
# os._exit ends the interpreter process immediately with the given status,
# skipping normal cleanup. The notebook text describes this cell as a quick
# way to restart the runtime.
os._exit(00)



# Install dependencies
Install the dependencies and necessary libraries.

In [None]:
# Install essential libraries
!pip install torch==2.3.0 torchvision==0.18.1
!pip install diffusers transformers datasets accelerate
!pip install mediapipe opencv-python

# Download the Mediapipe hand landmarker model
!wget -q https://storage.googleapis.com/mediapipe-models/hand_landmarker/hand_landmarker/float16/1/hand_landmarker.task


# Load a specific pre-trained diffusion model
The Hugging Face Model Hub provides an easy way to load and interact with pre-trained diffusion models. In this cell we load a pre-trained Stable Diffusion img2img model. Citation:

```bibtex
@InProceedings{Rombach_2022_CVPR,
    author    = {Rombach, Robin and Blattmann, Andreas and Lorenz, Dominik and Esser, Patrick and Ommer, Bj\"orn},
    title     = {High-Resolution Image Synthesis With Latent Diffusion Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2022},
    pages     = {10684-10695}
}
```


In [None]:
from diffusers import DiffusionPipeline
import torch

# Load the pre-trained model
pipeline = DiffusionPipeline.from_pretrained("radames/stable-diffusion-v1-5-img2img")

# Use the GPU only when one is actually available; on a CPU-only runtime a
# hard-coded "cuda" would raise instead of falling back.
device = "cuda" if torch.cuda.is_available() else "cpu"
pipeline.to(device)


# Train Model Here
In these cells we upload the dataset, unzip it, and then load it into a DataFrame so it is ready for training.

In [None]:
import zipfile
import pandas as pd
import os
from google.colab import files

import io

# Upload the ZIP file. files.upload() returns a mapping of file name -> file
# content (bytes) for everything selected in the upload dialog.
uploaded = files.upload()

# Default location of the dataset archive on disk (used when nothing new was
# uploaded in this run, e.g. the dialog was cancelled).
zip_path = '/content/AslDs.zip'

# Define the extraction path
extract_path = '/content/AslDs'

# Prefer the bytes that were just uploaded over the hard-coded path: the
# archive may have a different name, or an earlier copy may already sit at
# zip_path, and in both cases the fixed path would be wrong or stale.
uploaded_zip_names = [name for name in uploaded if name.lower().endswith('.zip')]
if uploaded_zip_names:
    zip_source = io.BytesIO(uploaded[uploaded_zip_names[0]])
elif os.path.isfile(zip_path):
    zip_source = zip_path
else:
    raise FileNotFoundError(
        f"No .zip file was uploaded and {zip_path} does not exist; upload the dataset archive first."
    )

# Unzip the dataset
with zipfile.ZipFile(zip_source, 'r') as zip_ref:
    # Extract all the contents of the zip file into the specified directory
    zip_ref.extractall(extract_path)

# List the extracted files to verify
extracted_files = os.listdir(extract_path)
print("Extracted files:", extracted_files)

In [None]:
# Verify the extracted files
# Re-lists the extraction directory (the same listing the upload cell prints),
# so the contents can be re-checked without uploading again.
extracted_files = os.listdir(extract_path)
print("Extracted files:", extracted_files)


In [None]:
import zipfile
import pandas as pd
import os
import random
from google.colab import files

# Define paths to the dataset and CSV files
dataset_path = os.path.join(extract_path, 'dataset')
pre_csv_path = os.path.join(extract_path, 'pre_ASL_dataset.csv')
post_csv_path = os.path.join(extract_path, 'post_ASL_dataset.csv')

# File extensions treated as images. Anything else found in the dataset folder
# (hidden/OS metadata files, stray CSVs, ...) would otherwise get a row with a
# meaningless label and prompt.
IMAGE_EXTENSIONS = (".png", ".jpg", ".jpeg", ".bmp", ".gif")

# List all images in the dataset directory.
# os.listdir returns entries in arbitrary order, so sort to get the same row
# order on every run.
image_files = sorted(
    f for f in os.listdir(dataset_path)
    if os.path.isfile(os.path.join(dataset_path, f)) and f.lower().endswith(IMAGE_EXTENSIONS)
)

# The label is the part of the file name before the first '.'
labels = [img.split('.')[0] for img in image_files]
prompts = [f"Generate an improved realistic american sign language hand gesture based on the letter {label}" for label in labels]

# Create a DataFrame with the list of images and required parameters
# (scalar values are broadcast to every row; each image gets its own seed)
new_data = {
    'image_path': [os.path.join(dataset_path, img) for img in image_files],
    'label': labels,
    'prompt': prompts,
    'ddim_steps': 5,
    'plms': False,
    'scale': 7.5,
    'H': 512,
    'W': 512,
    'seed': [random.randint(0, 4294967295) for _ in range(len(image_files))]
}

new_df = pd.DataFrame(new_data)

# Function to append new data to existing CSV
def append_to_csv(csv_path, new_df):
    """Append ``new_df`` to the CSV at ``csv_path`` and return the combined frame.

    Args:
        csv_path: Path of the CSV file to update. It is created when missing.
        new_df: DataFrame whose rows are added after any rows already stored.

    Returns:
        The DataFrame that was written: the existing rows followed by
        ``new_df`` (index reset), or ``new_df`` itself when there was no
        existing data.
    """
    existing_df = None
    if os.path.exists(csv_path):
        try:
            # Load the existing CSV file
            existing_df = pd.read_csv(csv_path)
        except pd.errors.EmptyDataError:
            # The file exists but holds no header or rows (e.g. a zero-byte
            # placeholder); treat it as "no existing data" instead of crashing.
            existing_df = None

    if existing_df is None:
        df = new_df
    else:
        # Append new data to the DataFrame
        df = pd.concat([existing_df, new_df], ignore_index=True)

    # Save the updated DataFrame back to the CSV file
    df.to_csv(csv_path, index=False)
    return df

# Merge the freshly built rows into pre_ASL_dataset.csv and keep the combined frame.
pre_df = append_to_csv(pre_csv_path, new_df)
# The matching update of post_ASL_dataset.csv is left commented out in this cell:
# post_df = append_to_csv(post_csv_path, new_df)

# Show the heading and the first rows of the combined frame.
print("Updated pre_ASL_dataset.csv:", pre_df.head(), sep="\n")
# print("Updated post_ASL_dataset.csv:")
# print(post_df.head())

In [None]:
import os
from PIL import Image
from diffusers import DiffusionPipeline
import torch

# Supported image formats
supported_formats = (".png", ".jpg", ".jpeg", ".bmp", ".gif")

# Generation settings. They are passed to the pipeline AND written to the CSV,
# so the logged metadata describes the run that actually produced each image.
NUM_INFERENCE_STEPS = 50
GUIDANCE_SCALE = 7.5

# Column order of post_ASL_dataset.csv rows produced by this cell
OUTPUT_COLUMNS = ['image_path', 'prompt', 'ddim_steps', 'plms', 'scale', 'H', 'W', 'seed', 'label']

# Load the existing CSV data (read once; this is the same file as pre_csv_path)
csv_path = os.path.join(extract_path, 'pre_ASL_dataset.csv')
df = pd.read_csv(csv_path)

# Define the output directory for new images
output_dir = "/content/generated_images"
os.makedirs(output_dir, exist_ok=True)

# One dict per successfully generated image
generated_rows = []

# Initialize a counter for the new image filenames
counter = 1

# Iterate over the existing data to generate new images
for _, row in df.iterrows():
    image_name = os.path.basename(row['image_path'])  # Get the image name
    label = row['label']
    input_image_path = os.path.join(dataset_path, image_name)

    # Skip non-image files and directories
    if os.path.isdir(input_image_path) or not image_name.lower().endswith(supported_formats):
        continue

    try:
        seed = int(row['seed'])

        # Open and convert the image to RGB; the context manager closes the
        # source file as soon as the converted copy exists.
        with Image.open(input_image_path) as source_image:
            input_image = source_image.convert("RGB")

        # Seed the generator and hand it to the pipeline so the run is
        # reproducible from the logged seed.
        generator = torch.manual_seed(seed)

        # Create a more specific prompt based on the label
        prompt = f"Generate a realistic hand gesture representing the American sign language letter {label}, with the hand clearly showing the gesture in a neutral background"

        # Generate an output image using the model
        output = pipeline(
            prompt=prompt,
            image=input_image,
            num_inference_steps=NUM_INFERENCE_STEPS,
            guidance_scale=GUIDANCE_SCALE,
            generator=generator,
        )
        output_image = output.images[0]  # Get the generated image from the output

        # Save the output image
        output_image_name = f"{label}{counter}.png"
        output_image_path = os.path.join(output_dir, output_image_name)
        output_image.save(output_image_path)

        # Record the settings that produced this image. The seed stored here
        # is the one used above, not a new random value.
        generated_rows.append({
            'image_path': output_image_path,
            'prompt': prompt,
            'ddim_steps': NUM_INFERENCE_STEPS,
            'plms': False,
            'scale': GUIDANCE_SCALE,
            'H': 512,
            'W': 512,
            'seed': seed,
            'label': label,
        })

        # Increment the counter
        counter += 1

    except Exception as e:
        # Best effort: report the failure and continue with the next image
        print(f"Error processing {input_image_path}: {e}")

# Create a new DataFrame with the generated data (columns fixed even when empty)
new_df = pd.DataFrame(generated_rows, columns=OUTPUT_COLUMNS)

# Call the function to append new data to the post_ASL_dataset.csv
post_df = append_to_csv(post_csv_path, new_df)

# Display the updated DataFrame to verify
print("Updated post_ASL_dataset.csv:")
print(post_df.head())


In [None]:
output_dir = "/content/generated_images"
# os.listdir returns names in arbitrary order; sorting keeps "the first image"
# and the grid below the same from run to run.
generated_images = sorted(os.listdir(output_dir))
print("Generated Images:", generated_images)
import matplotlib.pyplot as plt

# Function to display an image
def display_image(image_path):
    """Open the image at ``image_path`` and show it with matplotlib, axes hidden."""
    plt.imshow(Image.open(image_path))
    plt.axis('off')  # no ticks or frame around the picture
    plt.show()

# Display a specific generated image.
# Guarded because the folder is empty when every generation attempt failed;
# indexing an empty list would raise IndexError.
if generated_images:
    image_to_display = os.path.join(output_dir, generated_images[0])  # Change index to display a different image
    display_image(image_to_display)
else:
    print(f"No generated images found in {output_dir}")


In [None]:
import matplotlib.pyplot as plt

# Display multiple images in a grid
def display_images(image_paths, cols=3, rows=3):
    """Show images in a ``rows`` x ``cols`` matplotlib grid.

    Args:
        image_paths: Paths of the images to show. Only the first
            ``rows * cols`` paths are drawn.
        cols: Number of grid columns.
        rows: Number of grid rows.
    """
    # squeeze=False always returns a 2-D array of axes, so a 1x1 grid works too
    fig, axes = plt.subplots(rows, cols, figsize=(15, 15), squeeze=False)
    grid_axes = axes.flatten()

    # Hide every axis first so cells without an image stay blank rather than
    # showing an empty frame with ticks.
    for ax in grid_axes:
        ax.axis('off')

    for ax, img_path in zip(grid_axes, image_paths):
        with Image.open(img_path) as img:
            ax.imshow(img)

    plt.tight_layout()
    plt.show()

# Build full paths for the first nine generated files (change the slice to
# show more or fewer), then render them as a grid.
image_paths = list(map(lambda name: os.path.join(output_dir, name), generated_images[:9]))
display_images(image_paths)

**Description:** Activates camera and captures images (frames) for the MediaPipe
to detect and visualize

**Goals:**
- Loop function to take a picture every 5 seconds until deactivated
  - This would be the constant changing of frames
- Implement video capture to detect ASL letters that involve motion, such as Z

**Current functionality:**
- Can manually take a screenshot of the image

In [None]:
#Activates camera, captures images
from IPython.display import display, Javascript
from google.colab.output import eval_js
from base64 import b64decode

#Function to take the actual photo from code snippet
def take_photo(filename='photo.jpg', quality=0.8):
  """Capture one webcam frame in the browser and save it as a JPEG.

  Displays a live video preview with a 'Capture' button in the cell output,
  waits for the click, and writes the captured frame to ``filename``.

  Args:
    filename: Path of the file the JPEG bytes are written to.
    quality: JPEG quality passed to ``canvas.toDataURL``.

  Returns:
    ``filename``, unchanged.
  """
  capture_script = Javascript('''
    async function takePhoto(quality) {
      const div = document.createElement('div');
      const capture = document.createElement('button');
      capture.textContent = 'Capture';
      div.appendChild(capture);

      const video = document.createElement('video');
      video.style.display = 'block';
      const stream = await navigator.mediaDevices.getUserMedia({video: true});

      document.body.appendChild(div);
      div.appendChild(video);
      video.srcObject = stream;
      await video.play();

      // Resize the output to fit the video element.
      google.colab.output.setIframeHeight(document.documentElement.scrollHeight, true);

      // Wait for Capture to be clicked.
      await new Promise((resolve) => capture.onclick = resolve);

      const canvas = document.createElement('canvas');
      canvas.width = video.videoWidth;
      canvas.height = video.videoHeight;
      canvas.getContext('2d').drawImage(video, 0, 0);
      stream.getVideoTracks()[0].stop();
      div.remove();
      return canvas.toDataURL('image/jpeg', quality);
    }
    ''')
  display(capture_script)

  # The script returns a data URL; the base64 payload follows the first comma.
  data_url = eval_js(f'takePhoto({quality})')
  jpeg_bytes = b64decode(data_url.split(',')[1])

  with open(filename, 'wb') as photo_file:
    photo_file.write(jpeg_bytes)
  return filename

Description: Used to Visualize the "Hand Landmark Detection" Solution

In [None]:
from mediapipe import solutions
from mediapipe.framework.formats import landmark_pb2
import numpy as np

# Drawing parameters read by draw_landmarks_on_image.
MARGIN = 10 # pixels; vertical gap between the hand's topmost landmark and its label
FONT_SIZE = 1 # font scale passed to cv2.putText
FONT_THICKNESS = 1 # stroke thickness passed to cv2.putText
FONT_COLOR = (88, 205, 54) # RGB formula for vibrant green (not referenced by draw_landmarks_on_image)
HANDEDNESS_TEXT_COLOR = (88, 205, 54) # color of the handedness label text


def draw_landmarks_on_image(rgb_image, detection_result):
  """Return a copy of ``rgb_image`` with detected hands drawn on it.

  For every detected hand the landmarks and their connections are drawn, and
  the handedness category name is written near the hand's top-left corner.

  Args:
    rgb_image: Image array of shape (height, width, channels). It is copied,
      not modified.
    detection_result: Detection result exposing ``hand_landmarks`` and
      ``handedness`` lists with one entry per detected hand.

  Returns:
    The annotated copy of ``rgb_image``.
  """
  # Imported here so the function does not depend on a later notebook cell
  # having imported cv2 before it is called.
  import cv2

  annotated_image = np.copy(rgb_image)
  height, width, _ = annotated_image.shape

  # Loop through the detected hands to visualize.
  for hand_landmarks, handedness in zip(detection_result.hand_landmarks,
                                        detection_result.handedness):
    # Draw the hand landmarks.
    hand_landmarks_proto = landmark_pb2.NormalizedLandmarkList()
    hand_landmarks_proto.landmark.extend([
      landmark_pb2.NormalizedLandmark(x=landmark.x, y=landmark.y, z=landmark.z) for landmark in hand_landmarks
    ])
    solutions.drawing_utils.draw_landmarks(
      annotated_image,
      hand_landmarks_proto,
      solutions.hands.HAND_CONNECTIONS,
      solutions.drawing_styles.get_default_hand_landmarks_style(),
      solutions.drawing_styles.get_default_hand_connections_style())

    # Get the top left corner of the detected hand's bounding box.
    x_coordinates = [landmark.x for landmark in hand_landmarks]
    y_coordinates = [landmark.y for landmark in hand_landmarks]

    # cv2.putText anchors text at its bottom-left corner. Clamp the anchor so
    # the label stays inside the image when the hand touches the top or left
    # edge instead of being drawn off-screen.
    label_text = f"{handedness[0].category_name}"
    (_, text_height), _ = cv2.getTextSize(
        label_text, cv2.FONT_HERSHEY_DUPLEX, FONT_SIZE, FONT_THICKNESS)
    text_x = max(int(min(x_coordinates) * width), 0)
    text_y = max(int(min(y_coordinates) * height) - MARGIN, text_height)

    # Draw handedness (left or right hand) on the image.
    cv2.putText(annotated_image, label_text,
                (text_x, text_y), cv2.FONT_HERSHEY_DUPLEX,
                FONT_SIZE, HANDEDNESS_TEXT_COLOR, FONT_THICKNESS, cv2.LINE_AA)

  return annotated_image

Description: Uploading / Collecting images to process

In [None]:
# #Import libraries
# import cv2 #OpenCV library that processes images and videos
# from google.colab.patches import cv2_imshow #allows cv2 to work in colab

# '''
# # Code for uploading an image manually
# from google.colab import files

# uploaded = files.upload()

# for filename in uploaded:
#   content = uploaded[filename]
#   with open(filename, 'wb') as f:
#     f.write(content)

# if len(uploaded.keys()):
#   IMAGE_FILE = next(iter(uploaded))
#   print('Uploaded file:', IMAGE_FILE)
# '''

# # img = cv2.imread(IMAGE_FILE)
# # cv2_imshow(img)


In [None]:
#Import libraries
import cv2 #OpenCV library that processes images and videos
from google.colab.patches import cv2_imshow #allows cv2 to work in colab
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
# from google.colab import files  # uncomment to enable the manual-upload path in main()

def main():
  """Capture a webcam photo, detect hand landmarks in it and display the result.

  Takes a photo with take_photo(), runs the MediaPipe hand landmarker (up to
  two hands) on the saved file, and shows the annotated image in the cell
  output.
  """

  # STEP 1: Take the photo and save it
  filename = take_photo()
  print('Saved to {}'.format(filename))

  # To process an image uploaded from your computer instead, add
  # `from google.colab import files` to the imports and uncomment the code
  # below; `filename` then points at the uploaded file.
  # uploaded = files.upload()

  # for imageName in uploaded:
  #   content = uploaded[imageName]
  #   with open(imageName, 'wb') as f:
  #     f.write(content)

  # if len(uploaded.keys()):
  #   filename = next(iter(uploaded))
  #   print('Uploaded file:', filename)

  # # prints the image you uploaded
  # img = cv2.imread(filename)
  # cv2_imshow(img)

  # STEP 2: Configure the HandLandmarker.
  base_options = python.BaseOptions(model_asset_path='hand_landmarker.task')
  options = vision.HandLandmarkerOptions(base_options=base_options,
                                       num_hands=2)

  # STEP 3: Load the input image.
  image = mp.Image.create_from_file(filename)

  # STEP 4: Detect hand landmarks from the input image.
  # The context manager closes the detector when detection is done, so
  # repeated calls to main() do not accumulate open detectors.
  with vision.HandLandmarker.create_from_options(options) as detector:
    detection_result = detector.detect(image)

  # STEP 5: Process the classification result. In this case, visualize it.
  # The annotated image is RGB; cv2_imshow expects BGR, hence the conversion.
  annotated_image = draw_landmarks_on_image(image.numpy_view(), detection_result)
  cv2_imshow(cv2.cvtColor(annotated_image, cv2.COLOR_RGB2BGR))


In [None]:
# Run the capture -> detect -> visualize flow defined in main().
main()