In [None]:
# Environment setup - takes 2-5 minutes.
# Runs the load.sh script with sh.
# NOTE(review): load.sh is not shown here; presumably it installs or downloads
# what this lecture needs - confirm.
!sh load.sh

# Install opencv-python (the package that provides the cv2 module imported below)
# using the same Python interpreter that is running this kernel.
import sys
!{sys.executable} -m pip install opencv-python

# Import all other packages
from datascience import *
from ultralytics import YOLO
from PIL import Image
from ipywidgets.widgets import IntSlider, interact_manual

import numpy as np
import cv2

import matplotlib.pyplot as plt
%matplotlib inline


# Some useful helper functions
def show_image(im, size=None):
    """Draw an image held in a NumPy array, with the axis ticks hidden.

    Parameters
    ----------
    im : array-like
        Pixel data handed straight to matplotlib's ``imshow``.
    size : tuple, optional
        Figure size passed to ``plt.subplots`` as ``figsize``; ``None``
        leaves matplotlib's default in place.
    """
    _, axes = plt.subplots(figsize=size)
    # Pin the gray colour scale to 0-255 so a given intensity always maps to
    # the same shade, regardless of the smallest/largest value in this image.
    axes.imshow(im, cmap="gray", vmin=0, vmax=255)
    # Ticks would only show pixel coordinates, so clear them on both axes.
    for clear_ticks in (axes.set_xticks, axes.set_yticks):
        clear_ticks([])

def read_video(path, max_frames=40):
    """Read the leading frames of a video file into one NumPy array.

    Parameters
    ----------
    path : str
        Location of the video, passed to ``cv2.VideoCapture``.
    max_frames : int or None, optional
        Upper bound on the number of frames to load (default 40, which keeps
        the resulting array small). Pass ``None`` to read until the video ends.

    Returns
    -------
    numpy.ndarray
        The frames stacked along a new first axis, each one converted from
        OpenCV's BGR channel order to RGB.

    Raises
    ------
    ValueError
        If no frame could be read (for example a missing or unreadable file).
    """
    frames = []
    cap = cv2.VideoCapture(path)
    try:
        while max_frames is None or len(frames) < max_frames:
            ok, bgr_frame = cap.read()
            if not ok:
                # A failed read means the stream is exhausted or unreadable,
                # so further reads would fail as well.
                break
            frames.append(cv2.cvtColor(bgr_frame, cv2.COLOR_BGR2RGB))
    finally:
        # Always free the underlying file/decoder handle, even on errors.
        cap.release()

    if not frames:
        raise ValueError(f"No frames could be read from video: {path!r}")
    return np.stack(frames, axis=0)

# Lecture 31: Working with Images, Computer Vision

## Image Data

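Grayscale images are just two-dimensional arrays of pixel intensities, where `0` is black and `255` is white. You can think about them as several **arrays within one larger array**: each inner array is one row of pixels.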

In [None]:
# A 3x3 example image: 0 and 255 alternate, so the pixels form a checkerboard
outer_row = make_array(0, 255, 0)
middle_row = make_array(255, 0, 255)
im = make_array(outer_row, middle_row, outer_row)
im

We can show our image using the helper `show_image` function that uses `matplotlib`'s `imshow` method.

In [None]:
# Render the 3x3 array as a picture: 0 draws as black, 255 as white
show_image(im)

Now we can create images with specific shades, from black through grey to white.

In [None]:
# Three 3x3 grayscale images, each filled with a single intensity.
# Every image is built from one row that is repeated three times.

# All black pixels (intensity 0)
black_row = make_array(0, 0, 0)
black = make_array(black_row, black_row, black_row)

# All white pixels (intensity 255)
white_row = make_array(255, 255, 255)
white = make_array(white_row, white_row, white_row)

# All grey pixels (intensity 125, roughly halfway between black and white)
grey_row = make_array(125, 125, 125)
grey = make_array(grey_row, grey_row, grey_row)

In [None]:
# Every pixel is 0, the bottom of the 0-255 gray scale, so the image is black
show_image(black)

In [None]:
# Every pixel is 255, the top of the 0-255 gray scale, so the image is white
show_image(white)

In [None]:
# Every pixel is 125, near the middle of the 0-255 gray scale, so the image is grey
show_image(grey)

<div style="width:125px; margin:0 auto;"><b>(back to slides)</b></div>

## What About Color?

Color images consist of **three color channels**, RGB, which represent the intensity of that particular color for every pixel. Every pixel has a value for **each color channel**. For example, a **red pixel** should have the following channel values: `R=255, G=0, B=0`

In [None]:
# Create a red pixel as an (R, G, B) array: full red, no green, no blue
red_pixel = make_array(255, 0, 0)

In [None]:
# Build one row of three red pixels, then stack three such rows into a 3x3 image
red_row = make_array(red_pixel, red_pixel, red_pixel)
red = make_array(red_row, red_row, red_row)
red

In [None]:
# Render the 3x3 image in which every pixel is red
show_image(red)

In [None]:
# Green and blue pixels follow the same (R, G, B) layout as the red pixel:
# exactly one channel is at full intensity (255) and the other two are 0
green_pixel = make_array(0, 255, 0)
blue_pixel = make_array(0, 0, 255)

In [None]:
# Each row reads green, blue, green; repeating it three times gives vertical stripes
striped_row = make_array(green_pixel, blue_pixel, green_pixel)
green_blue = make_array(striped_row, striped_row, striped_row)
show_image(green_blue)

We can combine each color channel in the following way to make **purple** and **orange**.

In [None]:
# Mix channel intensities to get colors beyond pure red, green, and blue
purple_pixel = make_array(100, 0, 255)  # some red + full blue
orange_pixel = make_array(255, 150, 0)  # full red + some green

# (255, 150, 0) is orange, not yellow (yellow would be 255, 255, 0).
# The old name is kept as an alias in case another cell still refers to it.
yellow_pixel = orange_pixel

cool_image = make_array(
    make_array(purple_pixel, blue_pixel, red_pixel),
    make_array(green_pixel, orange_pixel, green_pixel),
    make_array(red_pixel, blue_pixel, purple_pixel)
)
show_image(cool_image)

## Loading in Your Own Images

Rather than manually creating our image arrays, we can load our own images into the notebook.

In [None]:
# Read the image file into an array of pixel values
kevin = plt.imread("kevin.jpg")
# Display the raw array to see the numbers behind the picture
kevin

In [None]:
# Draw the loaded image; size=(6, 8) is passed to matplotlib as the figure size
show_image(kevin, size=(6, 8))

We can figure out the **dimensions of our image** by using the `.shape` attribute. We see that the output shape is actually three-dimensional.

In [None]:
# .shape is indexed as (height, width, channels) to describe the loaded image.
# Adjacent f-strings are joined by Python into one message.
print(
    f"The image of kevin is {kevin.shape[0]} pixels tall "
    f"and {kevin.shape[1]} pixels wide, and has {kevin.shape[2]} channels"
)

<div style="width:125px; margin:0 auto;"><b>(back to slides)</b></div>

## Video Data

Videos are sequences of frames displayed at a high rate; that rate is measured in Frames Per Second (FPS). Here's an example featuring a few anonymous tennis players.

<video width="950" controls src="swing.mov"></video>

In [None]:
# Load the video's frames (read_video returns at most the first 40) as one stacked array
video = read_video("swing.mov")
# Display the raw array of pixel values
video

In [None]:
# .shape is indexed as (frames, height, width, channels) to describe the video.
# Adjacent f-strings are joined by Python into one message.
print(
    f"There are {video.shape[0]} images that are all {video.shape[1]} pixels tall "
    f"and {video.shape[2]} pixels wide with {video.shape[3]} channels"
)

We can also look at each frame individually to see what the video is made of.

In [None]:
def show_video_frame(num):
    """Display frame number `num` of the global `video` array."""
    selected_frame = video[num]
    show_image(selected_frame, size=(14, 14))

# The slider covers every valid frame index, from 0 up to the last frame.
# The trailing semicolon keeps the call's return value out of the cell output.
frame_slider = IntSlider(min=0, max=len(video) - 1, step=1)
interact_manual(show_video_frame, num=frame_slider);

<div style="width:125px; margin:0 auto;"><b>(back to slides)</b></div>

## YOLO v8

Let's explore the cutting-edge, state-of-the-art object detection model called YOLO v8. Note that if you are running this **on DataHub**, you'll need to first install the `ultralytics` package and import `YOLO`. The course staff can help you if you're having trouble.

In [None]:
# Load a YOLO v8 detection model from the 'yolov8m.pt' weights file
# NOTE(review): presumably ultralytics downloads the weights if the file is
# not already present - confirm before running offline.
detect = YOLO('yolov8m.pt') 

In [None]:
# Run object detection on a live camera feed and show the annotated frames.
# NOTE(review): source="0" presumably selects the default webcam and show=True
# opens a display window, so this needs a machine with a camera and a screen;
# it likely will not work on a remote server such as DataHub - confirm against
# the ultralytics predict documentation.
detect.predict(source="0", show=True)