In [None]:
# Install the packages this notebook imports into the kernel's environment (-q keeps pip quiet).
%pip -q install ultralytics opencv-python pyyaml onvif-zeep

## Fun with cameras and AI

Now we can use real camera feeds over [RTSP](https://en.wikipedia.org/wiki/Real-Time_Streaming_Protocol) to perform inference tasks. 

First, let's define some helper functions. We can use [ONVIF](https://en.wikipedia.org/wiki/ONVIF) to get the specific RTSP endpoint for a given camera. This allows us to look up the RTSP endpoint using only the host address.

In [None]:
import os, requests, base64
from pprint import pprint
import cv2, yaml
from onvif import ONVIFCamera
from ultralytics import YOLO

# Load the detector once as a notebook-level global; view_rtsp_feed_with_inference calls it on every frame.
model = YOLO('yolov8n.pt')  # You can use 'yolov8s.pt', 'yolov8m.pt', etc. for different model sizes

# This code loads the class names from the COCO dataset yaml file. 
def load_class_names(yaml_file):
    """Parse a dataset YAML file and return the value stored under its ``names`` key.

    Args:
        yaml_file: Path to the YAML file to read.

    Returns:
        Whatever the file holds under the top-level ``names`` key.

    Raises:
        KeyError: If the parsed document has no ``names`` entry.
    """
    # Binary mode leaves encoding detection to the YAML parser.
    with open(yaml_file, 'rb') as stream:
        dataset_config = yaml.safe_load(stream)
        names = dataset_config['names']
    return names

# Looked up by class id when labelling detections in view_rtsp_feed_with_inference.
class_names = load_class_names('../artifacts/coco.yaml')  # Adjust the path to your dataset YAML file

def get_rtsp_url_from_onvif(host, port, user, passwd):
    """Ask an ONVIF camera for the stream URI of its first media profile.

    Args:
        host: Camera host address.
        port: ONVIF service port.
        user: ONVIF user name.
        passwd: ONVIF password.

    Returns:
        The ``Uri`` field of the camera's GetStreamUri response.

    Raises:
        IndexError: If the camera reports no media profiles.
    """
    media = ONVIFCamera(host, port, user, passwd).create_media_service()

    # Only the first profile the camera advertises is used.
    first_profile = media.GetProfiles()[0]

    request = {
        'StreamSetup': {'Stream': 'RTP-Unicast', 'Transport': 'RTSP'},
        'ProfileToken': first_profile.token,
    }
    return media.GetStreamUri(request).Uri

def view_rtsp_feed(rtsp_url):
    """Show the live video of an RTSP stream in an OpenCV window until 'q' is pressed.

    Args:
        rtsp_url: Stream URL handed to ``cv2.VideoCapture``.

    Returns:
        None. Failure to open the stream or to read a frame is printed, not raised.
    """
    cap = cv2.VideoCapture(rtsp_url)

    if not cap.isOpened():
        print("Error: Could not open RTSP stream.")
        cap.release()
        return

    # try/finally guarantees the capture and the window are released even when the
    # loop is left through an exception (for example when the kernel is interrupted).
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                print("Error: Failed to retrieve frame.")
                break

            cv2.imshow('RTSP Feed', frame)

            # Poll the keyboard briefly; 'q' closes the viewer.
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        cap.release()
        cv2.destroyAllWindows()

def view_rtsp_feed_with_inference(rtsp_url):
    """Show an RTSP stream with object-detection boxes drawn on every frame.

    Each frame is passed to the notebook-level ``model``; every detected box is
    drawn in green with a "<class name> <confidence>" label taken from the
    notebook-level ``class_names``. Press 'q' to close the window.

    Args:
        rtsp_url: Stream URL handed to ``cv2.VideoCapture``.

    Returns:
        None. Failure to open the stream or to read a frame is printed, not raised.
    """
    cap = cv2.VideoCapture(rtsp_url)

    if not cap.isOpened():
        print("Error: Could not open RTSP stream.")
        cap.release()
        return

    # try/finally guarantees the capture and the window are released even when
    # inference raises or the kernel is interrupted mid-loop.
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                print("Error: Failed to retrieve frame.")
                break

            # Perform detection
            results = model(frame)

            # Draw bounding boxes on the frame
            for result in results:
                for box in result.boxes:
                    class_id = int(box.cls[0])
                    x1, y1, x2, y2 = map(int, box.xyxy[0])
                    confidence = float(box.conf[0])
                    label = f'{class_names[class_id]} {confidence:.2f}'
                    cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                    # Keep the label inside the frame when the box touches the top edge.
                    label_y = max(y1 - 10, 15)
                    cv2.putText(frame, label, (x1, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

            cv2.imshow('RTSP Feed', frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        cap.release()
        cv2.destroyAllWindows()

def capture_image_from_rtsp(rtsp_url, save_path="../video_frames/camera"):
    """Grab one frame from an RTSP stream and save it as ``capture.jpg``.

    Args:
        rtsp_url: Stream URL handed to ``cv2.VideoCapture``.
        save_path: Directory that receives the snapshot; created if it is missing.

    Returns:
        The path of the written image, or None when the stream could not be
        opened, no frame could be read, or the image could not be written.
    """
    os.makedirs(save_path, exist_ok=True)

    cap = cv2.VideoCapture(rtsp_url)
    try:
        if not cap.isOpened():
            print("Error: Could not open RTSP stream.")
            return None

        ret, frame = cap.read()
    finally:
        # Release the stream on every path, including errors raised by read().
        cap.release()

    if not ret:
        print("Error: Failed to retrieve frame.")
        return None

    image_path = os.path.join(save_path, "capture.jpg")
    # imwrite reports failure through its return value; do not hand back a
    # path to a file that was never written.
    if not cv2.imwrite(image_path, frame):
        print("Error: Failed to write image.")
        return None

    return image_path

# Convert an image to base64 encoding
def image_to_base64(image_path_or_url, timeout=30):
    """Return the base64 text encoding of an image read from disk or fetched over HTTP(S).

    Args:
        image_path_or_url: Local file path, or a URL starting with ``http://``
            or ``https://``.
        timeout: Seconds to wait for the HTTP download before giving up.
            Ignored for local files.

    Returns:
        The image bytes encoded as a base64 ``str``.

    Raises:
        requests.RequestException: If the download fails, times out, or the
            server answers with an error status.
        OSError: If the local file cannot be read.
    """
    if image_path_or_url.startswith(('http://', 'https://')):
        response = requests.get(image_path_or_url, timeout=timeout)
        # Without this check an HTML error page would be encoded as if it were the image.
        response.raise_for_status()
        image_data = response.content
    else:
        with open(image_path_or_url, "rb") as image_file:
            image_data = image_file.read()
    return base64.b64encode(image_data).decode('utf-8')

# Call the AI service API
def call_api(uri, key, payload, timeout=120):
    """POST a JSON payload to the AI service and return the decoded JSON reply.

    Args:
        uri: Full endpoint URL.
        key: Value sent in the ``api-key`` request header.
        payload: JSON-serialisable request body.
        timeout: Seconds to wait for the service before giving up, so a stalled
            connection cannot hang the notebook cell indefinitely.

    Returns:
        The response body parsed from JSON.

    Raises:
        SystemExit: If the request fails, times out, or returns an unsuccessful
            HTTP status.
    """
    headers = {
        "Content-Type": "application/json",
        "api-key": key,
    }
    # Send request
    try:
        response = requests.post(uri, headers=headers, json=payload, timeout=timeout)
        response.raise_for_status()  # Will raise an HTTPError if the HTTP request returned an unsuccessful status code
    except requests.RequestException as e:
        raise SystemExit(f"Failed to make the request. Error: {e}") from e

    return response.json()


## Connect to a camera

Connect to a real camera by providing a host address and credentials in the code below, then run the cell. The lab speakers will provide the credentials for the camera endpoint.

In [None]:
# Camera connection usage
# Set the RTSP_URL environment variable to point at your own camera without
# writing its credentials into the notebook; otherwise the lab camera is used.
rtsp_url = os.environ.get(
    "RTSP_URL",
    "rtsp://rtsp:password1234@172.214.251.132:554/cam/realmonitor?channel=1&subtype=0",
)
view_rtsp_feed(rtsp_url)

### Run an object detection job with the camera

Run the next cell to perform real-time object detection using the camera. Press Q to close the window.

In [None]:
# Reuses rtsp_url from the camera-connection cell; runs until 'q' is pressed or a frame cannot be read.
view_rtsp_feed_with_inference(rtsp_url) # press q to close the window

### Take a picture and send it to GPT-4o
Replace the Azure AI service parameters below using the same ones you used for the [previous notebook](./5-AzureAIGPT4o.ipynb) and initialize the model using a prompt.

In [6]:
# Replace these values with the correct values from your Azure AI service
azureAiServiceName = "replace_me_with_name" # 1 - Service name
serviceUri = "replace_me_with_endpoint" # 2 - endpoint
API_KEY = "replace_me_with_key" # 3 - API Key
# Strip and re-add the slash so the URL is well-formed whether or not the
# endpoint was pasted with a trailing "/".
ENDPOINT = f"{serviceUri.rstrip('/')}/openai/deployments/{azureAiServiceName}-gpt-4o-mini-deployment/chat/completions?api-version=2024-08-01-preview"

# Payload for the request
payload = {
  "messages": [
    {
      "role": "system",
      "content": [
        {
          "type": "text",
          "text": "You are an AI assistant that describes pictures."
        }
      ]
    }
  ],
  "temperature": 0.7,
  "top_p": 0.95,
  "max_tokens": 800
}

### Send a live snapshot to your helpful Azure AI assistant for a description.

You can adjust the prompt in the payload JSON to get different results from the model. Try asking questions.

In [5]:
import textwrap

import numpy as np
import cv2

# Grab a snapshot; capture_image_from_rtsp returns None when the camera cannot be read.
image = capture_image_from_rtsp(rtsp_url)
if image is None:
    raise RuntimeError("Could not capture an image from the camera; check rtsp_url and the connection.")
image_base64 = image_to_base64(image)
payload = {
  "messages": [
    { "role": "system", "content": "You are a helpful assistant." }, # provide context for the model to generate a response
        { "role": "user", "content": [  
            { 
                "type": "text", 
                "text": "Describe this image."  # tell the model what to do
            },
            { 
                "type": "image_url",
                "image_url": {
                    # The snapshot is saved as capture.jpg, so label the data URL as JPEG.
                    "url": "data:image/jpeg;base64," + image_base64 # send the base64 encoded image with the payload
                }
            }
        ] } 
  ],
  "temperature": 0.7,
  "top_p": 0.95,
  "max_tokens": 800
}
# Ask the model to describe the snapshot
retVal = call_api(ENDPOINT, API_KEY, payload)
# Extract the message content from the API response
message_content = retVal['choices'][0]['message']['content']

# Load the image that was sent so the description can be drawn on top of it
img = cv2.imread(image)
font = cv2.FONT_HERSHEY_SIMPLEX
font_scale = 1.5 # Increase the font size
font_color = (255, 255, 255)
line_type = 2  # passed to putText/getTextSize as the stroke thickness

# putText draws a single line and cannot wrap or render newlines, so split the
# description into lines that fit the image width and stack them from the bottom up.
margin = 10
(sample_width, text_height), baseline = cv2.getTextSize("abcdefghijklmnopqrstuvwxyz", font, font_scale, line_type)
chars_per_line = max(1, int((img.shape[1] - 2 * margin) / (sample_width / 26)))
line_height = text_height + baseline + margin
max_lines = max(1, (img.shape[0] - 2 * margin) // line_height)
# Lines that do not fit on the image are dropped here; the full reply is printed below.
overlay_lines = textwrap.wrap(message_content or "", width=chars_per_line)[:max_lines]

for offset, text_line in enumerate(reversed(overlay_lines)):
    bottom_left_corner_of_text = (margin, img.shape[0] - margin - offset * line_height)
    cv2.putText(img, text_line, bottom_left_corner_of_text, font, font_scale, font_color, line_type)

cv2.imshow('GPT-4o', img)
cv2.waitKey(0)
cv2.destroyAllWindows()
pprint(retVal)

NameError: name 'rtsp_url' is not defined

## Complete Lab
 
Run the following cell to complete this lab!

In [None]:
%store -r userId
import requests;print(requests.post("https://jsleaderboard001-cnece0effvapgbft.westus2-01.azurewebsites.net/complete_task", headers={"Content-Type": "application/json"}, json={"user_id": userId, "task_id": 6}).json())