In [3]:
from openai import OpenAI
from dotenv import load_dotenv
import langchain_openai
import langchain
import os
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate


In [3]:
import cv2

def extract_frames(video_path, every_n_frames=30, max_seconds=10):
    """Extract every Nth frame from a video, capped at max_seconds.

    Args:
        video_path: Path of the video file handed to ``cv2.VideoCapture``.
        every_n_frames: Keep one frame out of every N decoded frames
            (frames 0, N, 2N, ...). Must be at least 1.
        max_seconds: Only the first ``int(fps * max_seconds)`` frames are read.

    Returns:
        List of the kept frames, in order, exactly as returned by ``cap.read()``.

    Raises:
        ValueError: If ``every_n_frames`` is smaller than 1, or the video does
            not report a usable frame rate (the time cap cannot be converted
            into a frame count without it).
        IOError: If the video cannot be opened.
    """
    # Reject this up front: 0 would otherwise surface as a ZeroDivisionError
    # from the modulo below, far from the actual mistake.
    if every_n_frames < 1:
        raise ValueError(f"every_n_frames must be >= 1, got {every_n_frames}")

    cap = cv2.VideoCapture(video_path)
    frames = []
    try:
        # An unopened capture reports FPS 0, which previously produced an
        # empty list with no hint that the path was wrong.
        if not cap.isOpened():
            raise IOError(f"Could not open video: {video_path}")

        fps = cap.get(cv2.CAP_PROP_FPS)
        # The chained comparison is also False for NaN, so this covers
        # zero, negative, NaN and infinite frame rates.
        if not 0 < fps < float("inf"):
            raise ValueError(f"Video reports an unusable FPS ({fps}): {video_path}")
        max_frames = int(fps * max_seconds)

        for frame_count in range(max_frames):
            success, frame = cap.read()

            # End of stream (or a decode failure) before the cap was reached.
            if not success:
                break

            if frame_count % every_n_frames == 0:
                frames.append(frame)
    finally:
        # Always free the capture handle, even when an error is raised above.
        cap.release()

    print(f"Video FPS: {fps}, Max frames processed: {max_frames} ({max_seconds}s cap)")
    return frames

# Usage: keep one frame out of every 30 decoded; the time cap is left at
# extract_frames' default.
VIDEO_PATH = "/Users/chandlershortlidge/Desktop/Ironhack/fitness-form-coach/data/raw_workout_videos/bench_press_form_check.MP4"
FRAMES_DIR = "/Users/chandlershortlidge/Desktop/Ironhack/fitness-form-coach/data/processed/processed-images"

frames = extract_frames(VIDEO_PATH, every_n_frames=30)
print(f"Extracted {len(frames)} frames")


# Save them to check.
# cv2.imwrite does not create missing directories and signals failure only
# through its return value, so create the folder first and check every write
# instead of discovering missing files in a later cell.
os.makedirs(FRAMES_DIR, exist_ok=True)
for i, frame in enumerate(frames):
    frame_path = os.path.join(FRAMES_DIR, f"bench_frame_{i}.jpg")
    if not cv2.imwrite(frame_path, frame):
        raise IOError(f"Could not write frame to {frame_path}")

Video FPS: 30.0, Max frames processed: 300 (10s cap)
Extracted 10 frames


In [4]:
# Create a base64 encoder so that we can send images to the model API.
# The model API communicates via JSON, and JSON is text: you can't put raw image bytes into JSON.
# So you encode the bytes into a text string, which the model can decode.

import base64

def base_encoder(filepath_in):
    """Read the file at ``filepath_in`` and return its bytes as a base64 string.

    Args:
        filepath_in: Path of the file to read (opened in binary mode).

    Returns:
        The file's full contents, base64-encoded and decoded to ``str``.
    """
    with open(filepath_in, 'rb') as handle:
        raw_bytes = handle.read()

    # b64encode yields bytes; decode so the result can be embedded in text.
    return base64.b64encode(raw_bytes).decode('utf-8')


In [7]:

# Encode one saved frame (bench_frame_3.jpg) as base64 text so it can be
# embedded in the request as a data URL.
test_image = base_encoder("/Users/chandlershortlidge/Desktop/Ironhack/fitness-form-coach/data/processed/processed-images/bench_frame_3.jpg")

llm = ChatOpenAI(model='gpt-4o')

# Instruction text sent alongside the image: asks for a title line plus a
# numbered list of body-part checkpoints, following the embedded example.
# NOTE(review): the phrase "of their to body evaluate" in the first sentence
# looks garbled -- confirm the intended wording before relying on this prompt.
checkpoint_prompt = """Your job is to analyze images of users working out for proper form, and list the key checkpoints of their to body evaluate. 
  Give me ONLY the bodypart checkpoints. Do NOT include evaluation suggestions. Do NOT include an intro sentence. 
  Output format should be exactly the example below.
**Example**
  Overhead press

  1. Feet & base
  2. Glutes & legs
  3. Core & Ribcage
  4. Shoulder position
  5. Bar path
  6. Head & Neck
  7. Lockout position
  8. Tempo and control
   """

# A single user turn carrying two content parts: the text prompt and the image.
user_message = {
    "role": "user",
    "content": [
        {"type": "text", "text": checkpoint_prompt},
        {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{test_image}"}},
    ],
}

response = llm.invoke([user_message])

print(response.text)

**Bench Press**

1. Feet & base
2. Glutes & hips
3. Back arch & position
4. Shoulder position
5. Elbow alignment
6. Wrist position
7. Bar path
8. Head & neck
9. Tempo and control


In [27]:
import re
def clean_classification_text(r):
    """Replace every character that is not an ASCII letter with a space.

    Args:
        r: A model response exposing ``.content``, or a plain string.
            ``.content`` may be a string or a list of content blocks
            (plain strings and ``{"type": "text", "text": ...}`` dicts);
            blocks of any other type are skipped.

    Returns:
        The response text with each non ``a-z`` / ``A-Z`` character replaced
        by a single space. Runs of spaces are not collapsed.
    """
    content = getattr(r, "content", r)

    if isinstance(content, str):
        create_string = content
    else:
        # List-style content: keep only the textual parts and concatenate
        # them, since re.sub would raise TypeError on the list itself.
        text_parts = []
        for block in content:
            if isinstance(block, str):
                text_parts.append(block)
            elif isinstance(block, dict) and block.get("type") == "text":
                text_parts.append(block.get("text", ""))
        create_string = "".join(text_parts)

    response_cleaned = re.sub(r'[^a-zA-Z]', ' ', create_string)
    return response_cleaned

In [28]:
# Replace every non-letter character in the model reply (markdown asterisks,
# list numbers, punctuation, newlines) with a space, then show the result.
cleaned = clean_classification_text(r=response)
print(cleaned)

  Bench Press       Feet   base    Glutes   hips    Back arch   position    Shoulder position    Elbow alignment    Wrist position    Bar path    Head   neck    Tempo and control
