In [3]:
from dotenv import find_dotenv, load_dotenv
import os

# Find the nearest .env file and load its entries into the process environment,
# so the API key never has to be written into the notebook itself.
load_dotenv(dotenv_path=find_dotenv())

# None when OPENAI_API_KEY is not present in the environment or the .env file.
openai_api_key = os.environ.get("OPENAI_API_KEY")

In [4]:
from openai import OpenAI 
# Chat model used by every completion request in this notebook.
MODEL = "gpt-4o"

# Single shared API client, authenticated with the key read from the environment.
client = OpenAI(api_key=openai_api_key)

## TEXT DATA

In [6]:
# Plain-text request: a short arithmetic word problem for the chat model.
chat_messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {
        "role": "user",
        "content": "Hello! I have 2 apples. My father gave me 2. I ate 3. Gave 1 to my mother. How many do I have?",
    },
]
completion = client.chat.completions.create(model=MODEL, messages=chat_messages)

# Show only the text of the first returned choice.
print(completion.choices[0].message.content)

Assistant: Let's break down the problem step-by-step:

1. You started with 2 apples.
2. Your father gave you 2 more apples. Now you have 2 + 2 = 4 apples.
3. You ate 3 apples. Now you have 4 - 3 = 1 apple.
4. Then you gave 1 apple to your mother. Now you have 1 - 1 = 0 apples.

So, you have 0 apples left.


## IMAGE DATA - BASE64

In [7]:
import base64

# Local image file that is sent to the model inline as base64 data.
IMAGE_PATH = "complex_nature.jpg"


def encode_image(image_path):
    """Read the file at ``image_path`` and return its bytes as a base64 string.

    Args:
        image_path: Path of the file to read (opened in binary mode).

    Returns:
        The base64 encoding of the file's contents, decoded to a UTF-8 ``str``.
    """
    with open(image_path, "rb") as image_file:
        raw_bytes = image_file.read()
    encoded_bytes = base64.b64encode(raw_bytes)
    return encoded_bytes.decode("utf-8")

# Encode the local image so it can be embedded directly in the request as a data URL.
base64_image = encode_image(IMAGE_PATH)

response = client.chat.completions.create(
    model=MODEL,
    messages=[
        {"role": "system", "content": "You are a helpful assistant"},
        {"role": "user", "content": [
            {"type": "text", "text": "What's the picture about?"},
            # IMAGE_PATH is a .jpg file, so the data URL declares image/jpeg
            # (it previously claimed image/png, which did not match the payload).
            {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}},
        ]},
    ],
    temperature=0.0,
)

# Show only the text of the first returned choice.
print(response.choices[0].message.content)

The picture shows a serene natural scene featuring a small waterfall cascading into a pool of water surrounded by lush green vegetation. The area appears to be a tranquil spot in a forest or jungle, with rocks and foliage adding to the picturesque setting. The water is clear, and the overall atmosphere is peaceful and refreshing.


## IMAGE DATA - URL

In [8]:
# Same question as the base64 cell, but the image is referenced by a public URL
# instead of being embedded in the request.
image_part = {
    "type": "image_url",
    "image_url": {
        "url": "https://thumbs.dreamstime.com/b/waterfall-sossegada-tourist-complex-capit%C3%B3lio-mg-brazil-december-beautiful-small-surrounded-plants-natural-beauty-212704578.jpg"
    },
}
question_part = {"type": "text", "text": "What's the picture about?"}

response = client.chat.completions.create(
    model=MODEL,
    messages=[
        {"role": "system", "content": "You are a helpful assistant"},
        {"role": "user", "content": [question_part, image_part]},
    ],
    temperature=0.0,
)

# Show only the text of the first returned choice.
print(response.choices[0].message.content)

The picture depicts a serene natural scene featuring a small waterfall cascading into a calm pool of water. The surrounding area is lush with green vegetation, including ferns and other plants, creating a tranquil and picturesque environment. The water appears clear, and there is a rock visible in the pool, adding to the natural beauty of the scene.


## VIDEO SUMMARY

In [11]:
import cv2
from moviepy.editor import VideoFileClip
import time
import base64

# Video to summarise. Set the VIDEO_PATH environment variable (e.g. in .env) to
# use a different file without editing the notebook; the default is unchanged.
VIDEO_PATH = os.getenv("VIDEO_PATH", "C:/Users/vasan/Videos/Captures/cut.mp4")


def process_video(video_path, seconds_per_frame=2):
    """Sample frames from a video and write its audio track to an .mp3 file.

    One frame is taken every ``seconds_per_frame`` seconds, JPEG-encoded and
    base64-encoded. The audio is written next to the video, using the same
    path with the extension replaced by ``.mp3``.

    Args:
        video_path: Path of the video file to process.
        seconds_per_frame: Interval, in seconds, between sampled frames.

    Returns:
        A tuple ``(base64Frames, audio_path)``: the list of base64-encoded JPEG
        frames, and the path of the written audio file (``None`` when the
        video has no audio track).

    Raises:
        OSError: If OpenCV cannot open ``video_path``.
    """
    base64Frames = []
    base_video_path, _ = os.path.splitext(video_path)

    video = cv2.VideoCapture(video_path)
    try:
        if not video.isOpened():
            raise OSError(f"Could not open video file: {video_path}")

        total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
        fps = video.get(cv2.CAP_PROP_FPS)
        # Always advance by at least one frame: a step of 0 (fps reported as 0,
        # or fps * seconds_per_frame < 1) would re-read frame 0 forever.
        frames_to_skip = max(1, int(fps * seconds_per_frame))
        curr_frame = 0

        while curr_frame < total_frames - 1:
            video.set(cv2.CAP_PROP_POS_FRAMES, curr_frame)
            success, frame = video.read()
            if not success:
                break
            encoded_ok, buffer = cv2.imencode(".jpg", frame)
            # Skip frames that fail to encode rather than storing an invalid buffer.
            if encoded_ok:
                base64Frames.append(base64.b64encode(buffer).decode("utf-8"))
            curr_frame += frames_to_skip
    finally:
        # Release the capture even if reading or encoding raised.
        video.release()

    audio_path = f"{base_video_path}.mp3"
    clip = VideoFileClip(video_path)
    try:
        if clip.audio is None:
            # Videos without an audio track have no audio clip to export.
            audio_path = None
        else:
            clip.audio.write_audiofile(audio_path, bitrate="32k")
            clip.audio.close()
    finally:
        clip.close()

    print(f"Extracted {len(base64Frames)} frames")
    if audio_path is None:
        print(f"No audio track found in {video_path}; skipped audio extraction")
    else:
        print(f"Extracted audio to {audio_path}")
    return base64Frames, audio_path

# Sample one frame per second and extract the audio track for the later cells.
base64Frames, audio_path = process_video(VIDEO_PATH, seconds_per_frame=1)

# Every entry of a multi-part message is an explicit typed content part; frames
# are sent as low-detail JPEG data URLs (image/jpeg is the registered MIME type).
response = client.chat.completions.create(
    model=MODEL,
    messages=[
        {"role": "system", "content": "You are generating a video summary. Please provide a summary of the video. Respond in Markdown."},
        {"role": "user", "content": [
            {"type": "text", "text": "These are the frames from the video."},
            *[
                {"type": "image_url",
                 "image_url": {"url": f"data:image/jpeg;base64,{frame}", "detail": "low"}}
                for frame in base64Frames
            ],
        ]},
    ],
    temperature=0,
)
print(response.choices[0].message.content)

MoviePy - Writing audio in C:/Users/vasan/Videos/Captures/cut.mp3


                                                                    

MoviePy - Done.
Extracted 12 frames
Extracted audio to C:/Users/vasan/Videos/Captures/cut.mp3
The frames from the video show a document titled "YouTube_Video_Creation_Report" which lists details about various tutorial videos. Here is a summary of the content:

### YouTube Video Creation Report

#### Video 1
- **Title:** Creal: Complete Crash Course for Beginners
- **View Count:** N/A
- **Likes Count:** N/A
- **Channel Subscriber Count:** N/A
- **Date Published:** N/A
- **Link:** [Watch Here](https://www.youtube.com/watch?v=9bZkp7q19f0)

#### Video 2
- **Title:** Creal: Step-by-Step | Complete Course for Beginners
- **View Count:** N/A
- **Likes Count:** N/A
- **Channel Subscriber Count:** N/A
- **Date Published:** N/A
- **Link:** [Watch Here](https://www.youtube.com/watch?v=9bZkp7q19f0)

#### Video 3
- **Title:** Creal Tutorial for Beginners: Learn How to Use Latest Creal Features
- **View Count:** N/A
- **Likes Count:** N/A
- **Channel Subscriber Count:** N/A
- **Date Published:** N/A

## AUDIO SUMMARY

In [10]:
# Transcribe the extracted audio. The context manager closes the file handle
# once the upload has finished instead of leaking it.
with open(audio_path, "rb") as audio_file:
    transcription = client.audio.transcriptions.create(
        model="whisper-1",
        file=audio_file,
    )

# Summarise the transcript text alone (no video frames in this request).
response = client.chat.completions.create(
    model=MODEL,
    messages=[
        {"role": "system", "content": """You are generating a transcript summary. Create a summary of the provided transcription. Respond in Markdown."""},
        {"role": "user", "content": [
            {"type": "text", "text": f"The audio transcription is: {transcription.text}"},
        ]},
    ],
    temperature=0,
)
print(response.choices[0].message.content)

The speaker discusses their latest project, a crew AI application designed to automate several tasks for their YouTube channel. The application focuses on automating four key steps: title creation, description creation, thumbnail creation, and tags identification. The speaker uses Google API, ChargeBT for LLM, and crew AI for the agent framework. The application analyzes top-performing videos related to a given title and generates optimized titles, descriptions, thumbnail prompts, and relevant tags. The speaker expresses satisfaction with the framework and invites viewers to check the link in the description or comment section for a detailed explanation of the code and architecture.


## AUDIO + VIDEO SUMMARY

In [12]:
# Transcribe the extracted audio. The context manager closes the file handle
# once the upload has finished instead of leaking it.
with open(audio_path, "rb") as audio_file:
    transcription = client.audio.transcriptions.create(
        model="whisper-1",
        file=audio_file,
    )

# Combine the sampled frames and the transcript in a single user message.
# Every entry is an explicit typed content part; frames are sent as low-detail
# JPEG data URLs (image/jpeg is the registered MIME type).
response = client.chat.completions.create(
    model=MODEL,
    messages=[
        {"role": "system", "content": """You are generating a video summary. Create a summary of the provided video and its transcript. Respond in Markdown"""},
        {"role": "user", "content": [
            {"type": "text", "text": "These are the frames from the video."},
            *[
                {"type": "image_url",
                 "image_url": {"url": f"data:image/jpeg;base64,{frame}", "detail": "low"}}
                for frame in base64Frames
            ],
            {"type": "text", "text": f"The audio transcription is: {transcription.text}"},
        ]},
    ],
    temperature=0,
)
print(response.choices[0].message.content)

### Video Summary

The video showcases a project where the creator has developed a Crew.ai application. The frames from the video display a detailed report of various YouTube tutorial videos related to Crew.ai, including their titles, view counts, URLs, and publication dates. The report is organized in a table format, listing multiple videos aimed at beginners to help them learn and use Crew.ai effectively.

#### Key Points:
1. **Project Overview**: The creator introduces their latest project, a Crew.ai application.
2. **Video Report**: The frames show a comprehensive report of YouTube tutorials on Crew.ai.
3. **Tutorial Details**:
   - Titles: Various titles such as "CrewAI Tutorial: Complete Crash Course for Beginners" and "CrewAI Step-by-Step | Complete Course for Beginners".
   - View Counts: Information on the number of views each video has received.
   - URLs: Links to the YouTube videos.
   - Publication Dates: Dates when the videos were published.
4. **Purpose**: The report aim

## AUDIO + VIDEO QA

In [13]:
# Question answering over both modalities: frames first, then the transcript,
# then the question. Reuses `base64Frames` and `transcription` from the cells
# that ran earlier. Every entry is an explicit typed content part; frames are
# sent as low-detail JPEG data URLs (image/jpeg is the registered MIME type).
qa_both_response = client.chat.completions.create(
    model=MODEL,
    messages=[
        {"role": "system", "content": """Use the video and transcription to answer the provided question."""},
        {"role": "user", "content": [
            {"type": "text", "text": "These are the frames from the video."},
            *[
                {"type": "image_url",
                 "image_url": {"url": f"data:image/jpeg;base64,{frame}", "detail": "low"}}
                for frame in base64Frames
            ],
            {"type": "text", "text": f"The audio transcription is: {transcription.text}"},
            {"type": "text", "text": "What is the framework the person is discussing about"},
        ]},
    ],
    temperature=0,
)
print("Both QA:\n" + qa_both_response.choices[0].message.content)

Both QA:
The person is discussing about the "CrewaI" framework.
