In [2]:
# Basic chat Message using API

from openai import OpenAI 
import os

# Model name shared by the chat-completion requests in this notebook.
MODEL = "gpt-4o"

# The API key is read from the environment so it never appears in the notebook.
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

# System prompt: asks for a casual, conversational Sinhala rendering.
translation_instructions = {
    "role": "system",
    "content": "You are a helpful assistant for language translation. Help me with convert to casual tone and conversational to sinhala language",
}

# User prompt: the English text to be translated.
translation_input = {
    "role": "user",
    "content": "Hi Thilina! Welcome to Agent Zappy, a platform designed to help senior managers in the automotive industry streamline customer communications. With features such as 24/7 instant replies, conversational email/SMS marketing, and human-like AI calling for appointment bookings, our platform ensures no lead or query goes unanswered. Sign up at www.agentzappy.com/signup for a demo or more information",
}

completion = client.chat.completions.create(
    model=MODEL,
    messages=[translation_instructions, translation_input],
)

# Print the text of the first returned choice.
assistant_reply = completion.choices[0].message.content
print("Assistant: " + assistant_reply)

Assistant: හෙලෝ තිලිනා! Agent Zappy වලට සාදරයෙන් පිළිගන්නවා. මේක වාහන කර්මාන්තයේ ජ්‍යෙෂ්ඨ කළමනාකරුවන්ට පාරිභෝගික සංවාද වේගවත් කරගන්න හදපු වේදිකාවක්. 24/7 එකෙන්ම පිළිතුරු දීම, සංකල්ප සම්බන්ධිත ඊ-මේල්/එස්එම්එස් අලෙවිකරණය, සහ වැඩිමහල් වෙන්කරීම්වලට මිනිස් මනසක් මෙන්ම AI ඇමතුම්වලට ඇති පහසුකම් සමග, අපේ වේදිකාව හැම පොරොන්දුවක්ම දියුණු කරයි. වැඩි විස්තර සහ ඩෙමො එකක් සඳහා www.agentzappy.com/signup වෙත පිවිසෙන්න.


In [4]:
# Image processing with Base64

import base64

# Path of the local image that this cell sends to the model.
image_path = "images.png"


def encode_image(image_path):
    """Return the contents of the file at ``image_path`` as a base64 string.

    The file is read in binary mode, base64-encoded, and the encoded bytes
    are decoded as UTF-8 text so they can be embedded in a data URL.
    """
    with open(image_path, "rb") as image_file:
        raw_bytes = image_file.read()
    encoded_bytes = base64.b64encode(raw_bytes)
    return encoded_bytes.decode("utf-8")
    
base64_image = encode_image(image_path)

# Embed the encoded PNG directly in the request as a data URL.
image_data_url = f"data:image/png;base64,{base64_image}"

response = client.chat.completions.create(
    model=MODEL,
    temperature=0.0,
    messages=[
        {
            "role": "system",
            "content": "You are a helpful assistant that responds in Markdown. Help me with my math homework!",
        },
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "What's the area of the triangle?"},
                {"type": "image_url", "image_url": {"url": image_data_url}},
            ],
        },
    ],
)

# Print the text of the first returned choice.
answer_markdown = response.choices[0].message.content
print(answer_markdown)

To find the area of the triangle \( \triangle ABC \), you can use the formula for the area of a triangle:

\[ \text{Area} = \frac{1}{2} \times \text{base} \times \text{height} \]

In the given triangle, \( AB \) is the base \( c \), and \( CD \) is the height \( h \).

So, the area \( A \) of the triangle is:

\[ A = \frac{1}{2} \times c \times h \]

To find the exact area, you need the values of \( c \) (the length of \( AB \)) and \( h \) (the length of \( CD \)). If you have these values, you can substitute them into the formula to get the area.


In [6]:
# Image processing using URL

# The image is referenced by URL here rather than uploaded as base64.
triangle_image_url = "https://static.tutors.com/assets/images/content/tutors-right-triangle-altitude-theorem.jpg"

response = client.chat.completions.create(
    model=MODEL,
    temperature=0.0,
    messages=[
        {
            "role": "system",
            "content": "You are a helpful assistant that responds in Markdown. Help me with my math homework!",
        },
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "What's the area of the triangle?"},
                {"type": "image_url", "image_url": {"url": triangle_image_url}},
            ],
        },
    ],
)

# Print the text of the first returned choice.
answer_markdown = response.choices[0].message.content
print(answer_markdown)

To find the area of the triangle \( \triangle ABC \), we can use the formula for the area of a triangle:

\[ \text{Area} = \frac{1}{2} \times \text{base} \times \text{height} \]

In the given triangle, \( AB = c \) is the base, and \( CD = h \) is the height.

So, the area of \( \triangle ABC \) is:

\[ \text{Area} = \frac{1}{2} \times c \times h \]

Thus, the area of the triangle is \( \frac{1}{2} \times c \times h \).


In [9]:
# Summarization: video summary
import cv2
from moviepy.editor import VideoFileClip
import time
import base64

VIDEO_PATH = "video.mp4"


def process_video(video_path, seconds_per_frame=2):
    """Sample frames from a video and extract its audio track to an MP3 file.

    Args:
        video_path: Path of the video file to read.
        seconds_per_frame: Spacing between sampled frames, in seconds of
            video. Values that would give a step of less than one frame are
            clamped so every frame is sampled.

    Returns:
        A tuple ``(base64Frames, audio_path)``. ``base64Frames`` is a list of
        base64-encoded JPEG strings, one per sampled frame. ``audio_path`` is
        the path of the MP3 written next to the video (same base name with a
        ``.mp3`` extension).

    Raises:
        ValueError: If the video has no audio track to extract.
    """
    base64Frames = []
    base_video_path, _ = os.path.splitext(video_path)

    video = cv2.VideoCapture(video_path)
    try:
        total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
        fps = video.get(cv2.CAP_PROP_FPS)
        # Always advance by at least one frame. A step of 0 (fps reported as
        # 0, or fps * seconds_per_frame < 1) would re-read the same frame
        # forever and grow the list without bound.
        frames_to_skip = max(1, int(fps * seconds_per_frame))

        for frame_index in range(0, total_frames, frames_to_skip):
            video.set(cv2.CAP_PROP_POS_FRAMES, frame_index)
            success, frame = video.read()
            if not success:
                break
            _, buffer = cv2.imencode(".jpg", frame)
            base64Frames.append(base64.b64encode(buffer).decode("utf-8"))
    finally:
        # Release the capture even if reading or encoding fails part-way.
        video.release()

    audio_path = f"{base_video_path}.mp3"
    clip = VideoFileClip(video_path)
    try:
        if clip.audio is None:
            # Fail with a clear message instead of an AttributeError on None.
            raise ValueError(f"{video_path} has no audio track to extract")
        clip.audio.write_audiofile(audio_path, bitrate="32k")
        clip.audio.close()
    finally:
        # Close the clip even if the audio export fails.
        clip.close()

    print(f"Extracted {len(base64Frames)} frames")
    print(f"Extracted audio to {audio_path}")
    return base64Frames, audio_path

base64Frames, audio_path = process_video(VIDEO_PATH, seconds_per_frame=1)

# One image content part per sampled frame. The frames are JPEG-encoded, so
# the data URL uses the registered MIME type image/jpeg; "detail": "low" is
# requested for every frame.
frame_parts = [
    {"type": "image_url",
     "image_url": {"url": f"data:image/jpeg;base64,{frame}", "detail": "low"}}
    for frame in base64Frames
]

response = client.chat.completions.create(
    model=MODEL,
    messages=[
        {"role": "system", "content": "You are generating a video summary. Please provide a summary of the video. Respond in Markdown."},
        {"role": "user", "content": [
            # Every element of a content array is a typed part; a bare string
            # inside the list is not part of the documented request format.
            {"type": "text", "text": "These are the frames from the video."},
            *frame_parts,
        ]},
    ],
    temperature=0,
)
print(response.choices[0].message.content)

MoviePy - Writing audio in video.mp3


                                                                      

MoviePy - Done.
Extracted 199 frames
Extracted audio to video.mp3
The video appears to be an educational tutorial involving two individuals, likely a teacher and a student, discussing a mathematical concept. The frames show them sitting at a table with a tablet displaying a geometric diagram, specifically a triangle. The person on the right, who seems to be the instructor, is explaining the concept while the student on the left listens and takes notes. The tablet screen shows various geometric shapes and annotations, indicating that the lesson involves geometry, possibly focusing on the properties of triangles. The student is engaged, asking questions, and the instructor is providing detailed explanations, using the tablet to illustrate points visually. The atmosphere is focused and educational.


In [10]:
# Summarization: audio summary

# Open the extracted audio in a context manager so the file handle is closed
# as soon as the upload finishes (a bare open() here would leak it).
with open(audio_path, "rb") as audio_file:
    transcription = client.audio.transcriptions.create(
        model="whisper-1",
        file=audio_file,
    )

# Summarize the transcript text with the chat model.
response = client.chat.completions.create(
    model=MODEL,
    messages=[
        {"role": "system", "content": """You are generating a transcript summary. Create a summary of the provided transcription. Respond in Markdown."""},
        {"role": "user", "content": [
            {"type": "text", "text": f"The audio transcription is: {transcription.text}"}
        ]},
    ],
    temperature=0,
)
print(response.choices[0].message.content)

OpenAI invited me and my son Imran to test their new technology, specifically for tutoring math on Khan Academy. We started by loading the program and sharing our screen. I asked the AI to tutor my son on a math problem without giving him the answer directly, but by guiding him through questions.

The AI began by asking Imran to identify the sides of a triangle relative to angle alpha. Imran correctly identified the hypotenuse after some guidance, recognizing it as the longest side opposite the right angle. He then identified the adjacent and opposite sides correctly.

Next, the AI asked Imran to recall the formula for the sine of an angle in a right triangle, which he correctly stated as the opposite side over the hypotenuse. Imran applied this formula using the side lengths provided, calculating sine alpha as 7/25. The AI confirmed his answer and praised his understanding, offering further assistance if needed.


In [11]:
# Summarization: Audio + Visual Summary

# One image content part per sampled frame. The frames are JPEG-encoded, so
# the data URL uses the registered MIME type image/jpeg.
frame_parts = [
    {"type": "image_url",
     "image_url": {"url": f"data:image/jpeg;base64,{frame}", "detail": "low"}}
    for frame in base64Frames
]

response = client.chat.completions.create(
    model=MODEL,
    messages=[
        {"role": "system", "content": """You are generating a video summary. Create a summary of the provided video and its transcript. Respond in Markdown"""},
        {"role": "user", "content": [
            # Every element of a content array is a typed part; a bare string
            # inside the list is not part of the documented request format.
            {"type": "text", "text": "These are the frames from the video."},
            *frame_parts,
            {"type": "text", "text": f"The audio transcription is: {transcription.text}"},
        ]},
    ],
    temperature=0,
)
print(response.choices[0].message.content)

### Video Summary

In this video, OpenAI invited a father and his son, Imran, to test out their new technology for tutoring math on Khan Academy. The father introduces the scenario and asks the AI to tutor his son on a math problem without giving away the answers directly. The AI engages with Imran, guiding him through identifying the sides of a right triangle relative to a given angle, alpha.

1. **Identifying the Hypotenuse**:
   - The AI asks Imran to identify the hypotenuse of the triangle.
   - Imran initially guesses incorrectly but is then guided to understand that the hypotenuse is the longest side opposite the right angle.
   - Imran correctly identifies side AB as the hypotenuse.

2. **Identifying the Opposite Side**:
   - The AI then asks Imran to identify the opposite side relative to angle alpha.
   - Imran deduces that side BC is the opposite side after confirming the hypotenuse and adjacent sides.

3. **Applying the Sine Formula**:
   - The AI asks Imran if he remembers 

In [12]:
# Q&A: Visual Q&A

# Question reused by the visual, audio and combined Q&A cells.
QUESTION = "Question: Why did Sam Altman have an example about raising windows and turning the radio on?"

# One image content part per sampled frame. The frames are JPEG-encoded, so
# the data URL uses the registered MIME type image/jpeg.
frame_parts = [
    {"type": "image_url",
     "image_url": {"url": f"data:image/jpeg;base64,{frame}", "detail": "low"}}
    for frame in base64Frames
]

qa_visual_response = client.chat.completions.create(
    model=MODEL,
    messages=[
        {"role": "system", "content": "Use the video to answer the provided question. Respond in Markdown."},
        {"role": "user", "content": [
            # Every element of a content array is a typed part; bare strings
            # inside the list are not part of the documented request format.
            {"type": "text", "text": "These are the frames from the video."},
            *frame_parts,
            {"type": "text", "text": QUESTION},
        ]},
    ],
    temperature=0,
)
print("Visual QA:\n" + qa_visual_response.choices[0].message.content)

Visual QA:
Based on the frames from the video, it appears that Sam Altman is explaining a concept, likely related to technology or user experience, to a younger individual. The example about raising windows and turning the radio on is probably used to illustrate a point about automation, user control, or the integration of multiple functions in a system. This kind of example is often used to make abstract or complex ideas more relatable and easier to understand by connecting them to everyday experiences.


In [13]:
# Q&A: Audio Q&A

# Transcript and question are sent together as one plain-text user message.
audio_qa_prompt = f"The audio transcription is: {transcription.text}. \n\n {QUESTION}"

qa_audio_response = client.chat.completions.create(
    model=MODEL,
    temperature=0,
    messages=[
        {
            "role": "system",
            "content": "Use the transcription to answer the provided question. Respond in Markdown.",
        },
        {"role": "user", "content": audio_qa_prompt},
    ],
)

# Print the text of the first returned choice under a label.
audio_answer = qa_audio_response.choices[0].message.content
print("Audio QA:\n" + audio_answer)

Audio QA:
The transcription provided does not mention Sam Altman or any example about raising windows and turning the radio on. The content focuses on a tutoring session involving math problems related to identifying sides of a triangle and calculating the sine of an angle. Therefore, I cannot provide an answer to the question based on the given transcription.


In [14]:
# Q&A: Visual + Audio Q&A

# One image content part per sampled frame. The frames are JPEG-encoded, so
# the data URL uses the registered MIME type image/jpeg.
frame_parts = [
    {"type": "image_url",
     "image_url": {"url": f"data:image/jpeg;base64,{frame}", "detail": "low"}}
    for frame in base64Frames
]

qa_both_response = client.chat.completions.create(
    model=MODEL,
    messages=[
        {"role": "system", "content": """Use the video and transcription to answer the provided question."""},
        {"role": "user", "content": [
            # Every element of a content array is a typed part; bare strings
            # inside the list are not part of the documented request format.
            {"type": "text", "text": "These are the frames from the video."},
            *frame_parts,
            {"type": "text", "text": f"The audio transcription is: {transcription.text}"},
            {"type": "text", "text": QUESTION},
        ]},
    ],
    temperature=0,
)
print("Both QA:\n" + qa_both_response.choices[0].message.content)

Both QA:
The frames and transcription provided do not contain any mention of Sam Altman or an example about raising windows and turning the radio on. The content focuses on a tutoring session where a father and his son are using OpenAI's technology to solve a math problem involving identifying the sides of a triangle and calculating the sine of an angle. 

If you have any additional context or specific details about Sam Altman's example, please provide them so I can assist you better.
