In [5]:
import sys
from pathlib import Path
import os
import json
from typing import List
sys.path.append(str(Path(os.getcwd()).parent))

import gradio as gr
from llama_index.llms.nvidia import NVIDIA
from llama_index.llms.huggingface import HuggingFaceLLM
from llama_index.llms.groq import Groq
from pydantic import BaseModel, parse_obj_as
from dotenv import load_dotenv

from data.transcript import get_video_transcript

In [6]:
load_dotenv(override=True)
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
# GROQ_API_KEY = os.environ["GROQ_API_KEY"]
llm = Groq(model="llama-3.1-8b-instant", api_key=GROQ_API_KEY)

In [14]:
topics_dict = {}
dropdown_options = []


In [9]:
transcript = get_video_transcript(url="https://www.youtube.com/watch?v=oz9cEqFynHU", return_str=True)

In [12]:
topic_extractor_prompt = f"""
    Given the transcript of a YouTube video tutorial:

    {transcript[:-100000]}

    Understand what this video is about and identify the main topics.
    Include as much description as possible for each topic.
    Make sure topics are not ambiguous and have meaningful segregation of content.

    Format your response as a JSON array of objects, where each object has two fields:
    - "name": The name of the topic (a short, concise title)
    - "description": A detailed description of the topic

    Example format:
    [
    {{
        "name": "Topic 1",
        "description": "Detailed description of Topic 1..."
    }},
    {{
        "name": "Topic 2",
        "description": "Detailed description of Topic 2..."
    }}
    ]

    Remove the preamble and ensure your output is valid JSON. 
    """

In [15]:
response = llm.complete(topic_extractor_prompt)

global topics_dict
try:
    json_response = json.loads(response.text)
    print(json_response)
    topics_list = parse_obj_as(List[Topic], json_response)
    topics_dict = {topic.name: topic.description for topic in topics_list}
    print(f"Extracted {len(topics_dict)} topics from the video transcript.")
    print(topics_dict)
except json.JSONDecodeError:
    print("Error: The LLM output was not valid JSON.")
except ValueError:
    print("Error: The LLM output did not match the expected format.")

topics_dict

Error: The LLM output was not valid JSON.


{}

In [29]:
x = response.text
x

'```json\n[\n  {\n    "name": "Introduction to the Project",\n    "description": "The host introduces the project, a web development tutorial, and explains the purpose of the video. They discuss the target audience, the tools and technologies used, and the expected outcome of the tutorial."\n  },\n  {\n    "name": "Setting Up the Development Environment",\n    "description": "The host explains how to set up a development environment for the project, including installing necessary software, configuring the code editor, and setting up a version control system. They also discuss the importance of using a code editor with features like code completion and debugging tools."\n  },\n  {\n    "name": "Understanding HTML and CSS Basics",\n    "description": "The host provides an overview of HTML and CSS, explaining the basics of these technologies, including HTML tags, CSS selectors, and CSS properties. They also discuss the importance of understanding the box model and how to use CSS to style 

In [28]:
print(json.loads(x.split('json')[1][:-3]))


[{'name': 'Introduction to the Project', 'description': 'The host introduces the project, a web development tutorial, and explains the purpose of the video. They discuss the target audience, the tools and technologies used, and the expected outcome of the tutorial.'}, {'name': 'Setting Up the Development Environment', 'description': 'The host explains how to set up a development environment for the project, including installing necessary software, configuring the code editor, and setting up a version control system. They also discuss the importance of using a code editor with features like code completion and debugging tools.'}, {'name': 'Understanding HTML and CSS Basics', 'description': 'The host provides an overview of HTML and CSS, explaining the basics of these technologies, including HTML tags, CSS selectors, and CSS properties. They also discuss the importance of understanding the box model and how to use CSS to style web pages.'}, {'name': 'Building a Responsive Layout with CSS