In [1]:
# Import required libraries for JSON parsing, data modeling and LLM integration
from langchain_core.output_parsers import JsonOutputParser
from pydantic import BaseModel, Field
from langchain_google_genai import ChatGoogleGenerativeAI
from typing import List


# Define VideoAnalysis class to structure video metadata
class VideoAnalysis(BaseModel):
    ad_type: str = Field(description="Type of advertisement (e.g., Brand ad, Product ad, etc.)")
    main_category: str = Field(description="Main category of the video")
    sub_categories: List[str] = Field(description="List of video sub-categories")
    main_theme_message: str = Field(description="Main theme and message of the video")
    tone_and_manner: str = Field(description="Tone and manner of the video")
    target_audience: List[str] = Field(description="Target customer segments")
    key_keywords: List[str] = Field(description="List of key keywords")
    suitable_industries: List[str] = Field(description="List of suitable industries")
    expected_viewer_interest: str = Field(description="Expected viewer interest level (High/Medium/Low)")

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from langchain_google_genai import ChatGoogleGenerativeAI

gemini_llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-pro",
    temperature=0
)

In [6]:
import os
import google.generativeai as genai
import time
import os
import json
import yt_dlp
from dotenv import load_dotenv
load_dotenv()

class GeminiVideoAnalyzer:
    def __init__(self):
        
        self.GOOGLE_API_KEY = os.getenv('GOOGLE_API_KEY')
        genai.configure(api_key=self.GOOGLE_API_KEY)
        self.model = genai.GenerativeModel(model_name="gemini-1.5-pro")
        
    def download_video(self, url: str) -> str:
        """Download YouTube video"""
        video_dir = "download_video"
        os.makedirs(video_dir, exist_ok=True)
        
        try:
            ydl_opts = {
                'format': 'bestvideo[height<=720][ext=mp4]+bestaudio[ext=m4a]/best[height<=720][ext=mp4]/best',
                'outtmpl': os.path.join(video_dir, '%(title)s.%(ext)s'),
                'quiet': True,
                'no_warnings': True,
                'extract_flat': False,
                'merge_output_format': 'mp4'
            }
            
            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                info = ydl.extract_info(url, download=True)
                video_path = ydl.prepare_filename(info)
                
                if not os.path.exists(video_path):
                    raise FileNotFoundError(f"Downloaded video not found: {video_path}")
                
                print(f"Video download completed: {video_path}")
                return video_path
                
        except Exception as e:
            print(f"Error occurred during video download: {str(e)}")
            raise

    def analyze_video(self, video_path: str) -> dict:
        """Analyze video using Gemini"""
        try:
            video_file = genai.upload_file(video_path, mime_type='video/mp4')
            print(f"Starting file upload: {video_path}")

            if video_file.state.name == "PROCESSING":
                print('Processing upload...', end='', flush=True)
                time.sleep(5)
                video_file = genai.get_file(video_file.name)

            if video_file.state.name == "FAILED":
                raise ValueError(f"File processing failed: {video_file.state}")
            elif video_file.state.name == "ACTIVE":
                print(f"\nFile processing complete! Status: {video_file.state.name}")

            prompt = """
            Please analyze the main content of this video. Include the following items:
            
            1. Ad type (Product ad, Brand ad, etc.)
            2. Main category
            3. Sub categories
            4. Main theme/message
            5. Tone and manner
            6. Target audience
            7. Key keywords
            8. Suitable industries
            9. Expected viewer interest (High/Medium/Low)
            
            Please respond in JSON format.
            """

            print("Requesting Gemini analysis...")
            response = self.model.generate_content(
                [video_file, prompt],
                request_options={"timeout": 600}
            )
            
            return response.text

        except Exception as e:
            print(f"Error occurred during video analysis: {str(e)}")
            raise
        finally:
            # Clean up temporary files
            if os.path.exists(video_path):
                try:
                    os.remove(video_path)
                except Exception as e:
                    print(f"Failed to delete temporary file: {str(e)}")

    def analyze_youtube_video(self, url: str) -> dict:
        """Run complete analysis process from YouTube URL"""
        try:
            video_path = self.download_video(url)
            result = self.analyze_video(video_path)
            return result
        except Exception as e:
            print(f"Analysis failed: {str(e)}")
            raise

In [7]:
url = "https://www.youtube.com/watch?v=XMgt2e7l6hk"
# 분석기 초기화 및 실행
try:
    analyzer = GeminiVideoAnalyzer()
    result = analyzer.analyze_youtube_video(url)
    print(json.dumps(result, indent=2, ensure_ascii=False))
except Exception as e:
    print(f"error in run: {str(e)}")

Video download completed: download_video/Google Gemini ｜ 엉뚱발랄 채령과 Gemini가 새로 개발(？)한 젠가 떡볶이🤣 ｜ 제미나이 키친 - 채령 15s ver..mp4
Starting file upload: download_video/Google Gemini ｜ 엉뚱발랄 채령과 Gemini가 새로 개발(？)한 젠가 떡볶이🤣 ｜ 제미나이 키친 - 채령 15s ver..mp4
Processing upload...
File processing complete! Status: ACTIVE
Requesting Gemini analysis...
"Sure, here’s the video analysis in JSON format.\n\n```json\n{\n  \"ad_type\": \"Product Ad\",\n  \"main_category\": \"Technology\",\n  \"sub_categories\": [\"Software\", \"Artificial Intelligence\", \"Mobile Apps\"],\n  \"main_theme_message\": \"Introducing Google Gemini, a helpful AI assistant that can answer questions, provide information, and even offer creative inspiration for cooking.\",\n  \"tone_and_manner\": \"Friendly, Playful, Informative\",\n  \"target_audience\": [\"Tech-savvy individuals\", \"Early adopters\", \"People interested in AI\", \"Users seeking convenient information access\", \"Content creators\"],\n  \"key_keywords\": [\"Google Gemini\", 

In [8]:
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.prompts import PromptTemplate

def create_video_analysis_chain():
    """Function to create LangChain chain for video analysis"""
    parser = JsonOutputParser(pydantic_object=VideoAnalysis)
    
    prompt = PromptTemplate(
        template="""Please analyze the given video and provide detailed information.

    {format_instructions}

    Video description: {input}
    """,
        input_variables=["input"],
        partial_variables={"format_instructions": parser.get_format_instructions()},
    )
    
    chain = prompt | gemini_llm | parser
    return chain

def analyze_video(video_description: str, context: str = ""):
    """
    Function that analyzes video and returns structured results
    
    Args:
        video_description (str): Description of video to analyze
        context (str): Additional context information (optional)
    
    Returns:
        VideoAnalysis: Analysis results
    """
    chain = create_video_analysis_chain()
    result = chain.invoke({
        "input": video_description,
    })
    return result

In [9]:
final_result = analyze_video(result)
print(final_result)

{'ad_type': 'Product Ad', 'main_category': 'Technology', 'sub_categories': ['Software', 'Artificial Intelligence', 'Mobile Apps'], 'main_theme_message': 'Introducing Google Gemini, a helpful AI assistant that can answer questions, provide information, and even offer creative inspiration for cooking.', 'tone_and_manner': 'Friendly, Playful, Informative', 'target_audience': ['Tech-savvy individuals', 'Early adopters', 'People interested in AI', 'Users seeking convenient information access', 'Content creators'], 'key_keywords': ['Google Gemini', 'AI assistant', 'Artificial intelligence', 'Mobile app', 'Information retrieval', 'Creative assistance', 'Cooking inspiration', 'Tteokbokki'], 'suitable_industries': ['Technology', 'Food and beverage', 'Content creation', 'Education', 'Customer service'], 'expected_viewer_interest': 'Medium'}


In [11]:
from app.vector import AdVectorDB
from app.vector_search import AdVectorSearch
# Initialize and save vector DB
csv_path = "/Users/kdb/Desktop/youtube_add/final_ad_analysis.csv"
vector_db = AdVectorDB(csv_path)
vector_db.initialize_vector_store("ad_vectorstore")

# Execute vector search
from app.vector_search import AdVectorSearch
vector_search = AdVectorSearch("ad_vectorstore")
similar_video = vector_search.search_similar_ads(result)

# Print results
print("\nSimilar Ad Search Results:")
print(f"Filename: {similar_video['metadata']['filename']}")
print(f"Category: {similar_video['metadata']['main_category']}")
print(f"Ad Type: {similar_video['metadata']['content_type']}")
print(f"Target Audience: {similar_video['metadata']['target_audience']}")
print("\nAd Content:")
print(similar_video['content'])

벡터 스토어 초기화 시작...
CSV 파일 로드 완료: 6 개의 광고 데이터
Document 객체 생성 완료: 6 개
벡터 스토어 생성 완료
벡터 스토어 저장 완료: ad_vectorstore

Similar Ad Search Results:
Filename: [ KT ｜ Microsoft 파트너십편 ] KT가 만듭니다. 글로벌 K-AI.mp4
Category: IT/기술
Ad Type: 브랜드 광고
Target Audience: ['기업 관계자', 'IT 업계']

Ad Content:
광고 유형: 브랜드 광고
        주요 카테고리: IT/기술
        서브 카테고리: 인공지능(AI), 통신, 소프트웨어
        주요 테마: KT-Microsoft AI 파트너십
        톤앤매너: 밝고 긍정적, 미래지향적
        타겟 고객: 기업 관계자, IT 업계
        키워드: 인공지능, AI, KT, Microsoft
        적합 산업군: IT/기술, 통신, 소프트웨어, 교육


In [12]:
similar_video['metadata']['video_path']

'/Users/kdb/Desktop/youtube_add/KT_ad/[ KT ｜ Microsoft 파트너십편 ] KT가 만듭니다. 글로벌 K-AI.mp4'

In [14]:
from IPython.display import Video, display
import os
from pathlib import Path
import urllib.parse

def play_video(similar_video):
    """Play video"""
    try:
        base_dir = Path("/Users/kdb/Desktop/youtube_add/KT_ad")
        filename = "[ 팬메이드K-AI 아이디어 챌린지 ] No.01 ‘배려심 넘치는 조명’편 ｜ KT.mp4"
        
        # Find actual file in current directory
        for file in base_dir.iterdir():
            if file.name == filename:
                video_path = file
                break
        
        if 'video_path' in locals():
            # Display video (with size adjustment)
            display(Video(str(video_path), embed=True, width=800, height=450))
            print(f"Playing file: {video_path}")
        else:
            print("File not found.")
            print("\nDirectory contents:")
            for file in base_dir.iterdir():
                print(file.name)
            
    except Exception as e:
        print(f"Error occurred while playing video: {e}")
        if 'video_path' in locals():
            print(f"Attempted path: {video_path}")

# Play video
play_video(similar_video)

Playing file: /Users/kdb/Desktop/youtube_add/KT_ad/[ 팬메이드K-AI 아이디어 챌린지 ] No.01 ‘배려심 넘치는 조명’편 ｜ KT.mp4
