In [1]:
from dotenv import load_dotenv

# .env 파일에서 환경 변수를 로드
load_dotenv()

True

In [2]:
import google.generativeai as genai
from IPython.display import Markdown
import os
import time


# .env에서 API 키 가져오기
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")

if not GOOGLE_API_KEY:
    raise ValueError("Google API key not found. Please set it in your .env file.")

# API 키 설정
genai.configure(api_key=GOOGLE_API_KEY)

# 비디오 파일 경로


# 파일 업로드 함수
def upload_and_process_file(file_path):
    print(f"Uploading file: {file_path}...")
    video_file = genai.upload_file(path=file_path)
    
    # 파일 처리 상태 확인
    while video_file.state.name == "PROCESSING":
        print('.', end='', flush=True)
        time.sleep(10)
        video_file = genai.get_file(video_file.name)
    
    if video_file.state.name == "FAILED":
        raise ValueError(f"File processing failed: {video_file.state.name}")
    
    print(f"\nCompleted upload: {video_file.uri}")
    return video_file

# LLM 요청 함수
def generate_content_from_video(video_file, prompt, model_name="gemini-1.5-flash-001", timeout=600):
    print("Making LLM inference request...")
    model = genai.GenerativeModel(model_name=model_name)
    response = model.generate_content([video_file, prompt], request_options={"timeout": timeout})
    return response

# 메인 로직
if __name__ == "__main__":
    video_file_name = "video/내가 엘든링 7번 깬 비법.f614.mp4"
    video_file = upload_and_process_file(video_file_name)
    # 프롬프트 설정
    prompt = '''
    해당영상에서 당신은 먹는 시간의 시작과 끝을 출력하고, 먹는 메뉴도 출력해야합니다.
    [출력예시]
    [1:01,1:31], 불고기
    [2:01,2:31], 비빔밥
    [3:01,3:31], 김치찌개
    '''
    
    # 컨텐츠 생성 요청
    response = generate_content_from_video(video_file, prompt)
    
    # 마크다운으로 출력
    display(Markdown(response.text))


  from .autonotebook import tqdm as notebook_tqdm
I0000 00:00:1725510286.817403 1842701 config.cc:230] gRPC experiments enabled: call_status_override_on_cancellation, event_engine_dns, event_engine_listener, http2_stats_fix, monitoring_experiment, pick_first_new, trace_record_callops, work_serializer_clears_time_cache
I0000 00:00:1725510286.823818 1842701 check_gcp_environment_no_op.cc:29] ALTS: Platforms other than Linux and Windows are not supported


Uploading file: video/내가 엘든링 7번 깬 비법.f614.mp4...
.
Completed upload: https://generativelanguage.googleapis.com/v1beta/files/blh6evvnwwcg
Making LLM inference request...


I0000 00:00:1725510302.115880 1842701 check_gcp_environment_no_op.cc:29] ALTS: Platforms other than Linux and Windows are not supported


[0:08, 0:57], 딸기 크림 빙수

In [3]:
response.text

'[0:08, 0:57], 딸기 크림 빙수'

In [5]:
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

In [12]:
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_openai import ChatOpenAI
from openai import OpenAI

model = ChatOpenAI(api_key=OPENAI_API_KEY ,model="gpt-4o-mini",temperature=0)


class Seconds(BaseModel):
    start: int = Field(description="The start time of the food in seconds")
    end: int = Field(description="The end time of the food in seconds")

class VideoParser(BaseModel):
    time: list[Seconds] = Field(description="The time of the food in seconds")
    food_name: str = Field(description="The name of the food")

parser = JsonOutputParser(pydantic_object=VideoParser)

prompt = PromptTemplate(
    template="Answer the user query.\n{format_instructions}\n{query}\n",
    input_variables=["query"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
)

chain = prompt | model | parser

response = chain.invoke({"query": response.text})

print(response)


{'time': [{'start': 8, 'end': 57}], 'food_name': '딸기 크림 빙수'}


In [16]:
response['time']

[{'start': 8, 'end': 57}]

In [17]:
import cv2
import os

def extract_video_segment(input_video, start_time, end_time, output_folder, food_name):
    # 입력 비디오 열기
    cap = cv2.VideoCapture(input_video)
    
    # 비디오 속성 가져오기
    fps = int(cap.get(cv2.CAP_PROP_FPS))
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    
    # 출력 비디오 설정
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    output_filename = f"{food_name}_{start_time}_{end_time}.mp4"
    output_path = os.path.join(output_folder, output_filename)
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
    
    # 시작 프레임으로 이동
    cap.set(cv2.CAP_PROP_POS_FRAMES, start_time * fps)
    
    # 프레임 추출 및 저장
    for _ in range((end_time - start_time) * fps):
        ret, frame = cap.read()
        if not ret:
            break
        out.write(frame)
    
    # 리소스 해제
    cap.release()
    out.release()
    
    return output_path

# 사용 예시
input_video = "video/내가 엘든링 7번 깬 비법.f614.mp4"
output_folder = "extract_video"
os.makedirs(output_folder, exist_ok=True)

for segment in response['time']:
    extracted_path = extract_video_segment(input_video, segment['start'], segment['end'], output_folder, response['food_name'])
    print(f"추출된 비디오: {extracted_path}")

추출된 비디오: extract_video/딸기 크림 빙수_8_57.mp4


In [18]:
import pandas as pd

def create_metadata_table(extracted_videos, food_name):
    metadata = []
    for video in extracted_videos:
        metadata.append({
            'video_path': video,
            'food_name': food_name
        })
    
    df = pd.DataFrame(metadata)
    df.to_csv('metadata.csv', index=False)
    print("메타데이터가 저장되었습니다.")
    return df

# 사용 예시
extracted_videos = [f for f in os.listdir(output_folder) if f.endswith('.mp4')]
metadata_df = create_metadata_table([os.path.join(output_folder, v) for v in extracted_videos], response['food_name'])

메타데이터가 저장되었습니다.


In [19]:
metadata_df

Unnamed: 0,video_path,food_name
0,extract_video/딸기 크림 빙수_8_57.mp4,딸기 크림 빙수


In [22]:
from langchain_community.document_loaders.csv_loader import CSVLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.text_splitter import CharacterTextSplitter
from langchain.llms import OpenAI
from langchain.chains import RetrievalQA

# CSV 파일 로드
loader = CSVLoader(file_path='metadata.csv', encoding='utf-8')
documents = loader.load()

# 텍스트 분할
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
docs = text_splitter.split_documents(documents)

# 임베딩 및 벡터 저장소 생성
embeddings = OpenAIEmbeddings()
vectorstore = Chroma.from_documents(docs, embeddings)

# 검색 기반 QA 체인 생성
qa = RetrievalQA.from_chain_type(llm=OpenAI(), chain_type="stuff", retriever=vectorstore.as_retriever())

# 사용 예시
query = "어떤 음식이 영상에 나오나요?"
result = qa.invoke({"query": query})
print(result)

Number of requested results 4 is greater than number of elements in index 2, updating n_results = 2


{'query': '어떤 음식이 영상에 나오나요?', 'result': ' 계란 술안주 and 딸기 크림 빙수'}


In [21]:
docs

[Document(metadata={'source': 'metadata.csv', 'row': 0}, page_content='video_path: extract_video/딸기 크림 빙수_8_57.mp4\nfood_name: 딸기 크림 빙수')]

In [23]:
!pip install "routellm[serve,eval]"


I0000 00:00:1725515991.438615 1842701 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


Collecting routellm[eval,serve]
  Downloading routellm-0.2.0-py3-none-any.whl.metadata (14 kB)
Collecting torch (from routellm[eval,serve])
  Downloading torch-2.4.1-cp311-none-macosx_11_0_arm64.whl.metadata (26 kB)
Collecting scikit-learn (from routellm[eval,serve])
  Using cached scikit_learn-1.5.1-cp311-cp311-macosx_12_0_arm64.whl.metadata (12 kB)
Collecting transformers (from routellm[eval,serve])
  Using cached transformers-4.44.2-py3-none-any.whl.metadata (43 kB)
Collecting datasets (from routellm[eval,serve])
  Downloading datasets-2.21.0-py3-none-any.whl.metadata (21 kB)
Collecting litellm (from routellm[eval,serve])
  Downloading litellm-1.44.17-py3-none-any.whl.metadata (32 kB)
Collecting matplotlib (from routellm[eval,serve])
  Using cached matplotlib-3.9.2-cp311-cp311-macosx_11_0_arm64.whl.metadata (11 kB)
Collecting pandarallel (from routellm[eval,serve])
  Downloading pandarallel-1.6.5.tar.gz (14 kB)
  Preparing metadata (setup.py) ... [?25ldone
[?25hCollecting sglang (

In [24]:
!git clone https://github.com/lm-sys/RouteLLM.git
%cd RouteLLM
!pip install -e .[serve,eval]

I0000 00:00:1725516046.017926 1842701 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


Cloning into 'RouteLLM'...
remote: Enumerating objects: 1027, done.[K
remote: Counting objects: 100% (166/166), done.[K
remote: Compressing objects: 100% (56/56), done.[K
remote: Total 1027 (delta 117), reused 122 (delta 110), pack-reused 861 (from 1)[K
Receiving objects: 100% (1027/1027), 5.36 MiB | 9.67 MiB/s, done.
Resolving deltas: 100% (626/626), done.
/Users/kdb/Desktop/rag_study/RouteLLM
zsh:1: no matches found: .[serve,eval]


I0000 00:00:1725516047.909814 1842701 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


In [None]:
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_openai import ChatOpenAI
from openai import OpenAI

model = ChatOpenAI(api_key=OPENAI_API_KEY ,model="gpt-4o-mini",temperature=0)


class Seconds(BaseModel):
    start: int = Field(description="The start time of the food in seconds")
    end: int = Field(description="The end time of the food in seconds")

class VideoParser(BaseModel):
    time: list[Seconds] = Field(description="The time of the food in seconds")
    food_name: str = Field(description="The name of the food")

parser = JsonOutputParser(pydantic_object=VideoParser)

prompt = PromptTemplate(
    template="Answer the user query.\n{format_instructions}\n{query}\n",
    input_variables=["query"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
)

chain = prompt | model | parser

response = chain.invoke({"query": response.text})

print(response)


{'time': [{'start': 8, 'end': 57}], 'food_name': '딸기 크림 빙수'}


In [32]:
%pip install -e .[serve,eval]

zsh:1: no matches found: .[serve,eval]


I0000 00:00:1725516368.047000 1842701 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


Note: you may need to restart the kernel to use updated packages.


In [2]:
text = "아 그래 ? 나는 백종원이 먹는 동영상을 보고싶은데"

In [10]:
os.environ["OPENAI_MODEL_NAME"] = "gpt-4o-mini"

In [20]:
from crewai import Agent, Task, Crew

# Research Agent 생성
research_agent = Agent(
    role='Video Recommender',
    goal='Determine if the user wants to watch a video based on the query',
    backstory="""You are an AI agent responsible for analyzing the user's query
    and deciding whether or not they want to watch a video.""",  # 사용할 LLM 지정
    verbose=True
)

# 사용자 쿼리
user_input = "아 그래 ? 나는 백종원이 먹는 동영상을 보고싶은데"

# Task 정의: 동영상 추천 여부 판단
task = Task(
    description='Analyze the user input and decide if a video should be played',
    expected_output='0 if the user does not want to watch a video, 1 if the user wants to watch a video',
    agent=research_agent,
)

In [None]:
# Crew 생성
crew = Crew(
    agents=[research_agent],
    tasks=[task],
    verbose=True
)

# Crew 실행
result = crew.kickoff()