* [OpenAI API usage dashboard](https://platform.openai.com/usage)

* [PyCon US 2024 - YouTube](https://www.youtube.com/playlist?list=PL2Uw4_HvXqvYhjub9bw4uDAmNtprgAvlJ)

In [None]:
# !pip install -Uq pytube

In [None]:
import sqlite3
import pandas as pd
import logging
from openai import OpenAI
import os
from tqdm import tqdm

# Create the OpenAI client; the API key is read from the OPENAI_API_KEY environment variable
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

# Configure logging: INFO level, each record formatted as "time - level - message"
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Open the SQLite database file and create the module-level cursor
# that the functions in this cell use for their queries
conn = sqlite3.connect('youtube_playlist.db')
cursor = conn.cursor()

# Helper that adds a column only when the table does not have it yet
def add_column_if_not_exists(table_name, column_name, column_type):
    """Add ``column_name`` to ``table_name`` unless it is already present.

    Runs ``PRAGMA table_info`` through the module-level ``cursor`` and compares
    ``column_name`` with the second field of every returned row. When no row
    matches, an ``ALTER TABLE ... ADD COLUMN`` statement is executed.

    All three arguments are interpolated directly into the SQL text, so they
    must be trusted identifiers, never user-supplied input.
    """
    cursor.execute(f"PRAGMA table_info({table_name})")
    existing = {info[1] for info in cursor.fetchall()}
    if column_name in existing:
        return
    cursor.execute(f"ALTER TABLE {table_name} ADD COLUMN {column_name} {column_type}")

# Make sure the columns written by the processing steps exist on the videos table
add_column_if_not_exists(
    table_name='videos', column_name='subtitle_length', column_type='INTEGER'
)
add_column_if_not_exists(
    table_name='videos', column_name='improved_subtitles', column_type='TEXT'
)
add_column_if_not_exists(
    table_name='videos', column_name='korean_subtitles', column_type='TEXT'
)
add_column_if_not_exists(
    table_name='videos', column_name='korean_subtitle_length', column_type='INTEGER'
)

# Compute and store the subtitle length for rows that do not have one yet
def update_subtitle_length():
    """Fill ``subtitle_length`` with ``LENGTH(subtitles)`` where it is still NULL.

    Executes a single UPDATE on the ``videos`` table through the module-level
    ``cursor`` and then commits on the module-level ``conn``.
    """
    fill_missing_lengths = 'UPDATE videos SET subtitle_length = LENGTH(subtitles) WHERE subtitle_length IS NULL'
    cursor.execute(fill_missing_lengths)
    conn.commit()

# Improve English subtitles and translate them to Korean with the OpenAI API
_IMPROVED_LABEL = "Improved English:"
_KOREAN_LABEL = "Korean translation:"


def _split_model_reply(content):
    """Return ``(improved, translated)`` parsed from a labelled model reply.

    Returns ``(None, None)`` when the Korean label is absent or when either
    part is empty after stripping whitespace.
    """
    if not content:
        return None, None
    # Split on the Korean label rather than on blank lines, so that improved
    # text spanning several paragraphs is kept whole.
    head, label, tail = content.partition(_KOREAN_LABEL)
    if not label:
        return None, None
    improved = head.strip()
    if improved.startswith(_IMPROVED_LABEL):
        improved = improved[len(_IMPROVED_LABEL):].strip()
    translated = tail.strip()
    if not improved or not translated:
        return None, None
    return improved, translated


def improve_and_translate(text):
    """Ask the chat model to clean up English ``text`` and translate it to Korean.

    The system prompt asks for two labelled sections ("Improved English:" and
    "Korean translation:"), and the reply is split on those labels.

    Args:
        text: English text to improve. ``None`` or blank text is rejected
            without calling the API.

    Returns:
        A tuple ``(improved, translated)`` of strings, or ``(None, None)`` when
        the input is empty, the API call fails, or the reply does not contain
        both labelled sections. This function does not raise on API errors;
        they are logged instead.
    """
    # Without this guard, a NULL subtitle would be formatted into the prompt
    # as the literal text "None" and billed as a real request.
    if text is None or not str(text).strip():
        return None, None

    try:
        response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {
                    "role": "system",
                    "content": (
                        "You are a helpful assistant that improves English text and translates it to Korean. "
                        f"Reply in plain text with exactly two sections and nothing else: first '{_IMPROVED_LABEL}' "
                        f"followed by the improved English text, then '{_KOREAN_LABEL}' "
                        "followed by the Korean translation."
                    ),
                },
                {"role": "user", "content": f"Improve the following English text and then translate it to Korean: {text}"},
            ],
        )
        choice = response.choices[0]
        content = choice.message.content
        finish_reason = getattr(choice, "finish_reason", None)
    except Exception as e:
        # Best-effort by design: a failed request must not abort a batch run.
        logging.error(f"Error in API call: {str(e)}")
        return None, None

    if finish_reason == "length":
        logging.warning("Model reply was cut off at the output token limit; the translation may be incomplete.")

    improved, translated = _split_model_reply(content)
    if improved is None:
        logging.error("Could not find both labelled sections in the model reply; skipping this text.")
    return improved, translated

# Run the improve-and-translate step over every video that still needs it
def process_subtitles():
    """Improve and translate subtitles for rows that lack either result.

    Selects every row of ``videos`` whose ``improved_subtitles`` or
    ``korean_subtitles`` is NULL, calls ``improve_and_translate`` on its
    subtitles, and stores the improved text, the Korean text and the Korean
    text length. Rows with no subtitles, and rows for which
    ``improve_and_translate`` returns an empty result, are left untouched.

    Uses the module-level ``cursor`` and ``conn``; commits after each updated
    row so that finished work is kept if a later row fails.
    """
    cursor.execute('SELECT id, subtitles FROM videos WHERE improved_subtitles IS NULL OR korean_subtitles IS NULL')
    rows = cursor.fetchall()

    for video_id, subtitles in tqdm(rows, desc="Processing subtitles"):
        # A NULL or empty transcript has nothing to improve; skip the API call.
        if not subtitles:
            continue
        improved, translated = improve_and_translate(subtitles)
        if not (improved and translated):
            continue
        cursor.execute('''
            UPDATE videos 
            SET improved_subtitles = ?, 
                korean_subtitles = ?, 
                korean_subtitle_length = LENGTH(?)
            WHERE id = ?
        ''', (improved, translated, translated, video_id))
        conn.commit()

# Main entry point: fill subtitle lengths, improve/translate, then show the table
if __name__ == "__main__":
    try:
        update_subtitle_length()
        logging.info("Subtitle lengths updated.")

        process_subtitles()
        logging.info("Subtitles improved and translated.")

        # Inspect the result
        df = pd.read_sql_query("SELECT * FROM videos", conn)
        print(df)
    finally:
        # Close the connection even when one of the steps above raises,
        # so the database handle is not left open.
        conn.close()

In [None]:
# Reconnect to the database and load the whole videos table into a DataFrame
conn = sqlite3.connect('youtube_playlist.db')
cursor = conn.cursor()

try:
    df = pd.read_sql_query("SELECT * FROM videos", conn)
finally:
    # Close the connection even if the query fails.
    conn.close()
df

In [None]:
# Preview the first 300 characters of the subtitles in the fifth-from-last row
df.iloc[-5]["subtitles"][:300]

In [None]:
# Show every column of the fifth-from-last row
df.iloc[-5]

In [None]:
# Show the last row whose korean_subtitles value is not null
df[df["korean_subtitles"].notnull()].iloc[-1]

In [None]:
# Show the korean_subtitles value of the row with index label 7
df.loc[7, "korean_subtitles"]