# In [2]:
import os
from dotenv import load_dotenv
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from pytrends.request import TrendReq
from openai import OpenAI
import streamlit as st
import pandas as pd
from collections import Counter
import re
import jieba
import jieba.posseg as pseg
from nltk import ngrams
from textblob import TextBlob

# Load environment variables (the API keys read below) via python-dotenv.
load_dotenv()

# Configure the module-level API clients used by the functions in this file.
YOUTUBE_API_KEY = os.getenv('YOUTUBE_API_KEY')
youtube = build('youtube', 'v3', developerKey=YOUTUBE_API_KEY)
# pytrends documents `tz` as the offset in minutes *behind* UTC (its example is
# US CST, UTC-6, given as 360 and "NOT -360"), so Taiwan at UTC+8 is -480.
pytrends = TrendReq(hl='zh-TW', tz=-480)
openai_client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))

def get_youtube_trends():
    """Return the titles from YouTube's "mostPopular" video chart for region TW.

    Issues a single ``videos().list`` request through the module-level
    ``youtube`` client, asking for the snippet part of up to 50 videos, and
    collects each returned item's snippet title in response order.

    Returns:
        A list of title strings.
    """
    query = {
        "part": "snippet",
        "chart": "mostPopular",
        "regionCode": "TW",
        "maxResults": 50,
    }
    payload = youtube.videos().list(**query).execute()

    titles = []
    for video in payload['items']:
        titles.append(video['snippet']['title'])
    return titles

def get_google_trends():
    """Return up to ten entries from Google's trending searches for Taiwan.

    Uses the module-level ``pytrends`` client.

    Returns:
        A list holding the first column of the first ten rows of the
        DataFrame returned by ``trending_searches`` (fewer if it has fewer
        rows).
    """
    # Deliberately no build_payload() call here: trending_searches() takes only
    # the region name, and building a payload for an empty keyword would cost
    # an extra request whose result is never used.
    trends = pytrends.trending_searches(pn='taiwan')
    return trends.iloc[:10, 0].tolist()  # Top 10 trends

def analyze_titles(titles):
    """Compute word, phrase, sentiment and structure statistics over titles.

    Args:
        titles: List of title strings. May be empty.

    Returns:
        A dict with these keys:
            word_freq: up to 20 ``(word, count)`` pairs from jieba tokens.
            pos_freq: up to 10 ``(pos_tag, count)`` pairs from jieba.posseg.
            bigram_freq: up to 10 ``((w1, w2), count)`` pairs.
            trigram_freq: up to 10 ``((w1, w2, w3), count)`` pairs.
            avg_sentiment: mean TextBlob polarity over all titles.
            structure_freq: up to 20 ``(token, count)`` pairs taken from the
                whitespace-separated pieces of each title after structural
                markers were replaced by placeholder labels.
            avg_length: mean title length in characters.
        For an empty ``titles`` list every frequency list is empty and both
        averages are 0.0.
    """
    # Guard clause: the averages below would otherwise divide by zero.
    if not titles:
        return {
            'word_freq': [],
            'pos_freq': [],
            'bigram_freq': [],
            'trigram_freq': [],
            'avg_sentiment': 0.0,
            'structure_freq': [],
            'avg_length': 0.0,
        }

    # Tokenise every title exactly once; the word and n-gram statistics both
    # work from these per-title token lists.
    tokenized = [list(jieba.cut(title)) for title in titles]

    # Word frequency analysis
    word_freq = Counter(
        word for tokens in tokenized for word in tokens
    ).most_common(20)

    # Part-of-speech analysis
    pos_tags = []
    for title in titles:
        pos_tags.extend(pos for _, pos in pseg.cut(title))
    pos_freq = Counter(pos_tags).most_common(10)

    # N-gram analysis (for common phrases). Built per title so that a phrase
    # never joins the end of one title to the start of the next.
    bigram_counts = Counter()
    trigram_counts = Counter()
    for tokens in tokenized:
        bigram_counts.update(ngrams(tokens, 2))
        trigram_counts.update(ngrams(tokens, 3))
    bigram_freq = bigram_counts.most_common(10)
    trigram_freq = trigram_counts.most_common(10)

    # Sentiment analysis
    # NOTE(review): TextBlob's default analyzer is presumably English-oriented,
    # so polarity for Chinese titles may sit near 0 - confirm before relying
    # on this number.
    sentiments = [TextBlob(title).sentiment.polarity for title in titles]
    avg_sentiment = sum(sentiments) / len(sentiments)

    # Structural analysis: replace structural markers with placeholder labels.
    patterns = [
        (r'\d+', 'NUMBER'),
        (r'[！!?？]', 'EXCLAMATION'),
        (r'「.*?」', 'QUOTE'),
        (r'（.*?）', 'PARENTHESIS'),
        (r'…', 'ELLIPSIS'),
        # Dots are escaped so that only a literal "v.s." matches; an unescaped
        # "v.s." would also match ordinary words such as "vast".
        (r'VS|v\.s\.|vs', 'VERSUS'),
    ]

    structure_elements = []
    for title in titles:
        templated = title
        for pattern, label in patterns:
            templated = re.sub(pattern, label, templated)
        # NOTE(review): labels stay fused to neighbouring text when the title
        # has no whitespace around them, so tokens here are whole chunks.
        structure_elements.extend(templated.split())

    structure_freq = Counter(structure_elements).most_common(20)

    # Title length analysis
    avg_length = sum(len(title) for title in titles) / len(titles)

    return {
        'word_freq': word_freq,
        'pos_freq': pos_freq,
        'bigram_freq': bigram_freq,
        'trigram_freq': trigram_freq,
        'avg_sentiment': avg_sentiment,
        'structure_freq': structure_freq,
        'avg_length': avg_length,
    }

def analyze_trending_titles(titles):
    """Ask the chat model to summarise the "viral formula" behind the titles.

    Sends one chat-completion request through the module-level
    ``openai_client`` with the titles joined into a single prompt.

    Args:
        titles: List of title strings to analyse.

    Returns:
        The text of the first completion choice, or an empty string if the
        response carried no text content.
    """
    # The prompt states the real number of titles supplied rather than a fixed
    # 50, since the caller may pass fewer titles than that.
    prompt = f"""Analyze the following {len(titles)} trending YouTube video titles in Taiwan:

{', '.join(titles)}

Please identify and summarize:
1. Common themes or topics
2. Structural elements (e.g., use of numbers, questions, emojis)
3. Language patterns or phrases that appear frequently
4. Emotional triggers or psychological tactics used
5. SEO elements or keywords that seem important

Provide a concise summary of the viral formula based on these titles, focusing on what makes them attractive and clickable.
"""

    response = openai_client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": "You are an expert in analyzing viral content and YouTube trends in Taiwan."},
            {"role": "user", "content": prompt},
        ],
    )

    # Normalise a missing content field to "" so callers always get a string.
    return response.choices[0].message.content or ""

def generate_titles(topic, industry, style, trend_analysis, viral_formula, num_titles=10):
    """Generate candidate video titles with the chat model.

    Builds a prompt from the topic, industry, style, the viral formula text
    and selected statistics from ``trend_analysis``, sends it through the
    module-level ``openai_client``, and splits the reply into one title per
    non-empty line.

    Args:
        topic: Trending topic to build the titles around.
        industry: Industry the titles should relate to.
        style: Desired title style.
        trend_analysis: Dict providing ``word_freq``, ``trigram_freq`` and
            ``structure_freq`` (lists of ``(item, count)`` pairs) and
            ``avg_length`` (a number).
        viral_formula: Free-text summary inserted into the prompt.
        num_titles: Number of titles requested from the model.

    Returns:
        A list of at most ``num_titles`` title strings with any leading list
        marker ("1.", "2)", "-", "*", "•") removed. The list can be shorter
        if the model returned fewer lines.
    """
    common_words = ', '.join(word for word, _ in trend_analysis['word_freq'][:5])
    common_phrases = ', '.join(
        ' '.join(phrase) for phrase, _ in trend_analysis['trigram_freq'][:3]
    )
    common_structures = ', '.join(
        struct for struct, _ in trend_analysis['structure_freq'][:5]
    )

    prompt = f"""Create {num_titles} attention-grabbing YouTube video titles in Traditional Chinese that combine the trending topic '{topic}' with the industry '{industry}' and style '{style}'.

Use the following viral formula extracted from current trending videos:
{viral_formula}

Also consider these trending elements:
Common words: {common_words}
Common phrases: {common_phrases}
Common structures: {common_structures}

The titles should:
1. Sound natural and human-like
2. Be highly clickable and shareable
3. Incorporate SEO-friendly elements
4. Reflect current Taiwanese YouTube trends
5. Be around {trend_analysis['avg_length']:.0f} characters long

Please generate {num_titles} unique and creative titles that follow these guidelines."""

    response = openai_client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": "You are a creative content title generator for Taiwanese YouTube videos, skilled in creating viral and SEO-friendly titles."},
            {"role": "user", "content": prompt},
        ],
    )

    # A missing content field is treated as an empty reply instead of crashing.
    content = response.choices[0].message.content or ""

    # Match one leading list marker only: a short number followed by a
    # delimiter, or a bullet followed by whitespace. The (?!\d) lookahead keeps
    # decimals such as "3.5倍" intact, and matching a single marker keeps
    # digits that belong to the title itself ("1. 10個..." -> "10個...").
    list_marker = re.compile(r'^\s*(?:\d{1,3}\s*[.)、．:：](?!\d)|[-*•]\s)\s*')

    generated_titles = []
    for line in content.split('\n'):
        cleaned = list_marker.sub('', line).strip()
        if cleaned:
            generated_titles.append(cleaned)

    return generated_titles[:num_titles]  # Return at most num_titles titles

def main():
    """Render the Streamlit page: inputs, trend analysis and generated titles.

    Once both the industry and style fields are filled in, the function
    fetches and analyses the current trends, lets the user pick a Google
    trend, generates titles for it, and shows the title statistics.

    Results of the remote calls are kept in ``st.session_state`` because
    Streamlit re-executes the script on every widget interaction. Without
    that, picking a radio option or pressing the button would re-fetch the
    trends and regenerate the titles, replacing the list the user was
    choosing from.
    """
    st.title("台灣 YouTube 趨勢標題生成器")

    industry = st.text_input("請輸入您的產業領域：")
    style = st.text_input("請輸入您想要的標題風格：")

    if not (industry and style):
        return

    # Fetch and analyse the trends once per browser session.
    if 'trend_data' not in st.session_state:
        with st.spinner("分析當前趨勢..."):
            youtube_trends = get_youtube_trends()
            google_trends = get_google_trends()
            trend_analysis = analyze_titles(youtube_trends)
            viral_formula = analyze_trending_titles(youtube_trends)
        st.session_state['trend_data'] = {
            'google_trends': google_trends,
            'trend_analysis': trend_analysis,
            'viral_formula': viral_formula,
        }

    trend_data = st.session_state['trend_data']
    google_trends = trend_data['google_trends']
    trend_analysis = trend_data['trend_analysis']
    viral_formula = trend_data['viral_formula']

    st.subheader("當前 YouTube 趨勢分析")
    st.text_area("病毒式傳播公式", viral_formula, height=200)

    st.subheader("Google 熱門趨勢")
    selected_trend = st.selectbox("選擇一個熱門趨勢：", google_trends)

    if selected_trend:
        # Regenerate only when the inputs that feed the prompt have changed.
        request_key = (selected_trend, industry, style)
        cached = st.session_state.get('generated_titles')
        if cached is None or cached['key'] != request_key:
            with st.spinner("生成標題中..."):
                titles = generate_titles(selected_trend, industry, style, trend_analysis, viral_formula)
            cached = {'key': request_key, 'titles': titles}
            st.session_state['generated_titles'] = cached
        titles = cached['titles']

        st.subheader("生成的趨勢標題 (請選擇最喜歡的)")
        selected_title = st.radio("選擇一個標題：", titles)

        if selected_title:
            st.success(f"你選擇的標題是：{selected_title}")
            # NOTE(review): this only prints a confirmation; nothing in this
            # function writes to the clipboard.
            if st.button("複製到剪貼板"):
                st.write("標題已複製！")

    st.subheader("當前 YouTube 熱門標題分析")
    st.write("常見詞彙：", ", ".join([f"{word} ({count})" for word, count in trend_analysis['word_freq'][:10]]))
    st.write("常見詞性：", ", ".join([f"{pos} ({count})" for pos, count in trend_analysis['pos_freq']]))
    st.write("常見短語：", ", ".join([f"{' '.join(phrase)} ({count})" for phrase, count in trend_analysis['trigram_freq'][:5]]))
    st.write("常見結構：", ", ".join([f"{struct} ({count})" for struct, count in trend_analysis['structure_freq'][:10]]))
    st.write(f"平均情感極性：{trend_analysis['avg_sentiment']:.2f}")
    st.write(f"平均標題長度：{trend_analysis['avg_length']:.0f} 字元")

# Build the page only when this file is executed as the main module,
# not when it is imported.
if __name__ == "__main__":
    main()