# In [None]:
import streamlit as st
import pandas as pd
import requests
from sentence_transformers import SentenceTransformer, util

# Load the channel table and derive the text that gets embedded
@st.cache_data
def load_data():
    """Read the channel CSV and add a ``combined`` text column.

    The file ``youtube_channel_template.csv`` is read with cp949 encoding.
    Rows missing either the "태그" (tags) or "스타일" (style) value are
    dropped, and the two columns are joined with a comma into ``combined``.

    Returns:
        pandas.DataFrame: the filtered table with the extra ``combined`` column.
    """
    tag_col, style_col = "태그", "스타일"

    raw = pd.read_csv("youtube_channel_template.csv", encoding="cp949")
    # Copy after filtering so adding a column below never writes into ``raw``.
    channels = raw.dropna(subset=[tag_col, style_col]).copy()
    channels["combined"] = channels[tag_col] + "," + channels[style_col]
    return channels

# KoSBERT sentence-embedding model (wrapped in st.cache_resource)
@st.cache_resource
def load_model():
    """Instantiate and return the ``jhgan/ko-sbert-nli`` SentenceTransformer."""
    model_name = "jhgan/ko-sbert-nli"
    return SentenceTransformer(model_name)

# Build embedding matrix
@st.cache_data
def _encode_texts_cached(_model, texts):
    """Encode ``texts`` with ``_model``, memoized by ``st.cache_data``.

    Streamlit builds the cache key by hashing every argument whose name does
    not start with an underscore. The model is therefore passed as ``_model``
    so that only ``texts`` is hashed; a SentenceTransformer instance is not a
    suitable cache-key input (hashing it typically raises
    ``UnhashableParamError``).

    NOTE: because the model is excluded from the key, results are keyed on
    ``texts`` alone. That is fine while the app uses a single model.
    """
    return _model.encode(texts, convert_to_tensor=True)


def build_embeddings(model, texts):
    """Return the embedding tensor for ``texts``.

    Args:
        model: object exposing ``encode(texts, convert_to_tensor=True)``
            (the SentenceTransformer returned by ``load_model`` in this app).
        texts: list of strings to embed.

    Returns:
        Whatever ``model.encode(..., convert_to_tensor=True)`` returns for
        ``texts``; repeated calls with the same ``texts`` are served from
        Streamlit's data cache.
    """
    # Thin public wrapper: keeps the original signature (``model``, ``texts``)
    # while the cached helper uses the underscore-prefixed name Streamlit needs.
    return _encode_texts_cached(model, texts)

# Rank channels by embedding similarity to the query
def recommend_channels_with_kosbert(df, model, embeddings, user_input, top_n=5):
    """Return the ``top_n`` rows of ``df`` most similar to ``user_input``.

    Args:
        df: channel table; left unmodified (a copy is scored and returned).
        model: encoder exposing ``encode(text, convert_to_tensor=True)``.
        embeddings: precomputed embeddings, one per row of ``df``
            (assumed to be in the same row order as ``df`` -- confirm at caller).
        user_input: query text to embed.
        top_n: number of rows to return.

    Returns:
        pandas.DataFrame: a copy of the best-scoring rows with an added
        ``similarity`` column, sorted from highest to lowest score.
    """
    query_embedding = model.encode(user_input, convert_to_tensor=True)
    # Only one query is scored, so keep the first row of the score matrix.
    scores = util.pytorch_cos_sim(query_embedding, embeddings)[0]

    ranked = df.copy()
    ranked["similarity"] = scores.cpu().numpy()
    ranked = ranked.sort_values(by="similarity", ascending=False)
    return ranked.head(top_n)

# Streamlit UI: page configuration, heading, and the two input widgets
st.set_page_config(
    page_title="유튜브 채널 추천기",
    layout="wide",
)
st.title(body="\U0001F4FA 나에게 딱 맞는 유튜브 채널 추천")
st.markdown(body="검색창에 관심 키워드를 입력하면 유사한 유튜브 채널을 추천해드립니다 ✨")

# Free-text query; an empty string means nothing is recommended yet.
user_input = st.text_input(
    label="\U0001F50D 키워드를 입력하세요 (예: 심리학, 운동, 감성 브이로그)",
)
# Optional re-ordering of the recommendations by subscriber count.
sort_by_followers = st.toggle(
    label="구독자 수 높은 순 정렬",
    value=False,
)

def _subscriber_count_to_number(raw):
    """Convert a subscriber-count label such as ``"12.5만"`` to a float.

    Recognised unit suffixes: 천 (1e3), 만 (1e4), 억 (1e8). Thousands
    separators are ignored and a bare number is taken as-is. Missing or
    unparseable values map to ``0.0`` so they sort last. Only the first
    number/unit pair in the text is read.
    """
    import re  # local import keeps this block self-contained

    if pd.isna(raw):
        return 0.0
    text = str(raw).replace(",", "")
    match = re.search(r"(\d+(?:\.\d+)?)\s*([천만억]?)", text)
    if match is None:
        return 0.0
    number, unit = match.groups()
    multipliers = {"천": 1e3, "만": 1e4, "억": 1e8}
    return float(number) * multipliers.get(unit, 1.0)


def _url_query(text):
    """Return ``text`` percent-encoded for use as a URL query value."""
    from urllib.parse import quote_plus  # local import keeps this block self-contained

    return quote_plus(str(text))


if user_input:
    df = load_data()
    if df.empty:
        # Nothing to rank against; stop before encoding an empty corpus.
        st.warning("추천할 채널 데이터가 없습니다.")
        st.stop()

    model = load_model()
    embeddings = build_embeddings(model, df["combined"].tolist())
    recommendations = recommend_channels_with_kosbert(df, model, embeddings, user_input)

    if sort_by_followers:
        # Sort on a separate numeric key so the original label (e.g. "12.5만")
        # is still what gets displayed. A stable sort keeps the similarity
        # ranking among channels with equal subscriber counts.
        follower_counts = (
            recommendations["구독자 수"].map(_subscriber_count_to_number).astype(float)
        )
        order = (-follower_counts.to_numpy()).argsort(kind="stable")
        recommendations = recommendations.iloc[order]

    st.subheader("\U0001F4DD 추천 채널")

    for _, row in recommendations.iterrows():
        channel_name = row["채널명"]
        # Percent-encode the name so spaces, non-ASCII text and characters like
        # "&" or ")" cannot break the image URL or the markdown link.
        encoded_name = _url_query(channel_name)

        with st.container():
            cols = st.columns([1, 3])
            placeholder_img = f"https://via.placeholder.com/150x150.png?text={encoded_name}"
            cols[0].image(placeholder_img, width=100)

            with cols[1]:
                st.markdown(
                    f"### [{channel_name}]"
                    f"(https://www.youtube.com/results?search_query={encoded_name})"
                )
                st.markdown(f"**카테고리:** {row['카테고리']}  ")
                st.markdown(f"**구독자 수:** {row['구독자 수']}  ")
                st.markdown(f"**스타일:** {row['스타일']}  ")
                st.markdown(f"**태그:** {row['태그']}  ")
                st.markdown(f"**유사도 점수:** {row['similarity']:.2f}")

            st.markdown("---")
