# 감성사전 

In [None]:
import pandas as pd
from wordcloud import WordCloud
import matplotlib.pyplot as plt

# ==========================================================
# 0. Sentiment lexicon definition (senti score = used for the polarity
#    calculation)
# ==========================================================
# Maps a category name to {'score': <weight>, 'keywords': [<Korean terms>]}.
# get_senti() tests every keyword as a plain substring of the headline and
# adds the category score once per keyword found.
#
# NOTE(review): because matching is by substring, keywords that contain
# another listed keyword are counted together with it, e.g. "개선" /
# "개선된" / "개선되는", "보합" / "강보합" / "약보합", "적극" /
# "적극적으로", "가치" / "가치 있는", "상승" / "상승효과", "부정적" /
# "부정적인", "주의" / "부주의". Confirm whether this stacking is intended.
sentiment_words = {
    # Strongly positive terms (+3.0 each).
    'pos_strong': {'score': 3.0, 'keywords': [
        "상승","급등","반등","경신","갱신","돌파","강세","최고치",
        "사상","성공","수혜","확보","활성화","호황","실현",
        "뛰어나","우세","우월","최고점","상승효과","완전한"
    ]},
    # Mildly positive terms (+1.0 each).
    'pos_weak': {'score': 1.0, 'keywords': [
        "회복","보합","혼조","강보합","약보합","기대","전망",
        "가치","개선","개선된","개선되는","도움이 되는","안정된",
        "순조롭게","우호적","증대","추진","본격적인","정성",
        "적극","적극적으로","활력","이로운","인상적인","명성",
        "장점","바닥","가치 있는"
    ]},
    # Mildly negative terms (-1.5 each).
    # NOTE(review): "하락" is listed here AND in 'neg_strong', so a headline
    # containing it receives -1.5 + -4.5 = -6.0. Confirm which category it
    # belongs to.
    'neg_weak': {'score': -1.5, 'keywords': [
        "약세","우려","부정적","부정적인","부적합한","불리한",
        "공허한","결함","과적","둔화","마이너스","몰수","미완성",
        "배상","부주의","약점","약화","여파","연기","우발적",
        "의심","의혹","잘못","정체","조치","주의","지나친",
        "지독한","지연","질타","차질","침체","혼란","하락"
    ]},
    # Strongly negative terms (-4.5 each).
    'neg_strong': {'score': -4.5, 'keywords': [
        "급락","폭락","추락","붕괴","급감","하락",
        "부진","실패","파산","해체","충격","훼손","극심한","저해",
        "스캔들","투자 회수","저품질","소란"
    ]}
}

# Subject keywords: get_subject() checks whether any of these occurs in a
# headline to decide if it concerns the subject company.
subject_terms = ["sk", "하이닉스", "당사", "동사"]


# ==========================================================
# 1. 점수 계산 함수
# ==========================================================
def get_senti(title):
    """Return the lexicon-based polarity score of a headline.

    Each keyword of each ``sentiment_words`` category is tested as a plain
    substring of ``title``. A keyword that is found contributes its
    category's score once, no matter how often it occurs. Keywords that
    overlap, or that are listed in several categories, each contribute.

    Returns:
        The summed score, or 1 when ``title`` is not a string or when the
        sum is 0 (nothing matched, or the matches cancelled out).
    """
    if not isinstance(title, str):
        return 1

    matched_scores = [
        category["score"]
        for category in sentiment_words.values()
        for keyword in category["keywords"]
        if keyword in title
    ]

    # Accumulate in lexicon order so the floating-point result is the same
    # as adding the scores one by one while scanning.
    total = 0
    for value in matched_scores:
        total += value

    # A zero total is replaced by 1 so that a later multiplication by this
    # score does not wipe out the other factor.
    if total == 0:
        return 1
    return total


def get_ratio(title, df_weight):
    """Sum the weight (``ratio``) of every keyword that occurs in a headline.

    Args:
        title: Headline text. Anything that is not a ``str`` yields 0.
        df_weight: DataFrame with a ``keyword`` column (terms matched as
            plain substrings of ``title``) and a ``ratio`` column (values
            convertible with ``float``).

    Returns:
        The sum of ``ratio`` over all rows whose keyword is contained in
        ``title``; 0 when nothing matches.

    Rows with a missing keyword, an empty keyword or a missing ratio are
    skipped. Otherwise a NaN keyword would be stringified to "nan" and
    matched as text, an empty keyword would match every headline, and a NaN
    ratio would turn the whole sum into NaN.
    """
    if not isinstance(title, str) or df_weight.empty:
        return 0

    total = 0
    # Iterating the two columns directly avoids building a Series per row,
    # which is what iterrows() does.
    for keyword, ratio in zip(df_weight["keyword"], df_weight["ratio"]):
        if pd.isna(keyword) or pd.isna(ratio):
            continue
        keyword = str(keyword)
        if keyword and keyword in title:
            total += float(ratio)
    return total


def get_subject(title):
    """Score whether a headline mentions the subject company.

    Args:
        title: Headline text. Anything that is not a ``str`` yields 0.5.

    Returns:
        1.0 when any entry of ``subject_terms`` occurs in ``title``,
        otherwise 0.5.

    Matching is case-insensitive: the term list stores "sk" in lowercase,
    so a case-sensitive test would miss headlines written with "SK".
    """
    if not isinstance(title, str):
        return 0.5

    folded_title = title.casefold()
    mentioned = any(term.casefold() in folded_title for term in subject_terms)
    return 1.0 if mentioned else 0.5


def calculate_final_score(title, df_weight):
    """Combine the three partial scores of a headline.

    final_score = (ratio * senti) + subject, where ``senti`` comes from
    get_senti, ``ratio`` from get_ratio and ``subject`` from get_subject.

    Returns:
        A ``(final_score, senti)`` tuple.
    """
    sentiment = get_senti(title)
    weighted_sentiment = get_ratio(title, df_weight) * sentiment
    return weighted_sentiment + get_subject(title), sentiment


# ==========================================================
# 2. 색상 매핑 — 분위수(Q1/Q2/Q3) 기반 4단계
# ==========================================================
def get_quantile_colors(df, score_col="final_score"):
    """Return the quartile cut points of a score column.

    Despite the name, the result is the numeric thresholds, not colors:
    a ``(q1, q2, q3)`` tuple holding the 25%, 50% and 75% quantiles of
    ``df[score_col]``.
    """
    scores = df[score_col]
    return tuple(scores.quantile(level) for level in (0.25, 0.50, 0.75))


def map_color_quantile(score, q1, q2, q3):
    """Map a score to one of four RGB color strings by quartile band.

    The thresholds are tested in ascending order and the first one that
    ``score`` does not exceed decides the color. A score above ``q3``, or
    one for which every comparison is false, gets the last color.
    """
    bands = (
        (q1, "rgb(200,0,0)"),        # bottom 25% (red)
        (q2, "rgb(255,150,150)"),    # 25-50% (pink)
        (q3, "rgb(140,170,255)"),    # 50-75% (light blue)
    )
    for upper_bound, color in bands:
        if score <= upper_bound:
            return color
    return "rgb(0,0,180)"            # top 25% (blue)


# ==========================================================
# 3. 데이터 로드
# ==========================================================
def load_quarter(q, base="../mini_project1/preprocessed_data"):
    """Load the news, price and keyword-weight tables of one quarter.

    Args:
        q: Quarter label used as the file-name prefix (e.g. "1분기").
        base: Directory that holds the CSV files. Defaults to the
            original hard-coded location.

    Returns:
        A ``(df_news, df_price, df_weight)`` tuple of DataFrames. The
        ``date`` column of the news and price frames is converted to
        datetime; values that cannot be parsed become NaT.

    Raises:
        FileNotFoundError: If one of the three CSV files is missing.
        KeyError: If the news or price file has no ``date`` column.
    """
    def _read(file_name):
        # Read one CSV and remove byte-order marks left in the header.
        frame = pd.read_csv(f"{base}/{file_name}", encoding="utf-8-sig")
        # "utf-8-sig" strips only one leading BOM; any further U+FEFF would
        # stay glued to a column name and break lookups by name. This is
        # applied to all three frames, including the keyword weights.
        frame.columns = frame.columns.str.replace("\ufeff", "", regex=False)
        return frame

    df_news = _read(f"{q}_일자별_전처리1.csv")
    df_price = _read(f"{q} 주가 데이터.csv")
    df_weight = _read(f"{q}_키워드_top30.csv")

    for frame in (df_news, df_price):
        frame["date"] = pd.to_datetime(frame["date"], errors="coerce")

    return df_news, df_price, df_weight


# Quarter labels ("N분기" = N-th quarter). Presumably each one is passed to
# load_quarter() as the file-name prefix; the consuming code is outside
# this view, so confirm against the caller.
quarters = ["1분기", "2분기", "3분기", "4분기"]