In [1]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')
import re
import random
import networkx as nx
from tqdm import tqdm
from scipy.sparse import coo_matrix
import community as community_louvain
from collections import defaultdict
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics.pairwise import cosine_similarity
from gensim.models import Word2Vec


from pathlib import Path

RANDOM_STATE = 42
np.random.seed(RANDOM_STATE)
random.seed(RANDOM_STATE)

# Single place to edit when the data moves; all three matrices are read from here.
DATA_DIR = Path('/Users/kookbab/Desktop/연구 정리 KCI/코드/04.making_graphs/numerical_values')


def load_similarity_matrix(filename):
    """Read one word-by-word similarity matrix stored as parquet in DATA_DIR.

    The 'word' column becomes the index and the columns are relabelled with
    that same index, so rows and columns share one vocabulary. The relabelling
    raises a ValueError if the stored matrix is not square.
    """
    matrix = pd.read_parquet(DATA_DIR / filename, engine='pyarrow').set_index('word')
    matrix.columns = matrix.index
    return matrix


df1 = load_similarity_matrix('COS_bert_word_embed.parquet')
df2 = load_similarity_matrix('COS_bert_word_topic_sim.parquet')
df3 = load_similarity_matrix('COS_lda_word_topic_prob.parquet')


# Shuffle the word order once (seeded above) and apply the same order to the
# rows and columns of all three matrices so they stay aligned.
words_shuffled = df1.index.tolist()
random.shuffle(words_shuffled)

# Clip negative similarities to 0 (values are floored, not removed).
df1 = df1.loc[words_shuffled, words_shuffled].clip(lower=0)
df2 = df2.loc[words_shuffled, words_shuffled].clip(lower=0)
df3 = df3.loc[words_shuffled, words_shuffled].clip(lower=0)

# Make Graph!

## individual graphs

# threshold 없음


In [2]:
# Helper: similarity matrix -> single weighted graph
def create_graph_from_df(df, label):
    """Build a complete undirected weighted graph from a similarity matrix.

    Every pair (row i, column j) with j > i becomes an edge, so only the upper
    triangle is read and the diagonal is skipped. No threshold is applied:
    zero-weight pairs are added as edges too.

    Parameters
    ----------
    df : pandas.DataFrame
        Similarity matrix. Index labels are the nodes and the first endpoint
        of each edge; column labels are the second endpoint.
    label : str
        Stored on every edge as the ``type`` attribute and shown in the
        progress-bar description.

    Returns
    -------
    networkx.Graph
        Graph whose edges carry ``weight`` and ``type`` attributes.
    """
    G = nx.Graph()
    words = df.index.tolist()
    G.add_nodes_from(words)

    # Convert once up front: scalar df.iat / df.columns[j] lookups inside the
    # inner loop dominate the run time for a few-thousand-word vocabulary.
    # NOTE(review): assumes all columns share one numeric dtype, so to_numpy()
    # yields the same scalars df.iat would — confirm for mixed-dtype inputs.
    values = df.to_numpy()
    columns = df.columns.tolist()
    n_cols = len(columns)

    for i, word_i in tqdm(enumerate(words), total=len(words), desc=f"Creating {label}"):
        row = values[i]
        # Same insertion order as the original nested loop (j ascending).
        G.add_edges_from(
            (word_i, columns[j], {'weight': row[j], 'type': label})
            for j in range(i + 1, n_cols)
        )

    return G

# Build one complete graph per similarity matrix (in order: sim1, sim2, sim3).
G1, G2, G3 = (
    create_graph_from_df(matrix, tag)
    for matrix, tag in ((df1, 'sim1'), (df2, 'sim2'), (df3, 'sim3'))
)

# Report node/edge counts. The generator keeps its loop variables local, so no
# extra global reference to a graph survives this cell.
print("\n".join(
    f"{name} ({tag}) - 노드 수: {g.number_of_nodes()}, 엣지 수: {g.number_of_edges()}"
    for name, tag, g in (('G1', 'sim1', G1), ('G2', 'sim2', G2), ('G3', 'sim3', G3))
))


Creating sim1: 100%|██████████| 2597/2597 [00:28<00:00, 92.05it/s] 
Creating sim2: 100%|██████████| 2597/2597 [00:26<00:00, 98.98it/s] 
Creating sim3: 100%|██████████| 2597/2597 [00:26<00:00, 97.47it/s] 

G1 (sim1) - 노드 수: 2597, 엣지 수: 3370906
G2 (sim2) - 노드 수: 2597, 엣지 수: 3370906
G3 (sim3) - 노드 수: 2597, 엣지 수: 3370906





In [3]:
# Collect the weight attribute of every edge in G1.
weights = [attrs['weight'] for *_, attrs in G1.edges(data=True)]

# Summary statistics of the edge-weight distribution.
weight_arr = np.asarray(weights)
print(f"엣지 개수: {weight_arr.size}")
print(f"최소 weight: {weight_arr.min()}")
print(f"최대 weight: {weight_arr.max()}")
print(f"평균 weight: {weight_arr.mean():.2f}")
print(f"중앙값 weight: {np.median(weight_arr)}")
print(f"표준편차: {weight_arr.std():.2f}")


엣지 개수: 3370906
최소 weight: 0.0
최대 weight: 0.9958657439066876
평균 weight: 0.60
중앙값 weight: 0.6054310913910537
표준편차: 0.17


In [4]:
# Collect the weight attribute of every edge in G2.
weights = [attrs['weight'] for *_, attrs in G2.edges(data=True)]

# Summary statistics of the edge-weight distribution.
weight_arr = np.asarray(weights)
print(f"엣지 개수: {weight_arr.size}")
print(f"최소 weight: {weight_arr.min()}")
print(f"최대 weight: {weight_arr.max()}")
print(f"평균 weight: {weight_arr.mean():.2f}")
print(f"중앙값 weight: {np.median(weight_arr)}")
print(f"표준편차: {weight_arr.std():.2f}")


엣지 개수: 3370906
최소 weight: 0.0883820220823126
최대 weight: 0.9999488179664748
평균 weight: 0.98
중앙값 weight: 0.9847848778110468
표준편차: 0.03


In [5]:
# Collect the weight attribute of every edge in G3.
weights = [attrs['weight'] for *_, attrs in G3.edges(data=True)]

# Summary statistics of the edge-weight distribution.
weight_arr = np.asarray(weights)
print(f"엣지 개수: {weight_arr.size}")
print(f"최소 weight: {weight_arr.min()}")
print(f"최대 weight: {weight_arr.max()}")
print(f"평균 weight: {weight_arr.mean():.2f}")
print(f"중앙값 weight: {np.median(weight_arr)}")
print(f"표준편차: {weight_arr.std():.2f}")


엣지 개수: 3370906
최소 weight: 0.0005424193774261279
최대 weight: 0.999999508369228
평균 weight: 0.19
중앙값 weight: 0.1265978945886176
표준편차: 0.18


## 각 그래프에 대한 community detection

In [6]:
def get_louvain_partition(G, label, res):
    """Run weighted Louvain on G and return the node-to-community table.

    The partition is computed on the 'weight' edge attribute at resolution
    `res`, seeded with RANDOM_STATE. The result has one row per node and two
    columns: 'word' and 'community_<label>'.
    """
    node_to_community = community_louvain.best_partition(
        G,
        weight='weight',
        resolution=res,
        random_state=RANDOM_STATE,
    )
    rows = list(node_to_community.items())
    return pd.DataFrame(rows, columns=['word', f'community_{label}'])

# Per-graph Louvain resolutions: sim1 -> 1.2, sim2 -> 1.01, sim3 -> 3.3
df_comm1 = get_louvain_partition(G1, 'sim1', 1.2)
df_comm2 = get_louvain_partition(G2, 'sim2', 1.01)
df_comm3 = get_louvain_partition(G3, 'sim3', 3.3)

# Number of distinct communities found in each graph.
print("\n".join(
    f"{tag} 커뮤니티 수: {frame['community_' + tag].nunique()}"
    for tag, frame in (('sim1', df_comm1), ('sim2', df_comm2), ('sim3', df_comm3))
))

sim1 커뮤니티 수: 821
sim2 커뮤니티 수: 893
sim3 커뮤니티 수: 626


In [7]:
# Join the three partitions on 'word' (default inner join) and index by word.
df_merged = (
    df_comm1
    .merge(df_comm2, on='word')
    .merge(df_comm3, on='word')
    .set_index('word')
)


In [8]:
# Display the merged table: one row per word, one community-id column per similarity graph.
df_merged

Unnamed: 0_level_0,community_sim1,community_sim2,community_sim3
word,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
인수,0,184,58
주민,1,617,290
당부,2,378,509
시작,3,3,3
오픈,4,4,4
...,...,...,...
사건,498,617,592
생태,156,184,518
수용,789,576,480
검사,29,281,175


In [9]:
import gc

# Drop the three complete graphs and the per-graph partition frames to release
# kernel memory; the visible cells after this one only use df_merged.
del df_comm1, df_comm2, df_comm3, G1, G2, G3
# gc.collect() returns the number of unreachable objects found, which is the
# value this cell displays.
gc.collect()


18834

# Vectorize

## OneHotEncoder (Best)

In [10]:
# One-hot encode the three community-id columns: each word becomes a binary
# vector with one active slot per similarity graph.
encoder = OneHotEncoder(sparse_output=False)
community_vectors = encoder.fit(df_merged).transform(df_merged)

# Preview the encoded matrix with words as row labels.
pd.DataFrame(data=community_vectors, index=df_merged.index)

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9,...,2330,2331,2332,2333,2334,2335,2336,2337,2338,2339
word,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
인수,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
주민,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
당부,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
시작,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
오픈,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
사건,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
생태,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
수용,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
검사,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [11]:
# One-hot encode the community memberships and keep the word labels on the rows.
encoder = OneHotEncoder(sparse_output=False)
community_vectors = pd.DataFrame(
    encoder.fit_transform(df_merged),
    index=df_merged.index,
)

# Pairwise cosine similarity between the one-hot membership vectors.
sim_matrix = cosine_similarity(community_vectors.values)

words = community_vectors.index.tolist()
G_cluster = nx.Graph()
G_cluster.add_nodes_from(words)

# Each word has exactly one active slot per input column (three columns), so
# the similarity of two words is (shared communities) / 3. A 0.3 cut-off
# therefore keeps every pair that shares at least one community.
threshold = 0.3
n_words = len(words)
for i in tqdm(range(n_words), desc="Building new graph"):
    sim_row = sim_matrix[i]
    # Upper triangle only; edges are inserted in ascending j order.
    G_cluster.add_edges_from(
        (words[i], words[j], {'weight': sim_row[j]})
        for j in range(i + 1, n_words)
        if sim_row[j] >= threshold
    )


print(f"G_cluster - 노드 수: {G_cluster.number_of_nodes()}, 엣지 수: {G_cluster.number_of_edges()}")

Building new graph: 100%|██████████| 2597/2597 [00:00<00:00, 3278.94it/s]

G_cluster - 노드 수: 2597, 엣지 수: 291983





In [12]:
# Collect the weight attribute of every edge in G_cluster.
weights = [attrs['weight'] for *_, attrs in G_cluster.edges(data=True)]

# Summary statistics of the edge-weight distribution.
weight_arr = np.asarray(weights)
print(f"엣지 개수: {weight_arr.size}")
print(f"최소 weight: {weight_arr.min()}")
print(f"최대 weight: {weight_arr.max()}")
print(f"평균 weight: {weight_arr.mean():.2f}")
print(f"중앙값 weight: {np.median(weight_arr)}")
print(f"표준편차: {weight_arr.std():.2f}")


엣지 개수: 291983
최소 weight: 0.3333333333333334
최대 weight: 1.0000000000000002
평균 weight: 0.37
중앙값 weight: 0.3333333333333334
표준편차: 0.11


In [13]:
# Count how many edges carry each distinct weight value.
pd.DataFrame(weights)[0].value_counts()

0
0.333333    260443
0.666667     30613
1.000000       927
Name: count, dtype: int64

# Community Detection

In [14]:
# Resolutions to sweep: 2.0 up to (but excluding) 3.8 in steps of 0.1,
# rounded to one decimal so they can serve as dictionary keys.
res_range = np.arange(2.0, 3.8, 0.1)
resolutions = [round(step, 1) for step in res_range]

# resolution -> {'num_communities': int, 'communities': {community id: [words]}}
resolution_results = {}

for res in tqdm(resolutions, desc="Louvain by resolution"):
    print(f"\n================ Resolution: {res:.1f} ================")

    partition = community_louvain.best_partition(
        G_cluster,
        weight='weight',
        resolution=res,
        random_state=RANDOM_STATE,
    )

    # Group the nodes by the community id Louvain assigned to them.
    members_by_community = {}
    for node, comm_id in partition.items():
        members_by_community.setdefault(comm_id, []).append(node)
    num_communities = len(members_by_community)

    # Largest community first; the stable sort keeps ties in discovery order.
    community_groups = dict(
        sorted(members_by_community.items(), key=lambda item: -len(item[1]))
    )

    resolution_results[res] = dict(
        num_communities=num_communities,
        communities=community_groups,
    )

    print(f"총 커뮤니티 수: {num_communities}")


Louvain by resolution:   0%|          | 0/18 [00:00<?, ?it/s]




Louvain by resolution:   6%|▌         | 1/18 [00:02<00:36,  2.12s/it]

총 커뮤니티 수: 122



Louvain by resolution:  11%|█         | 2/18 [00:05<00:45,  2.85s/it]

총 커뮤니티 수: 122



Louvain by resolution:  17%|█▋        | 3/18 [00:07<00:38,  2.58s/it]

총 커뮤니티 수: 126



Louvain by resolution:  22%|██▏       | 4/18 [00:10<00:35,  2.53s/it]

총 커뮤니티 수: 130



Louvain by resolution:  28%|██▊       | 5/18 [00:12<00:32,  2.50s/it]

총 커뮤니티 수: 131



Louvain by resolution:  33%|███▎      | 6/18 [00:14<00:27,  2.26s/it]

총 커뮤니티 수: 130



Louvain by resolution:  39%|███▉      | 7/18 [00:16<00:24,  2.19s/it]

총 커뮤니티 수: 131



Louvain by resolution:  44%|████▍     | 8/18 [00:18<00:21,  2.20s/it]

총 커뮤니티 수: 131



Louvain by resolution:  50%|█████     | 9/18 [00:21<00:20,  2.31s/it]

총 커뮤니티 수: 132



Louvain by resolution:  56%|█████▌    | 10/18 [00:24<00:19,  2.48s/it]

총 커뮤니티 수: 133



Louvain by resolution:  61%|██████    | 11/18 [00:27<00:18,  2.61s/it]

총 커뮤니티 수: 134



Louvain by resolution:  67%|██████▋   | 12/18 [00:29<00:14,  2.49s/it]

총 커뮤니티 수: 136



Louvain by resolution:  72%|███████▏  | 13/18 [00:30<00:11,  2.21s/it]

총 커뮤니티 수: 136



Louvain by resolution:  78%|███████▊  | 14/18 [00:33<00:08,  2.24s/it]

총 커뮤니티 수: 140



Louvain by resolution:  83%|████████▎ | 15/18 [00:35<00:06,  2.19s/it]

총 커뮤니티 수: 139



Louvain by resolution:  89%|████████▉ | 16/18 [00:37<00:04,  2.17s/it]

총 커뮤니티 수: 142



Louvain by resolution:  94%|█████████▍| 17/18 [00:38<00:02,  2.02s/it]

총 커뮤니티 수: 145



Louvain by resolution: 100%|██████████| 18/18 [00:41<00:00,  2.28s/it]

총 커뮤니티 수: 147





In [15]:
def print_communities_by_resolution(res):
    """Print every community stored in resolution_results for `res`.

    Prints the resolution header, then either a not-found message (when no
    result is stored under `res`) or the community count followed by each
    community's id, size and comma-separated member words.
    """
    print(f"\n Resolution: {res}")

    result = resolution_results.get(res, None)
    if result is None:
        print("해당 resolution 값의 결과가 없습니다.")
        return

    total = result['num_communities']
    print(f"총 커뮤니티 수: {total}")

    for comm_id, members in result['communities'].items():
        header = f"\nCommunity {comm_id} ({len(members)}개 단어)"
        print(header)
        print(', '.join(members))


In [24]:
# Print all communities stored for resolution 3.0 (stored largest-first by the sweep cell).
print_communities_by_resolution(3.0)


 Resolution: 3.0
총 커뮤니티 수: 134

Community 15 (275개 단어)
인수, 부동산, 이론, 대기업, 위험성, 성취, 의견, 노력, 가격, 얼마, 저작권, 심사, 안전, 혁신적, 몸값, 돌파구, 초점, 결과, 허용, 영업, 영향력, 체크, 홍보, 경쟁력, 필요성, 수집, 핵심적, 경제적, 기업가치, 소재, 자금, 책임, 투자금, 계약서, 성공, 과금, 중요성, 규모, 크레딧, 게재, 빅데이터, 필수적, 상금, 사업자, 표준, 성공적, 자본, 저장, 전략, 제품, 해양생물, 완성도, 충족, 자연, 결제, 제조, 자원, 점유율, 창업, 산업현장, 사용률, 편향성, 한계, 편의성, 추정, 무기, 부문장, 통합, 유산, 복지, 공장, 진단, 경쟁, 기업가, 획기적, 설문조사, 동기, 안정성, 지급, 선정, 효율성, 식당, 가치, 정상, 체계, 레벨, 효율, 급성장, 수요, 보험, 자본주의, 소득, 마케팅, 금융, 보수, 조직, 신뢰도, 창업자, 제조업, 품질, 성장, 측정, 수입, 신경망, 금액, 자율, 승부, 보상, 성장세, 블록체인, 성취도, 매출, 성공사례, 합리적, 혁신, 유통, 예비, 인기, 경영학, 지속가능, 영업이익, 벤처, 대규모, 수익률, 환경, 선택, 계산, 생태계, 지역균형발전, 성과, 효율화, 전기, 기록, 결정, 자회사, 개선, 주식, 산업혁명, 투자, 발견, 카드, 계정, 매출액, 업그레이드, 사유, 대량, 기업들, 점검, 혁신상, 대상, 쇼핑, 취약, 비용, 증명, 거래, 보호, 자신감, 속도, 자산, 경제성, 사업화, 상점, 요금, 수익화, 수익, 용량, 상품, 완성, 보급, 특허, 재정, 극대화, 최소화, 제작, 성취기준, 개척, 현업, 매장, 효율적, 인상, 예산, 전문가들, 수익성, 농업, 실험, 최대, 스토어, 시험, 비즈니스, 평균, 정확성, 고용, 예방, 계약, 포인트, 정보, 윤리적, 가치관, 맛집, 기업간거래, 핵심, 대형, 예측, 공급, 업계, 수출, 통계, 능동적, 임금, 에너지, 위험, 경

# Modularity

In [17]:
# Re-run Louvain at each resolution and report the partition's modularity.
# The modularity call is not given the resolution; it only receives the
# partition, the graph and the weight attribute.
for raw_resolution in res_range:
    resolution = round(raw_resolution, 2)
    partition = community_louvain.best_partition(
        G_cluster,
        weight='weight',
        resolution=resolution,
        random_state=RANDOM_STATE,
    )
    modularity = community_louvain.modularity(partition, G_cluster, weight='weight')
    print(f"resolution:{resolution} -> Modularity: {modularity:.4f}")

resolution:2.0 -> Modularity: 0.5839
resolution:2.1 -> Modularity: 0.5832
resolution:2.2 -> Modularity: 0.5099
resolution:2.3 -> Modularity: 0.4864
resolution:2.4 -> Modularity: 0.4805
resolution:2.5 -> Modularity: 0.4518
resolution:2.6 -> Modularity: 0.4457
resolution:2.7 -> Modularity: 0.4486
resolution:2.8 -> Modularity: 0.4348
resolution:2.9 -> Modularity: 0.4365
resolution:3.0 -> Modularity: 0.4091
resolution:3.1 -> Modularity: 0.3904
resolution:3.2 -> Modularity: 0.3741
resolution:3.3 -> Modularity: 0.3701
resolution:3.4 -> Modularity: 0.3694
resolution:3.5 -> Modularity: 0.3690
resolution:3.6 -> Modularity: 0.3667
resolution:3.7 -> Modularity: 0.3622


In [None]:
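# NOTE: this cell is disabled (every line is commented out); the "saved" message shown below it is output left over from an earlier run.
# import json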

# modularity_scores = {
#     "modularity": community_louvain.modularity(community_louvain.best_partition(G_cluster, weight='weight', resolution=3.0, random_state=RANDOM_STATE), 
#                                                    G_cluster, weight='weight'),
# }

# with open("/Users/kookbab/Desktop/연구 정리 KCI/코드/05. Metrics/modularities/meta_community.json", "w") as f:
#     json.dump(modularity_scores, f, indent=4)

# print("✅ Modularity scores saved.")


✅ Modularity scores saved.


# Coherence

In [19]:
import pickle

# Load the tokenized documents; the coherence cell below uses them to build
# the gensim Dictionary / corpus and to train Word2Vec.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file —
# only load pickles produced by this project.
with open("/Users/kookbab/Desktop/연구 정리 KCI/코드/04.making_graphs/tokenized_docs.pkl", "rb") as f:
    tokenized_docs = pickle.load(f)

# Save Coherence

In [20]:
import json
from gensim.models.coherencemodel import CoherenceModel
from gensim.corpora import Dictionary


# The dictionary, bag-of-words corpus and Word2Vec model depend only on
# tokenized_docs, so they are built once instead of once per resolution.
dictionary = Dictionary(tokenized_docs)
corpus = [dictionary.doc2bow(text) for text in tokenized_docs]

# Word2Vec coherence is best-effort: if training fails, c_w2v is reported as
# None for every resolution and the other metrics are still computed.
# NOTE(review): with workers=4 gensim presumably does not guarantee identical
# vectors across runs even with a fixed seed — confirm if exact
# reproducibility of c_w2v matters. Training once at least makes the c_w2v
# values comparable across resolutions within a single run.
try:
    w2v_model = Word2Vec(tokenized_docs, vector_size=100, window=5, min_count=2, workers=4, epochs=10, seed=RANDOM_STATE)
except Exception as e:
    print("⚠️ Word2Vec coherence 계산 실패:", e)
    w2v_model = None

metrics = ['c_v', 'u_mass', 'c_uci', 'c_npmi']

# resolution -> {metric name: score}; kept so the scores can be inspected or
# saved after the loop instead of only being printed.
coherence_by_resolution = {}

# Iterate the stored results directly rather than re-rounding res_range:
# looking results up by a recomputed float key can miss if the two roundings
# ever disagree.
for selected_resolution, result in resolution_results.items():

    # Word lists of the communities found at this resolution.
    communities = result['communities']

    # Drop communities with fewer than 10 words.
    # NOTE(review): CoherenceModel is used with its default `topn`, which
    # presumably limits scoring to the first words of each community list —
    # confirm that is intended for communities with hundreds of words.
    filtered_communities = [words for words in communities.values() if len(words) >= 10]

    # Co-occurrence based coherence metrics; only u_mass takes the BoW corpus.
    coherence_scores = {}
    for metric in metrics:
        cm = CoherenceModel(
            topics=filtered_communities,
            texts=tokenized_docs,
            corpus=corpus if metric == 'u_mass' else None,
            dictionary=dictionary,
            coherence=metric
        )
        coherence_scores[metric] = cm.get_coherence()

    # Embedding-based coherence using the shared Word2Vec vectors.
    if w2v_model is None:
        coherence_scores['c_w2v'] = None
    else:
        try:
            cm_w2v = CoherenceModel(
                topics=filtered_communities,
                texts=tokenized_docs,
                dictionary=dictionary,
                coherence='c_w2v',
                keyed_vectors=w2v_model.wv
            )
            coherence_scores['c_w2v'] = cm_w2v.get_coherence()
        except Exception as e:
            print("⚠️ Word2Vec coherence 계산 실패:", e)
            coherence_scores['c_w2v'] = None

    coherence_by_resolution[selected_resolution] = coherence_scores
    print(coherence_scores)


{'c_v': 0.5008508993689544, 'u_mass': -11.219104709652585, 'c_uci': -11.792623395070565, 'c_npmi': -0.4081514224331184, 'c_w2v': 0.70552087}
{'c_v': 0.4971438496233945, 'u_mass': -11.258154697615266, 'c_uci': -11.839864122948853, 'c_npmi': -0.40974890423923266, 'c_w2v': 0.70555997}
{'c_v': 0.48229195131156755, 'u_mass': -11.006429562467204, 'c_uci': -11.624715344826027, 'c_npmi': -0.39914508110253843, 'c_w2v': 0.7099032}
{'c_v': 0.48141870200659714, 'u_mass': -11.169947674519403, 'c_uci': -11.553437930691802, 'c_npmi': -0.39577585921709174, 'c_w2v': 0.7214625}
{'c_v': 0.4788512014906908, 'u_mass': -11.225429541695672, 'c_uci': -11.5572292612852, 'c_npmi': -0.3960661055694669, 'c_w2v': 0.7230113}
{'c_v': 0.46790658958575526, 'u_mass': -10.793227360047036, 'c_uci': -11.500730326153153, 'c_npmi': -0.3944050625663619, 'c_w2v': 0.72297865}
{'c_v': 0.4711978791004278, 'u_mass': -10.933839157534443, 'c_uci': -11.569526198813232, 'c_npmi': -0.39705577794568014, 'c_w2v': 0.7231285}
{'c_v': 0.46

In [26]:
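# NOTE: this cell is disabled (every line is commented out); the "saved" message and scores shown below it are output left over from an earlier run.
# import json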

# # 원하는 resolution 값 지정
# selected_resolution = 3.0

# # resolution 결과에서 커뮤니티 단어 리스트 추출
# communities = resolution_results[selected_resolution]['communities']

# # 10개 미만 커뮤니티 제외
# filtered_communities = [words for words in communities.values() if len(words) >= 10]

# # Dictionary 및 Corpus 생성
# dictionary = Dictionary(tokenized_docs)
# corpus = [dictionary.doc2bow(text) for text in tokenized_docs]

# # 다양한 coherence metric 계산
# coherence_scores = {}
# metrics = ['c_v', 'u_mass', 'c_uci', 'c_npmi']
# for metric in metrics:
#     cm = CoherenceModel(
#         topics=filtered_communities,
#         texts=tokenized_docs,
#         corpus=corpus if metric in ['u_mass'] else None,
#         dictionary=dictionary,
#         coherence=metric
#     )
#     coherence_scores[metric] = cm.get_coherence()


# try:
    
#     w2v_model = Word2Vec(tokenized_docs, vector_size=100, window=5, min_count=2, workers=4, epochs=10,  seed=RANDOM_STATE)

#     cm_w2v = CoherenceModel(
#         topics=filtered_communities,
#         texts=tokenized_docs,
#         dictionary=dictionary,
#         coherence='c_w2v',
#         keyed_vectors=w2v_model.wv
#     )
#     coherence_scores['c_w2v'] = cm_w2v.get_coherence().astype('float64')

# except Exception as e:
#     print("Word2Vec coherence 계산 실패:", e)
#     coherence_scores['c_w2v'] = None


# output_path = f"/Users/kookbab/Desktop/연구 정리 KCI/코드/05. Metrics/coherence/meta_community.json"
# with open(output_path, "w") as f:
#     json.dump(coherence_scores, f, indent=4)

# print(f"Coherence scores saved to {output_path}")

# coherence_scores



Coherence scores saved to /Users/kookbab/Desktop/연구 정리 KCI/코드/05. Metrics/coherence/meta_community.json


{'c_v': 0.46792092553162534,
 'u_mass': -10.540254422246292,
 'c_uci': -11.408181114604464,
 'c_npmi': -0.389791661499657,
 'c_w2v': 0.7258339524269104}