In [1]:
import networkx as nx
import numpy as np
import pandas as pd
from cartoframes.viz import *
import torch
import torch.nn.functional as F
from torch_geometric.data import Data
from torch_geometric.nn import GCNConv, GATConv
from sklearn.preprocessing import StandardScaler
from tqdm import tqdm
import pickle
import random

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# 초기 노드 임베딩 가져오기
def get_basic_embeddings(path, dong_name):
    emb = pd.read_csv(path)
    #emb = standardize_dataframe(emb, dong_name)   # 정규화
    features = torch.tensor(emb.iloc[:, 1:].values, dtype=torch.float32)   # 임베딩만 추출
    return emb, features

In [5]:
# 그래프 불러오기
def get_graph(path, emb, dong_name):
    with open(path, 'rb') as f:
        G = pickle.load(f)
    edges = G.edges(data=True)   # 엣지 추출
    node_to_idx = {name: idx for idx, name in enumerate(emb[dong_name])}   # 동이름 -> 인덱스 변환
    edge_index = torch.tensor([[node_to_idx[edge[0]], node_to_idx[edge[1]]] for edge in edges], dtype=torch.long).T   # edge list 재생산
    edge_weight = torch.tensor([edge[2]['weight'] for edge in edges], dtype=torch.float32)
    
    return edge_index, edge_weight

In [46]:
# humanflow 임베딩 데이터
hf_emb = pd.read_csv('../../Dataset/Raw_Embeddings/HumanFlow_Embeddings_kor.csv')
group = hf_emb.groupby(['기준일ID'])

for key, df in group:
    print(key[0])
    print(type(key[0]))
    print(df.shape)
    print(df.columns)
    emb = df
    features = torch.tensor(emb.iloc[:, 2:].values, dtype=torch.float32)
    break

201701
<class 'int'>
(424, 37)
Index(['기준일ID', '행정동코드', '총생활인구수', '남자0세부터9세생활인구수', '남자10세부터14세생활인구수',
       '남자15세부터19세생활인구수', '남자20세부터24세생활인구수', '남자25세부터29세생활인구수',
       '남자30세부터34세생활인구수', '남자35세부터39세생활인구수', '남자40세부터44세생활인구수',
       '남자45세부터49세생활인구수', '남자50세부터54세생활인구수', '남자55세부터59세생활인구수',
       '남자60세부터64세생활인구수', '남자65세부터69세생활인구수', '남자70세이상생활인구수', '여자0세부터9세생활인구수',
       '여자10세부터14세생활인구수', '여자15세부터19세생활인구수', '여자20세부터24세생활인구수',
       '여자25세부터29세생활인구수', '여자30세부터34세생활인구수', '여자35세부터39세생활인구수',
       '여자40세부터44세생활인구수', '여자45세부터49세생활인구수', '여자50세부터54세생활인구수',
       '여자55세부터59세생활인구수', '여자60세부터64세생활인구수', '여자65세부터69세생활인구수', '여자70세이상생활인구수',
       '총생활인구수_LONG_FOREIGNER', '중국인체류인구수_LONG_FOREIGNER',
       '중국외외국인체류인구수_LONG_FOREIGNER', '총생활인구수_TEMP_FOREIGNER',
       '중국인체류인구수_TEMP_FOREIGNER', '중국외외국인체류인구수_TEMP_FOREIGNER'],
      dtype='object')


In [6]:
# human flow 그래프 데이터
basic_path = '../../Dataset/Human_flow_Graph/'
start_year = 2017
start_month = 1
end_year = 2022
end_month = 7

# 경로들 확인
path_list = [
    basic_path+f"{year}{month:02d}.gpickle"
    for year in range(start_year, end_year + 1)
    for month in range(1, 13)
    if not (year == end_year and month > end_month)
]

edge_index, edge_weight = get_graph(path_list[0], emb, '행정동코드')
edge_index

NameError: name 'emb' is not defined

In [32]:
data = Data(x=features, edge_index=edge_index, edge_weight=edge_weight)

# Check for very large or very small values
print(data.x.max(), data.x.min())  # 값 범위 확인
print(data.edge_weight.max(), data.edge_weight.min())

tensor(27.3508) tensor(-1.8878)
tensor(1.) tensor(1.)


In [7]:
# GCN 모델 정의
class GCN(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels):
        super(GCN, self).__init__()
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, out_channels)

    def forward(self, x, edge_index, edge_weight):
        x = self.conv1(x, edge_index, edge_weight)
        x = F.relu(x)
        x = self.conv2(x, edge_index, edge_weight)
        return x

In [8]:
# GAT 모델 정의
class GAT(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels, heads=1):
        super(GAT, self).__init__()
        self.conv1 = GATConv(in_channels, hidden_channels, heads=heads)
        self.conv2 = GATConv(hidden_channels * heads, out_channels, heads=1)

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = self.conv2(x, edge_index)
        return x

In [9]:
# Negative Sampling 구현
def negative_sampling(edge_index, num_nodes, num_neg_samples):
    """Negative samples 생성"""
    edge_set = set(tuple(edge) for edge in edge_index.T.tolist())
    neg_samples = []
    while len(neg_samples) < num_neg_samples:
        i, j = np.random.randint(0, num_nodes, size=2)
        if i != j and (i, j) not in edge_set and (j, i) not in edge_set:
            neg_samples.append((i, j))
    return torch.tensor(neg_samples, dtype=torch.long).T

In [10]:
def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)  # 멀티 GPU를 사용하는 경우
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

In [36]:
# 학습 설정
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
data = data.to(device)

model_gcn = GCN(data.num_node_features, 64, 35).to(device)
model_gat = GAT(data.num_node_features, 64, 35, heads=4).to(device)  # heads=4는 각 노드에서 4개의 attention heads 사용

# 최적화 방법 설정
optimizer_gcn = torch.optim.Adam(model_gcn.parameters(), lr=0.01, weight_decay=5e-4)
optimizer_gat = torch.optim.Adam(model_gat.parameters(), lr=0.01, weight_decay=5e-4)

In [11]:
def train_model(model, data, optimizer, num_epochs=200, patience=20, seed=42):
    """
    GCN 또는 GAT 모델 학습을 위한 함수 (Early Stopping 시 best_loss 기준으로 z 반환).
    
    Args:
        model (torch.nn.Module): 학습할 모델 (GCN 또는 GAT).
        data (torch_geometric.data.Data): 학습에 사용할 데이터.
        optimizer (torch.optim.Optimizer): 최적화 알고리즘.
        num_epochs (int): 학습할 에폭 수.
        patience (int): Early Stopping 기준.
        seed (int): 재현 가능한 결과를 위한 Seed 값.

    Returns:
        torch.Tensor: `best_loss` 기준으로 저장된 노드 임베딩.
    """
    # Seed 고정
    set_seed(seed)

    best_loss = float('inf')
    counter = 0
    best_z = None  # `best_loss` 시점의 임베딩 저장

    for epoch in tqdm(range(num_epochs), desc="Training Progress"):
        model.train()
        optimizer.zero_grad()

        # Forward pass
        if isinstance(model, GAT):  # GAT 모델에는 edge_weight 사용하지 않음
            z = model(data.x, data.edge_index)
        else:  # GCN 모델에는 edge_weight 전달
            z = model(data.x, data.edge_index, data.edge_weight)

        # Positive edges
        pos_edges = data.edge_index.T
        pos_scores = (z[pos_edges[:, 0]] * z[pos_edges[:, 1]]).sum(dim=1)

        # Negative edges
        neg_edges = negative_sampling(data.edge_index, data.num_nodes, pos_edges.size(0) // 2)
        neg_scores = (z[neg_edges[0]] * z[neg_edges[1]]).sum(dim=1)

        # Loss 계산
        pos_loss = -F.logsigmoid(pos_scores).mean()
        neg_loss = -F.logsigmoid(-neg_scores).mean()
        loss = pos_loss + neg_loss

        loss.backward()
        optimizer.step()

        # Loss 출력
        tqdm.write(f"Epoch {epoch + 1}, Loss: {loss.item()}")

        # Early Stopping 로직
        if loss.item() < best_loss:
            best_loss = loss.item()  # 최적 Loss 갱신
            best_z = z.detach().cpu()  # `best_loss` 시점의 임베딩 저장
            counter = 0  # 카운터 초기화
        else:
            counter += 1

        if counter >= patience:
            tqdm.write(f"Early stopping at epoch {epoch + 1}")
            break

    return best_z  # 최적 Loss 기준의 임베딩 반환

In [41]:
z_gcn = train_model(model_gcn, data, optimizer_gcn, num_epochs=200, patience=20)
z_gat = train_model(model_gat, data, optimizer_gat, num_epochs=200, patience=20)

Training Progress:   1%|          | 2/200 [00:00<01:23,  2.36it/s]

Epoch 1, Loss: 2.1354637145996094
Epoch 2, Loss: 1.115706443786621


Training Progress:   2%|▏         | 3/200 [00:01<00:56,  3.51it/s]

Epoch 3, Loss: 1.0097235441207886


Training Progress:   2%|▎         | 5/200 [00:01<00:39,  4.91it/s]

Epoch 4, Loss: 1.0428000688552856
Epoch 5, Loss: 1.0593677759170532


Training Progress:   4%|▎         | 7/200 [00:01<00:29,  6.54it/s]

Epoch 6, Loss: 1.0102813243865967
Epoch 7, Loss: 0.9528412222862244


Training Progress:   4%|▍         | 8/200 [00:01<00:26,  7.20it/s]

Epoch 8, Loss: 0.8947300314903259


Training Progress:   5%|▌         | 10/200 [00:02<00:27,  6.94it/s]

Epoch 9, Loss: 0.8207608461380005
Epoch 10, Loss: 0.7539094686508179


Training Progress:   6%|▌         | 12/200 [00:02<00:23,  7.96it/s]

Epoch 11, Loss: 0.6943907737731934
Epoch 12, Loss: 0.6431031823158264


Training Progress:   6%|▋         | 13/200 [00:02<00:22,  8.25it/s]

Epoch 13, Loss: 0.5923910140991211


Training Progress:   8%|▊         | 15/200 [00:02<00:25,  7.28it/s]

Epoch 14, Loss: 0.5701874494552612
Epoch 15, Loss: 0.5556251406669617


Training Progress:   8%|▊         | 17/200 [00:02<00:22,  8.13it/s]

Epoch 16, Loss: 0.5291643142700195
Epoch 17, Loss: 0.496028333902359


Training Progress:   9%|▉         | 18/200 [00:03<00:21,  8.39it/s]

Epoch 18, Loss: 0.4720470607280731


Training Progress:  10%|█         | 20/200 [00:03<00:24,  7.47it/s]

Epoch 19, Loss: 0.44018933176994324
Epoch 20, Loss: 0.4217197597026825


Training Progress:  11%|█         | 22/200 [00:03<00:21,  8.26it/s]

Epoch 21, Loss: 0.40632203221321106
Epoch 22, Loss: 0.4015810489654541


Training Progress:  12%|█▏        | 23/200 [00:03<00:20,  8.53it/s]

Epoch 23, Loss: 0.38407135009765625


Training Progress:  12%|█▎        | 25/200 [00:04<00:23,  7.52it/s]

Epoch 24, Loss: 0.37878549098968506
Epoch 25, Loss: 0.3475920557975769


Training Progress:  14%|█▎        | 27/200 [00:04<00:20,  8.32it/s]

Epoch 26, Loss: 0.33701154589653015
Epoch 27, Loss: 0.3135136663913727


Training Progress:  14%|█▍        | 28/200 [00:04<00:20,  8.50it/s]

Epoch 28, Loss: 0.311861127614975


Training Progress:  15%|█▌        | 30/200 [00:04<00:22,  7.43it/s]

Epoch 29, Loss: 0.29208457469940186
Epoch 30, Loss: 0.2784821093082428


Training Progress:  16%|█▌        | 32/200 [00:04<00:20,  8.18it/s]

Epoch 31, Loss: 0.26223811507225037
Epoch 32, Loss: 0.24996523559093475


Training Progress:  16%|█▋        | 33/200 [00:04<00:19,  8.46it/s]

Epoch 33, Loss: 0.235317662358284


Training Progress:  18%|█▊        | 35/200 [00:05<00:22,  7.47it/s]

Epoch 34, Loss: 0.23891428112983704
Epoch 35, Loss: 0.22042354941368103


Training Progress:  18%|█▊        | 37/200 [00:05<00:19,  8.20it/s]

Epoch 36, Loss: 0.21663498878479004
Epoch 37, Loss: 0.21777945756912231


Training Progress:  19%|█▉        | 38/200 [00:05<00:19,  8.48it/s]

Epoch 38, Loss: 0.20967595279216766


Training Progress:  20%|██        | 40/200 [00:05<00:21,  7.50it/s]

Epoch 39, Loss: 0.21292494237422943
Epoch 40, Loss: 0.20253396034240723


Training Progress:  21%|██        | 42/200 [00:06<00:19,  8.29it/s]

Epoch 41, Loss: 0.19464990496635437
Epoch 42, Loss: 0.18235021829605103


Training Progress:  22%|██▏       | 43/200 [00:06<00:18,  8.40it/s]

Epoch 43, Loss: 0.1803688406944275


Training Progress:  22%|██▎       | 45/200 [00:06<00:21,  7.36it/s]

Epoch 44, Loss: 0.1735670566558838
Epoch 45, Loss: 0.17238138616085052


Training Progress:  24%|██▎       | 47/200 [00:06<00:18,  8.12it/s]

Epoch 46, Loss: 0.16913238167762756
Epoch 47, Loss: 0.16457942128181458


Training Progress:  24%|██▍       | 48/200 [00:06<00:18,  8.39it/s]

Epoch 48, Loss: 0.16678157448768616


Training Progress:  25%|██▌       | 50/200 [00:07<00:20,  7.48it/s]

Epoch 49, Loss: 0.15711091458797455
Epoch 50, Loss: 0.15139596164226532


Training Progress:  26%|██▌       | 52/200 [00:07<00:17,  8.24it/s]

Epoch 51, Loss: 0.14590108394622803
Epoch 52, Loss: 0.1525520533323288


Training Progress:  26%|██▋       | 53/200 [00:07<00:17,  8.52it/s]

Epoch 53, Loss: 0.14914527535438538


Training Progress:  28%|██▊       | 55/200 [00:07<00:19,  7.47it/s]

Epoch 54, Loss: 0.1459175944328308
Epoch 55, Loss: 0.1500215381383896


Training Progress:  28%|██▊       | 57/200 [00:08<00:17,  8.26it/s]

Epoch 56, Loss: 0.14595021307468414
Epoch 57, Loss: 0.13914243876934052


Training Progress:  29%|██▉       | 58/200 [00:08<00:16,  8.49it/s]

Epoch 58, Loss: 0.135976642370224


Training Progress:  30%|███       | 60/200 [00:08<00:18,  7.46it/s]

Epoch 59, Loss: 0.12947814166545868
Epoch 60, Loss: 0.13670599460601807


Training Progress:  31%|███       | 62/200 [00:08<00:16,  8.22it/s]

Epoch 61, Loss: 0.12922097742557526
Epoch 62, Loss: 0.12759895622730255


Training Progress:  32%|███▏      | 63/200 [00:08<00:16,  8.48it/s]

Epoch 63, Loss: 0.12465859204530716


Training Progress:  32%|███▎      | 65/200 [00:09<00:17,  7.52it/s]

Epoch 64, Loss: 0.12416952848434448
Epoch 65, Loss: 0.11111512035131454


Training Progress:  34%|███▎      | 67/200 [00:09<00:16,  8.22it/s]

Epoch 66, Loss: 0.10687465965747833
Epoch 67, Loss: 0.11149969696998596


Training Progress:  34%|███▍      | 68/200 [00:09<00:15,  8.49it/s]

Epoch 68, Loss: 0.10608616471290588


Training Progress:  35%|███▌      | 70/200 [00:09<00:17,  7.48it/s]

Epoch 69, Loss: 0.10578081756830215
Epoch 70, Loss: 0.09877876192331314


Training Progress:  36%|███▌      | 72/200 [00:10<00:15,  8.17it/s]

Epoch 71, Loss: 0.092127226293087
Epoch 72, Loss: 0.08938117325305939


Training Progress:  36%|███▋      | 73/200 [00:10<00:15,  8.44it/s]

Epoch 73, Loss: 0.0918872058391571


Training Progress:  38%|███▊      | 75/200 [00:10<00:16,  7.43it/s]

Epoch 74, Loss: 0.08581650257110596
Epoch 75, Loss: 0.0856994017958641


Training Progress:  38%|███▊      | 77/200 [00:10<00:14,  8.26it/s]

Epoch 76, Loss: 0.08611573278903961
Epoch 77, Loss: 0.07665324956178665


Training Progress:  39%|███▉      | 78/200 [00:10<00:14,  8.51it/s]

Epoch 78, Loss: 0.08337391912937164


Training Progress:  40%|████      | 80/200 [00:11<00:16,  7.48it/s]

Epoch 79, Loss: 0.07727546989917755
Epoch 80, Loss: 0.07395976036787033


Training Progress:  41%|████      | 82/200 [00:11<00:14,  8.23it/s]

Epoch 81, Loss: 0.07599018514156342
Epoch 82, Loss: 0.07040078938007355


Training Progress:  42%|████▏     | 83/200 [00:11<00:13,  8.48it/s]

Epoch 83, Loss: 0.07118553668260574


Training Progress:  42%|████▎     | 85/200 [00:11<00:15,  7.46it/s]

Epoch 84, Loss: 0.07561086863279343
Epoch 85, Loss: 0.07317838072776794


Training Progress:  44%|████▎     | 87/200 [00:11<00:13,  8.24it/s]

Epoch 86, Loss: 0.06793869286775589
Epoch 87, Loss: 0.06646379828453064


Training Progress:  44%|████▍     | 88/200 [00:12<00:13,  8.38it/s]

Epoch 88, Loss: 0.06922707706689835


Training Progress:  45%|████▌     | 90/200 [00:12<00:14,  7.47it/s]

Epoch 89, Loss: 0.06948588788509369
Epoch 90, Loss: 0.06648965924978256


Training Progress:  46%|████▌     | 92/200 [00:12<00:13,  8.24it/s]

Epoch 91, Loss: 0.06523572653532028
Epoch 92, Loss: 0.06635776162147522


Training Progress:  46%|████▋     | 93/200 [00:12<00:12,  8.49it/s]

Epoch 93, Loss: 0.06492559611797333


Training Progress:  48%|████▊     | 95/200 [00:13<00:13,  7.52it/s]

Epoch 94, Loss: 0.06308520585298538
Epoch 95, Loss: 0.06224924698472023


Training Progress:  48%|████▊     | 97/200 [00:13<00:12,  8.22it/s]

Epoch 96, Loss: 0.06429429352283478
Epoch 97, Loss: 0.06103600934147835


Training Progress:  49%|████▉     | 98/200 [00:13<00:12,  8.47it/s]

Epoch 98, Loss: 0.06236238032579422


Training Progress:  50%|█████     | 100/200 [00:13<00:13,  7.44it/s]

Epoch 99, Loss: 0.06184686720371246
Epoch 100, Loss: 0.06176575645804405


Training Progress:  51%|█████     | 102/200 [00:13<00:11,  8.27it/s]

Epoch 101, Loss: 0.06509344279766083
Epoch 102, Loss: 0.055961593985557556


Training Progress:  52%|█████▏    | 103/200 [00:14<00:11,  8.50it/s]

Epoch 103, Loss: 0.06183828413486481


Training Progress:  52%|█████▎    | 105/200 [00:14<00:12,  7.44it/s]

Epoch 104, Loss: 0.05934746563434601
Epoch 105, Loss: 0.0568401962518692


Training Progress:  54%|█████▎    | 107/200 [00:14<00:11,  8.27it/s]

Epoch 106, Loss: 0.056578606367111206
Epoch 107, Loss: 0.06318898499011993


Training Progress:  54%|█████▍    | 108/200 [00:14<00:10,  8.54it/s]

Epoch 108, Loss: 0.06398270279169083


Training Progress:  55%|█████▌    | 110/200 [00:14<00:12,  7.49it/s]

Epoch 109, Loss: 0.058037497103214264
Epoch 110, Loss: 0.06648072600364685


Training Progress:  56%|█████▌    | 112/200 [00:15<00:10,  8.16it/s]

Epoch 111, Loss: 0.056092314422130585
Epoch 112, Loss: 0.06045781821012497


Training Progress:  56%|█████▋    | 113/200 [00:15<00:10,  8.42it/s]

Epoch 113, Loss: 0.05972164869308472


Training Progress:  57%|█████▊    | 115/200 [00:15<00:11,  7.47it/s]

Epoch 114, Loss: 0.05486926808953285
Epoch 115, Loss: 0.05445085093379021


Training Progress:  58%|█████▊    | 117/200 [00:15<00:10,  8.28it/s]

Epoch 116, Loss: 0.05534204840660095
Epoch 117, Loss: 0.05390399321913719


Training Progress:  59%|█████▉    | 118/200 [00:15<00:09,  8.55it/s]

Epoch 118, Loss: 0.05156257748603821


Training Progress:  60%|██████    | 120/200 [00:16<00:10,  7.43it/s]

Epoch 119, Loss: 0.054207488894462585
Epoch 120, Loss: 0.052759889513254166


Training Progress:  61%|██████    | 122/200 [00:16<00:09,  8.17it/s]

Epoch 121, Loss: 0.0554325133562088
Epoch 122, Loss: 0.054938867688179016


Training Progress:  62%|██████▏   | 123/200 [00:16<00:09,  8.46it/s]

Epoch 123, Loss: 0.050169412046670914


Training Progress:  62%|██████▎   | 125/200 [00:16<00:09,  7.51it/s]

Epoch 124, Loss: 0.049088530242443085
Epoch 125, Loss: 0.05248687416315079


Training Progress:  64%|██████▎   | 127/200 [00:17<00:08,  8.27it/s]

Epoch 126, Loss: 0.05607309192419052
Epoch 127, Loss: 0.05386054888367653


Training Progress:  64%|██████▍   | 128/200 [00:17<00:08,  8.40it/s]

Epoch 128, Loss: 0.047765329480171204


Training Progress:  65%|██████▌   | 130/200 [00:17<00:09,  7.44it/s]

Epoch 129, Loss: 0.050020620226860046
Epoch 130, Loss: 0.050658874213695526


Training Progress:  66%|██████▌   | 132/200 [00:17<00:08,  8.22it/s]

Epoch 131, Loss: 0.05532991513609886
Epoch 132, Loss: 0.051378291100263596


Training Progress:  66%|██████▋   | 133/200 [00:17<00:07,  8.49it/s]

Epoch 133, Loss: 0.05300726369023323


Training Progress:  68%|██████▊   | 135/200 [00:18<00:08,  7.45it/s]

Epoch 134, Loss: 0.05274936184287071
Epoch 135, Loss: 0.05317256227135658


Training Progress:  68%|██████▊   | 137/200 [00:18<00:07,  8.27it/s]

Epoch 136, Loss: 0.05025073140859604
Epoch 137, Loss: 0.0486728698015213


Training Progress:  69%|██████▉   | 138/200 [00:18<00:07,  8.52it/s]

Epoch 138, Loss: 0.05160260200500488


Training Progress:  70%|███████   | 140/200 [00:18<00:08,  7.42it/s]

Epoch 139, Loss: 0.04930621013045311
Epoch 140, Loss: 0.04788510501384735


Training Progress:  71%|███████   | 142/200 [00:19<00:07,  8.24it/s]

Epoch 141, Loss: 0.050163839012384415
Epoch 142, Loss: 0.04697822034358978


Training Progress:  72%|███████▏  | 143/200 [00:19<00:06,  8.49it/s]

Epoch 143, Loss: 0.04905080795288086


Training Progress:  72%|███████▎  | 145/200 [00:19<00:07,  7.45it/s]

Epoch 144, Loss: 0.05350249260663986
Epoch 145, Loss: 0.04975640028715134


Training Progress:  74%|███████▎  | 147/200 [00:19<00:06,  8.25it/s]

Epoch 146, Loss: 0.0498315803706646
Epoch 147, Loss: 0.04348865523934364


Training Progress:  74%|███████▍  | 148/200 [00:19<00:06,  8.53it/s]

Epoch 148, Loss: 0.048430100083351135


Training Progress:  75%|███████▌  | 150/200 [00:20<00:06,  7.50it/s]

Epoch 149, Loss: 0.04690152034163475
Epoch 150, Loss: 0.04664359986782074


Training Progress:  76%|███████▌  | 152/200 [00:20<00:05,  8.29it/s]

Epoch 151, Loss: 0.05560546740889549
Epoch 152, Loss: 0.048165708780288696


Training Progress:  76%|███████▋  | 153/200 [00:20<00:05,  8.52it/s]

Epoch 153, Loss: 0.048393987119197845


Training Progress:  78%|███████▊  | 155/200 [00:20<00:06,  7.42it/s]

Epoch 154, Loss: 0.049063462764024734
Epoch 155, Loss: 0.04526162147521973


Training Progress:  78%|███████▊  | 157/200 [00:20<00:05,  8.17it/s]

Epoch 156, Loss: 0.05009601637721062
Epoch 157, Loss: 0.04484699293971062


Training Progress:  79%|███████▉  | 158/200 [00:21<00:04,  8.43it/s]

Epoch 158, Loss: 0.04983343929052353


Training Progress:  80%|████████  | 160/200 [00:21<00:05,  7.35it/s]

Epoch 159, Loss: 0.047663673758506775
Epoch 160, Loss: 0.04432050883769989


Training Progress:  81%|████████  | 162/200 [00:21<00:04,  8.15it/s]

Epoch 161, Loss: 0.049137264490127563
Epoch 162, Loss: 0.05008848011493683


Training Progress:  82%|████████▏ | 163/200 [00:21<00:04,  8.42it/s]

Epoch 163, Loss: 0.04549910128116608


Training Progress:  82%|████████▎ | 165/200 [00:22<00:04,  7.49it/s]

Epoch 164, Loss: 0.04562351107597351
Epoch 165, Loss: 0.04768240451812744


Training Progress:  83%|████████▎ | 166/200 [00:22<00:04,  7.45it/s]


Epoch 166, Loss: 0.04715947061777115
Epoch 167, Loss: 0.044776685535907745
Early stopping at epoch 167


Training Progress:   0%|          | 1/200 [00:00<00:23,  8.62it/s]

Epoch 1, Loss: 1.9999516010284424


Training Progress:   2%|▏         | 3/200 [00:00<00:28,  6.91it/s]

Epoch 2, Loss: 4.337494373321533
Epoch 3, Loss: 1.524203896522522


Training Progress:   2%|▎         | 5/200 [00:00<00:24,  7.86it/s]

Epoch 4, Loss: 1.4566493034362793
Epoch 5, Loss: 3.044597625732422


Training Progress:   3%|▎         | 6/200 [00:00<00:23,  8.19it/s]

Epoch 6, Loss: 2.2909295558929443


Training Progress:   4%|▍         | 8/200 [00:01<00:26,  7.18it/s]

Epoch 7, Loss: 2.2383992671966553
Epoch 8, Loss: 1.4260671138763428


Training Progress:   5%|▌         | 10/200 [00:01<00:24,  7.87it/s]

Epoch 9, Loss: 2.971738815307617
Epoch 10, Loss: 1.2709591388702393


Training Progress:   6%|▌         | 11/200 [00:01<00:23,  8.09it/s]

Epoch 11, Loss: 1.5897432565689087


Training Progress:   6%|▋         | 13/200 [00:01<00:25,  7.27it/s]

Epoch 12, Loss: 1.7619150876998901
Epoch 13, Loss: 1.5702890157699585


Training Progress:   8%|▊         | 15/200 [00:01<00:22,  8.06it/s]

Epoch 14, Loss: 1.3815709352493286
Epoch 15, Loss: 1.2931816577911377


Training Progress:   8%|▊         | 16/200 [00:02<00:22,  8.31it/s]

Epoch 16, Loss: 1.2996149063110352


Training Progress:   9%|▉         | 18/200 [00:02<00:24,  7.35it/s]

Epoch 17, Loss: 1.3564592599868774
Epoch 18, Loss: 1.2737199068069458


Training Progress:  10%|█         | 20/200 [00:02<00:22,  8.13it/s]

Epoch 19, Loss: 1.1634953022003174
Epoch 20, Loss: 1.146265983581543


Training Progress:  10%|█         | 21/200 [00:02<00:21,  8.24it/s]

Epoch 21, Loss: 1.12949800491333


Training Progress:  12%|█▏        | 23/200 [00:03<00:24,  7.27it/s]

Epoch 22, Loss: 1.172580361366272
Epoch 23, Loss: 1.1385987997055054


Training Progress:  12%|█▎        | 25/200 [00:03<00:21,  8.04it/s]

Epoch 24, Loss: 1.1239733695983887
Epoch 25, Loss: 1.09111487865448


Training Progress:  13%|█▎        | 26/200 [00:03<00:21,  8.17it/s]

Epoch 26, Loss: 1.0850938558578491


Training Progress:  14%|█▍        | 28/200 [00:03<00:23,  7.25it/s]

Epoch 27, Loss: 1.0232970714569092
Epoch 28, Loss: 1.0130397081375122


Training Progress:  15%|█▌        | 30/200 [00:03<00:21,  8.00it/s]

Epoch 29, Loss: 0.9708678722381592
Epoch 30, Loss: 0.9752405285835266


Training Progress:  16%|█▌        | 31/200 [00:04<00:20,  8.25it/s]

Epoch 31, Loss: 0.920958399772644


Training Progress:  16%|█▋        | 33/200 [00:04<00:22,  7.37it/s]

Epoch 32, Loss: 0.9068566560745239
Epoch 33, Loss: 0.8663419485092163


Training Progress:  18%|█▊        | 35/200 [00:04<00:20,  8.02it/s]

Epoch 34, Loss: 0.8471171259880066
Epoch 35, Loss: 0.8161116242408752


Training Progress:  18%|█▊        | 36/200 [00:04<00:20,  8.19it/s]

Epoch 36, Loss: 0.8034695386886597


Training Progress:  19%|█▉        | 38/200 [00:05<00:22,  7.31it/s]

Epoch 37, Loss: 0.7898659706115723
Epoch 38, Loss: 0.7759367227554321


Training Progress:  20%|██        | 40/200 [00:05<00:19,  8.08it/s]

Epoch 39, Loss: 0.7685392498970032
Epoch 40, Loss: 0.7292000651359558


Training Progress:  20%|██        | 41/200 [00:05<00:19,  8.35it/s]

Epoch 41, Loss: 0.7223179340362549


Training Progress:  22%|██▏       | 43/200 [00:05<00:21,  7.29it/s]

Epoch 42, Loss: 0.6978336572647095
Epoch 43, Loss: 0.6623439788818359


Training Progress:  22%|██▎       | 45/200 [00:05<00:19,  8.07it/s]

Epoch 44, Loss: 0.6525153517723083
Epoch 45, Loss: 0.6521598100662231


Training Progress:  23%|██▎       | 46/200 [00:06<00:18,  8.35it/s]

Epoch 46, Loss: 0.6402828097343445


Training Progress:  24%|██▍       | 48/200 [00:06<00:20,  7.35it/s]

Epoch 47, Loss: 0.6031734347343445
Epoch 48, Loss: 0.6035065650939941


Training Progress:  25%|██▌       | 50/200 [00:06<00:18,  8.12it/s]

Epoch 49, Loss: 0.5979751348495483
Epoch 50, Loss: 0.6257765889167786


Training Progress:  26%|██▌       | 51/200 [00:06<00:17,  8.29it/s]

Epoch 51, Loss: 0.6183034777641296


Training Progress:  26%|██▋       | 53/200 [00:07<00:20,  7.24it/s]

Epoch 52, Loss: 0.6302320957183838
Epoch 53, Loss: 0.6035317778587341


Training Progress:  28%|██▊       | 55/200 [00:07<00:17,  8.07it/s]

Epoch 54, Loss: 0.5998950004577637
Epoch 55, Loss: 0.6081214547157288


Training Progress:  28%|██▊       | 56/200 [00:07<00:17,  8.33it/s]

Epoch 56, Loss: 0.5872489809989929


Training Progress:  29%|██▉       | 58/200 [00:07<00:19,  7.38it/s]

Epoch 57, Loss: 0.5701995491981506
Epoch 58, Loss: 0.5257999897003174


Training Progress:  30%|███       | 60/200 [00:07<00:17,  8.01it/s]

Epoch 59, Loss: 0.5015361905097961
Epoch 60, Loss: 0.5006025433540344


Training Progress:  30%|███       | 61/200 [00:08<00:16,  8.29it/s]

Epoch 61, Loss: 0.4856654405593872


Training Progress:  32%|███▏      | 63/200 [00:08<00:18,  7.33it/s]

Epoch 62, Loss: 0.4520367383956909
Epoch 63, Loss: 0.4505131244659424


Training Progress:  32%|███▎      | 65/200 [00:08<00:16,  8.05it/s]

Epoch 64, Loss: 0.44306764006614685
Epoch 65, Loss: 0.42292213439941406


Training Progress:  33%|███▎      | 66/200 [00:08<00:16,  8.30it/s]

Epoch 66, Loss: 0.41632822155952454


Training Progress:  34%|███▍      | 68/200 [00:09<00:18,  7.28it/s]

Epoch 67, Loss: 0.41661545634269714
Epoch 68, Loss: 0.4265749156475067


Training Progress:  35%|███▌      | 70/200 [00:09<00:16,  8.04it/s]

Epoch 69, Loss: 0.42488840222358704
Epoch 70, Loss: 0.4284742474555969


Training Progress:  36%|███▌      | 71/200 [00:09<00:15,  8.23it/s]

Epoch 71, Loss: 0.40397197008132935


Training Progress:  36%|███▋      | 73/200 [00:09<00:17,  7.32it/s]

Epoch 72, Loss: 0.4194675087928772
Epoch 73, Loss: 0.41475793719291687


Training Progress:  38%|███▊      | 75/200 [00:09<00:15,  8.04it/s]

Epoch 74, Loss: 0.4196915626525879
Epoch 75, Loss: 0.3901519775390625


Training Progress:  38%|███▊      | 76/200 [00:10<00:15,  8.23it/s]

Epoch 76, Loss: 0.42292630672454834


Training Progress:  39%|███▉      | 78/200 [00:10<00:16,  7.29it/s]

Epoch 77, Loss: 0.38428202271461487
Epoch 78, Loss: 0.4056985080242157


Training Progress:  40%|████      | 80/200 [00:10<00:14,  8.07it/s]

Epoch 79, Loss: 0.39972221851348877
Epoch 80, Loss: 0.37710994482040405


Training Progress:  40%|████      | 81/200 [00:10<00:14,  8.27it/s]

Epoch 81, Loss: 0.3918963372707367


Training Progress:  42%|████▏     | 83/200 [00:10<00:16,  7.25it/s]

Epoch 82, Loss: 0.35815465450286865
Epoch 83, Loss: 0.3618629574775696


Training Progress:  42%|████▎     | 85/200 [00:11<00:14,  7.95it/s]

Epoch 84, Loss: 0.3887386918067932
Epoch 85, Loss: 0.3603391945362091


Training Progress:  43%|████▎     | 86/200 [00:11<00:14,  8.09it/s]

Epoch 86, Loss: 0.36242833733558655


Training Progress:  44%|████▍     | 88/200 [00:11<00:15,  7.13it/s]

Epoch 87, Loss: 0.351130872964859
Epoch 88, Loss: 0.35017475485801697


Training Progress:  45%|████▌     | 90/200 [00:11<00:13,  7.97it/s]

Epoch 89, Loss: 0.35988807678222656
Epoch 90, Loss: 0.3487335443496704


Training Progress:  46%|████▌     | 91/200 [00:12<00:13,  8.25it/s]

Epoch 91, Loss: 0.3528640866279602


Training Progress:  46%|████▋     | 93/200 [00:12<00:14,  7.25it/s]

Epoch 92, Loss: 0.35164880752563477
Epoch 93, Loss: 0.3510408401489258


Training Progress:  48%|████▊     | 95/200 [00:12<00:13,  7.99it/s]

Epoch 94, Loss: 0.33889657258987427
Epoch 95, Loss: 0.33757132291793823


Training Progress:  48%|████▊     | 96/200 [00:12<00:12,  8.21it/s]

Epoch 96, Loss: 0.33617374300956726


Training Progress:  49%|████▉     | 98/200 [00:12<00:14,  7.27it/s]

Epoch 97, Loss: 0.32529568672180176
Epoch 98, Loss: 0.33718225359916687


Training Progress:  50%|█████     | 100/200 [00:13<00:12,  7.91it/s]

Epoch 99, Loss: 0.3303123116493225
Epoch 100, Loss: 0.3379436433315277


Training Progress:  50%|█████     | 101/200 [00:13<00:12,  8.07it/s]

Epoch 101, Loss: 0.32654455304145813


Training Progress:  52%|█████▏    | 103/200 [00:13<00:13,  7.15it/s]

Epoch 102, Loss: 0.3265248239040375
Epoch 103, Loss: 0.31794092059135437


Training Progress:  52%|█████▎    | 105/200 [00:13<00:11,  7.98it/s]

Epoch 104, Loss: 0.3243792951107025
Epoch 105, Loss: 0.324476420879364


Training Progress:  53%|█████▎    | 106/200 [00:14<00:11,  8.22it/s]

Epoch 106, Loss: 0.3191491663455963


Training Progress:  54%|█████▍    | 108/200 [00:14<00:12,  7.34it/s]

Epoch 107, Loss: 0.32426273822784424
Epoch 108, Loss: 0.31948599219322205


Training Progress:  55%|█████▌    | 110/200 [00:14<00:11,  8.11it/s]

Epoch 109, Loss: 0.3230257034301758
Epoch 110, Loss: 0.31974849104881287


Training Progress:  56%|█████▌    | 111/200 [00:14<00:10,  8.27it/s]

Epoch 111, Loss: 0.30272024869918823


Training Progress:  56%|█████▋    | 113/200 [00:14<00:11,  7.26it/s]

Epoch 112, Loss: 0.30301254987716675
Epoch 113, Loss: 0.3083300292491913


Training Progress:  57%|█████▊    | 115/200 [00:15<00:10,  8.01it/s]

Epoch 114, Loss: 0.3007623255252838
Epoch 115, Loss: 0.2932319641113281


Training Progress:  58%|█████▊    | 116/200 [00:15<00:10,  8.27it/s]

Epoch 116, Loss: 0.30365678668022156


Training Progress:  59%|█████▉    | 118/200 [00:15<00:11,  7.29it/s]

Epoch 117, Loss: 0.29914212226867676
Epoch 118, Loss: 0.29768994450569153


Training Progress:  60%|██████    | 120/200 [00:15<00:10,  7.97it/s]

Epoch 119, Loss: 0.28943201899528503
Epoch 120, Loss: 0.2792482078075409


Training Progress:  60%|██████    | 121/200 [00:15<00:09,  8.26it/s]

Epoch 121, Loss: 0.2776983976364136


Training Progress:  62%|██████▏   | 123/200 [00:16<00:10,  7.27it/s]

Epoch 122, Loss: 0.26750898361206055
Epoch 123, Loss: 0.2712801396846771


Training Progress:  62%|██████▎   | 125/200 [00:16<00:09,  7.90it/s]

Epoch 124, Loss: 0.2652173340320587
Epoch 125, Loss: 0.2664148211479187


Training Progress:  63%|██████▎   | 126/200 [00:16<00:09,  8.21it/s]

Epoch 126, Loss: 0.25905492901802063


Training Progress:  64%|██████▍   | 128/200 [00:16<00:09,  7.28it/s]

Epoch 127, Loss: 0.26856374740600586
Epoch 128, Loss: 0.24744759500026703


Training Progress:  65%|██████▌   | 130/200 [00:17<00:08,  8.03it/s]

Epoch 129, Loss: 0.24472706019878387
Epoch 130, Loss: 0.2536908984184265


Training Progress:  66%|██████▌   | 131/200 [00:17<00:08,  8.15it/s]

Epoch 131, Loss: 0.2957623600959778


Training Progress:  66%|██████▋   | 133/200 [00:17<00:09,  7.31it/s]

Epoch 132, Loss: 0.29743117094039917
Epoch 133, Loss: 0.27367132902145386


Training Progress:  68%|██████▊   | 135/200 [00:17<00:08,  8.06it/s]

Epoch 134, Loss: 0.2874687612056732
Epoch 135, Loss: 0.3128950595855713


Training Progress:  68%|██████▊   | 136/200 [00:17<00:07,  8.30it/s]

Epoch 136, Loss: 0.2947370707988739


Training Progress:  69%|██████▉   | 138/200 [00:18<00:08,  7.35it/s]

Epoch 137, Loss: 0.2756445109844208
Epoch 138, Loss: 0.27042463421821594


Training Progress:  70%|███████   | 140/200 [00:18<00:07,  8.12it/s]

Epoch 139, Loss: 0.26482176780700684
Epoch 140, Loss: 0.25665462017059326


Training Progress:  70%|███████   | 141/200 [00:18<00:07,  8.35it/s]

Epoch 141, Loss: 0.2511240839958191


Training Progress:  72%|███████▏  | 143/200 [00:18<00:07,  7.28it/s]

Epoch 142, Loss: 0.24048839509487152
Epoch 143, Loss: 0.22027654945850372


Training Progress:  72%|███████▎  | 145/200 [00:19<00:06,  7.95it/s]

Epoch 144, Loss: 0.22534507513046265
Epoch 145, Loss: 0.21714608371257782


Training Progress:  73%|███████▎  | 146/200 [00:19<00:06,  8.13it/s]

Epoch 146, Loss: 0.2169678509235382


Training Progress:  74%|███████▍  | 148/200 [00:19<00:07,  7.18it/s]

Epoch 147, Loss: 0.2155901938676834
Epoch 148, Loss: 0.21302223205566406


Training Progress:  75%|███████▌  | 150/200 [00:19<00:06,  7.92it/s]

Epoch 149, Loss: 0.19422391057014465
Epoch 150, Loss: 0.21964363753795624


Training Progress:  76%|███████▌  | 151/200 [00:19<00:06,  8.11it/s]

Epoch 151, Loss: 0.22476153075695038


Training Progress:  76%|███████▋  | 153/200 [00:20<00:06,  7.28it/s]

Epoch 152, Loss: 0.21024417877197266
Epoch 153, Loss: 0.20224541425704956


Training Progress:  78%|███████▊  | 155/200 [00:20<00:05,  7.85it/s]

Epoch 154, Loss: 0.2236177921295166
Epoch 155, Loss: 0.214665487408638


Training Progress:  78%|███████▊  | 156/200 [00:20<00:05,  8.17it/s]

Epoch 156, Loss: 0.20952250063419342


Training Progress:  79%|███████▉  | 158/200 [00:20<00:05,  7.23it/s]

Epoch 157, Loss: 0.2056395262479782
Epoch 158, Loss: 0.18669232726097107


Training Progress:  80%|████████  | 160/200 [00:21<00:05,  7.90it/s]

Epoch 159, Loss: 0.18730776011943817
Epoch 160, Loss: 0.19331125915050507


Training Progress:  80%|████████  | 161/200 [00:21<00:04,  8.22it/s]

Epoch 161, Loss: 0.20351828634738922


Training Progress:  82%|████████▏ | 163/200 [00:21<00:05,  7.24it/s]

Epoch 162, Loss: 0.19247733056545258
Epoch 163, Loss: 0.1879175454378128


Training Progress:  82%|████████▎ | 165/200 [00:21<00:04,  8.03it/s]

Epoch 164, Loss: 0.19777658581733704
Epoch 165, Loss: 0.16973164677619934


Training Progress:  83%|████████▎ | 166/200 [00:21<00:04,  8.18it/s]

Epoch 166, Loss: 0.17269520461559296


Training Progress:  84%|████████▍ | 168/200 [00:22<00:04,  7.29it/s]

Epoch 167, Loss: 0.1699836254119873
Epoch 168, Loss: 0.16082099080085754


Training Progress:  85%|████████▌ | 170/200 [00:22<00:03,  8.09it/s]

Epoch 169, Loss: 0.15654197335243225
Epoch 170, Loss: 0.16266924142837524


Training Progress:  86%|████████▌ | 171/200 [00:22<00:03,  8.35it/s]

Epoch 171, Loss: 0.17528818547725677


Training Progress:  86%|████████▋ | 173/200 [00:22<00:03,  7.32it/s]

Epoch 172, Loss: 0.17443248629570007
Epoch 173, Loss: 0.15959620475769043


Training Progress:  88%|████████▊ | 175/200 [00:23<00:03,  7.93it/s]

Epoch 174, Loss: 0.19004599750041962
Epoch 175, Loss: 0.17844609916210175


Training Progress:  88%|████████▊ | 176/200 [00:23<00:02,  8.09it/s]

Epoch 176, Loss: 0.20234698057174683


Training Progress:  89%|████████▉ | 178/200 [00:23<00:03,  7.21it/s]

Epoch 177, Loss: 0.20413735508918762
Epoch 178, Loss: 0.23364749550819397


Training Progress:  90%|█████████ | 180/200 [00:23<00:02,  8.00it/s]

Epoch 179, Loss: 0.1901322901248932
Epoch 180, Loss: 0.2352316677570343


Training Progress:  90%|█████████ | 181/200 [00:23<00:02,  8.18it/s]

Epoch 181, Loss: 0.22354261577129364


Training Progress:  92%|█████████▏| 183/200 [00:24<00:02,  7.29it/s]

Epoch 182, Loss: 0.24566051363945007
Epoch 183, Loss: 0.2099277675151825


Training Progress:  92%|█████████▎| 185/200 [00:24<00:01,  8.03it/s]

Epoch 184, Loss: 0.3504524230957031
Epoch 185, Loss: 0.21602559089660645


Training Progress:  93%|█████████▎| 186/200 [00:24<00:01,  8.23it/s]

Epoch 186, Loss: 0.28905385732650757


Training Progress:  94%|█████████▍| 188/200 [00:24<00:01,  7.29it/s]

Epoch 187, Loss: 0.21819478273391724
Epoch 188, Loss: 0.21753206849098206


Training Progress:  94%|█████████▍| 188/200 [00:25<00:01,  7.51it/s]

Epoch 189, Loss: 0.20613722503185272
Early stopping at epoch 189





In [42]:
def save_embeddings(z, emb, output_path, model_name="model"):
    """
    학습된 임베딩을 CSV 파일로 저장하는 함수.

    Args:
        z (torch.Tensor): 학습된 임베딩 (노드 임베딩).
        emb (pd.DataFrame): 원본 데이터프레임 (ADM_NM 열 포함).
        output_path (str): 저장할 CSV 파일 경로.
        model_name (str): 임베딩 컬럼에 추가할 모델 이름 접두사 (예: "GCN", "GAT").

    Returns:
        None
    """
    # GPU 텐서를 NumPy 배열로 변환
    z_numpy = z.detach().cpu().numpy()

    # 새로운 데이터프레임 생성: ADM_NM + 학습된 임베딩
    embedding_columns = [f"{model_name}_embed{i}" for i in range(z_numpy.shape[1])]  # 임베딩 컬럼 이름 생성
    emb_updated = pd.DataFrame(emb[['기준일ID', '행정동코드']], columns=['기준일ID', '행정동코드'])  # ADM_NM 열만 복사
    emb_updated[embedding_columns] = z_numpy  # 임베딩 추가

    # CSV로 저장
    # emb_updated.to_csv(output_path, index=False)
    # print(f"Embeddings saved to: {output_path}")
    
    return emb_updated

In [43]:
gcn_df = save_embeddings(z_gcn, emb, 'a', 'GCN')
gat_df = save_embeddings(z_gat, emb, 'b', 'GAT')

In [44]:
gcn_df

Unnamed: 0,기준일ID,행정동코드,GCN_embed0,GCN_embed1,GCN_embed2,GCN_embed3,GCN_embed4,GCN_embed5,GCN_embed6,GCN_embed7,...,GCN_embed25,GCN_embed26,GCN_embed27,GCN_embed28,GCN_embed29,GCN_embed30,GCN_embed31,GCN_embed32,GCN_embed33,GCN_embed34
0,201701,청운효자동,-1.074775,1.010859,1.665435,-2.015037,-1.024539,0.843725,-1.007559,-1.663782,...,1.743431,1.089767,2.082939,-0.009981,0.455670,0.220273,-2.229680,-0.264066,0.534541,2.462892
1,201701,사직동,-0.956690,0.717311,1.137805,-1.975991,-0.219769,0.996532,-1.062324,-1.641588,...,1.297022,0.875547,1.785869,-0.013894,0.423590,0.208445,-2.219543,-0.272838,0.242694,2.039333
2,201701,삼청동,-1.016063,0.709976,1.073384,-2.374769,-0.114190,1.251863,-1.170677,-1.878487,...,1.424986,0.841455,1.936456,-0.118748,0.536596,0.226169,-2.537327,-0.285981,0.184181,2.246561
3,201701,부암동,-1.132262,0.798428,1.176160,-2.667495,-0.206448,1.414158,-1.250911,-2.084862,...,1.627175,0.877967,2.139970,-0.214334,0.592086,0.264167,-2.791227,-0.348340,0.243941,2.499584
4,201701,평창동,-1.210514,0.861963,1.259817,-2.804583,-0.314177,1.484705,-1.290055,-2.185333,...,1.761885,0.901824,2.261122,-0.268917,0.613923,0.283362,-2.898780,-0.383092,0.298177,2.636966
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
419,201701,성내2동,0.619395,0.045704,1.792655,0.608224,5.028989,0.806009,-1.161979,-1.452925,...,-1.510692,1.862023,-0.171477,-1.740461,-3.546756,0.678891,-1.370561,-1.721826,1.257090,1.731453
420,201701,성내3동,0.620451,0.043403,1.797358,0.623784,5.049545,0.804108,-1.163838,-1.451354,...,-1.513293,1.869788,-0.174239,-1.755055,-3.558245,0.676727,-1.386368,-1.731173,1.251065,1.733127
421,201701,길동,0.621935,0.040838,1.802128,0.638617,5.072171,0.802370,-1.165782,-1.449877,...,-1.516407,1.877763,-0.177339,-1.770049,-3.570510,0.674650,-1.403128,-1.740887,1.244996,1.735240
422,201701,둔촌1동,0.623400,0.038370,1.806558,0.652348,5.093230,0.800713,-1.167281,-1.448273,...,-1.519340,1.885019,-0.180316,-1.784241,-3.581704,0.672697,-1.418999,-1.750064,1.239304,1.737058


# AirBnB 데이터 다시 저장

In [55]:
import ast  # 문자열 리스트를 실제 리스트로 변환

def expand_llm_embeddings(df, embeddings_col='LLM Embeddings'):
    """
    주어진 데이터프레임의 LLM Embeddings 열을 확장하여 각 값을 개별 열로 변환.
    
    Parameters:
        df (pd.DataFrame): 입력 데이터프레임.
        embeddings_col (str): 확장할 열의 이름 (기본값: 'LLM Embeddings').

    Returns:
        pd.DataFrame: LLM Embeddings가 확장된 데이터프레임.
    """
    # Step 1: LLM Embeddings를 리스트로 변환
    df[embeddings_col] = df[embeddings_col].apply(ast.literal_eval)

    # Step 2: 리스트 길이 확인
    max_len = max(len(x) for x in df[embeddings_col])
    print(f"Max length of {embeddings_col}: {max_len}")

    # Step 3: LLM Embeddings를 확장하여 새 열 생성
    embedding_columns = [f"{embeddings_col}_{i}" for i in range(max_len)]
    embedding_df = pd.DataFrame(df[embeddings_col].tolist(), columns=embedding_columns, index=df.index)

    # Step 4: 기존 데이터프레임과 병합
    df = pd.concat([df, embedding_df], axis=1)

    # Step 5: 기존 LLM Embeddings 열 제거
    df = df.drop(columns=[embeddings_col])

    return df

In [59]:
# 데이터 로드
emb = pd.read_csv('../../Dataset/AirBnB_LLM/gemma2_no_listing_raw_fill_na_ver.csv')
#emb = emb[['ADM_NM', 'Reporting Month', 'LLM Embeddings']]
#emb = expand_llm_embeddings(emb, embeddings_col='LLM Embeddings')

emb

Unnamed: 0,ADM_NM,Reporting Month,Property ID,Occupancy Rate,Revenue (USD),Number of Reservations,LLM Embeddings
0,가락1동,2017-01-01,"['ab-8296118', 'ab-13633415', 'ab-14432019', '...",0.000000,0.00,0.0,"['0.15136719', '-0.13867188', '-1.2421875', '-..."
1,가락2동,2017-01-01,"['ab-6240301', 'ab-14455385', 'ab-2039222']",0.000000,0.00,0.0,"['0.20410156', '-0.1953125', '-1.234375', '-0...."
2,가락본동,2017-01-01,"['ab-7607282', 'ab-7792221', 'ab-7792395', 'ab...",0.249571,4319.67,30.0,"['0.24902344', '-0.17675781', '-1.2421875', '-..."
3,가리봉동,2017-01-01,"['ab-7538039', 'ab-11981624', 'ab-11987983', '...",0.200000,515.00,5.0,"['0.36914062', '-0.20996094', '-1.203125', '-0..."
4,가산동,2017-01-01,"['ab-9098630', 'ab-10234965', 'ab-11777964', '...",0.000000,0.00,0.0,"['0.31445312', '-0.24121094', '-1.0546875', '-..."
...,...,...,...,...,...,...,...
28403,효창동,2022-07-01,"['ab-53466085', 'ab-54039869', 'ab-59300548353...",0.209875,8802.74,44.0,"['0.15722656', '-0.29101562', '-1.4140625', '-..."
28404,후암동,2022-07-01,"['ab-660505188610948586', 'ab-6620696254432664...",0.292348,113240.44,387.0,"['0.39453125', '-0.375', '-1.234375', '-0.2773..."
28405,휘경1동,2022-07-01,"['ab-661330943863214668', 'ab-52644575', 'ab-5...",0.348942,18969.15,154.0,"['0.18457031', '-0.38085938', '-1.4453125', '-..."
28406,휘경2동,2022-07-01,"['ab-53056242', 'ab-593955664612254971', 'ab-4...",0.268609,4532.20,41.0,"['0.18652344', '-0.31054688', '-1.3515625', '-..."


In [12]:
import os

basic_path = '../../Dataset/AirBnB_LLM'
output_path = '../../Dataset/AirBnB_LLM_New'

# 파일명들 불러오기
file_names = os.listdir(basic_path)
llm_list = [s for s in file_names  if '_fill_na_ver' in s]
llm_list

['llama3_no_listing_refined_fill_na_ver.csv',
 'llama3_mixed_refined_fill_na_ver.csv',
 'llama3_mixed_raw_fill_na_ver.csv',
 'llama3_only_listings_fill_na_ver.csv',
 'gemma2_no_listing_raw_fill_na_ver.csv',
 'gemma2_no_listing_refined_fill_na_ver.csv',
 'llama3_no_listing_raw_fill_na_ver.csv']

In [64]:
# llm 데이터 다시 저장
for llm in llm_list:
    emb = pd.read_csv(f'{basic_path}/{llm}')
    labels = emb[['ADM_NM', 'Reporting Month', 'Occupancy Rate', 'Revenue (USD)', 'Number of Reservations']]  # 라벨변수
    emb = emb[['ADM_NM', 'Reporting Month', 'LLM Embeddings']]
    emb = expand_llm_embeddings(emb, embeddings_col='LLM Embeddings')
    emb.to_csv(f'{output_path}/{llm}', index=False)   # 새롭게 저장
    labels.to_csv(f'{output_path}/{llm[:-4]}_labels.csv', index=False)
    print(f'{llm} finish!')
    print(emb.shape)

Max length of LLM Embeddings: 3072
llama3_no_listing_refined_fill_na_ver.csv finish!
(28408, 3074)
Max length of LLM Embeddings: 3072
llama3_mixed_refined_fill_na_ver.csv finish!
(28408, 3074)
Max length of LLM Embeddings: 3072
llama3_mixed_raw_fill_na_ver.csv finish!
(28408, 3074)
Max length of LLM Embeddings: 3072
llama3_only_listings_fill_na_ver.csv finish!
(28408, 3074)
Max length of LLM Embeddings: 2304
gemma2_no_listing_raw_fill_na_ver.csv finish!
(28408, 2306)
Max length of LLM Embeddings: 2304
gemma2_no_listing_refined_fill_na_ver.csv finish!
(28408, 2306)
Max length of LLM Embeddings: 3072
llama3_no_listing_raw_fill_na_ver.csv finish!
(28408, 3074)


In [16]:
# 완성 버전으로 불러오기

basic_path = '../../Dataset/AirBnB_LLM_New/'

# 파일명들 불러오기
file_names = os.listdir(basic_path)
llm_list = [s for s in file_names  if '_fill_na_ver.csv' in s]
llm_list

['llama3_no_listing_refined_fill_na_ver.csv',
 'llama3_mixed_refined_fill_na_ver.csv',
 'llama3_mixed_raw_fill_na_ver.csv',
 'llama3_only_listings_fill_na_ver.csv',
 'gemma2_no_listing_raw_fill_na_ver.csv',
 'gemma2_no_listing_refined_fill_na_ver.csv',
 'llama3_no_listing_raw_fill_na_ver.csv']

In [None]:
# 새롭게 저장된 버전 불러오기
llm_df = pd.read_csv(basic_path+llm_list[0])
llm_df

Unnamed: 0,ADM_NM,Reporting Month,LLM Embeddings_0,LLM Embeddings_1,LLM Embeddings_2,LLM Embeddings_3,LLM Embeddings_4,LLM Embeddings_5,LLM Embeddings_6,LLM Embeddings_7,...,LLM Embeddings_3062,LLM Embeddings_3063,LLM Embeddings_3064,LLM Embeddings_3065,LLM Embeddings_3066,LLM Embeddings_3067,LLM Embeddings_3068,LLM Embeddings_3069,LLM Embeddings_3070,LLM Embeddings_3071
0,가락1동,2017-01-01,0.182189,0.797635,2.783483,-0.701678,0.322488,-0.958251,-0.049312,0.477725,...,-0.097656,0.215152,0.293026,-0.321801,0.168507,0.383450,1.076234,-0.122758,-0.496166,0.353151
1,가락2동,2017-01-01,0.012297,0.788067,2.737332,-0.818893,0.190079,-1.046540,-0.118247,0.573178,...,-0.084388,0.256985,0.328190,-0.304241,0.251695,0.308751,0.974102,-0.033396,-0.474156,0.405358
2,가락본동,2017-01-01,-0.188756,0.613868,2.676397,-0.363336,0.272512,-1.133036,0.080491,0.709784,...,-0.180724,0.597994,0.214895,-0.032563,0.046565,0.770207,0.793798,0.051106,-0.697922,0.204088
3,가리봉동,2017-01-01,-0.019578,0.852448,2.604572,-0.697828,0.285646,-0.979806,0.017082,0.709763,...,-0.226313,0.189473,0.370528,-0.330595,0.197076,0.346764,0.979650,0.030385,-0.429923,0.396902
4,가산동,2017-01-01,-0.112694,0.606479,2.593920,-0.665623,0.441548,-1.082860,-0.001558,0.557418,...,-0.176513,0.336681,0.450447,-0.124183,0.145434,0.386809,1.054443,0.105295,-0.655455,0.193765
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
28403,효창동,2022-07-01,-0.057492,0.588885,2.498071,-0.576995,0.077653,-1.097834,0.177970,0.717874,...,-0.014342,0.495624,0.320659,0.046189,-0.013092,0.547956,0.944268,0.102939,-0.605520,0.238395
28404,후암동,2022-07-01,-0.113301,0.605343,2.413508,-0.357536,0.357129,-0.846870,-0.298720,0.749327,...,-0.429303,0.589567,0.152206,0.046052,-0.068495,0.537594,0.725969,0.042671,-0.721411,0.627279
28405,휘경1동,2022-07-01,0.002187,0.594431,2.574777,-0.520903,0.230063,-0.997220,0.003344,0.779653,...,-0.275357,0.383240,0.274320,0.014191,0.064286,0.339190,1.015625,0.167795,-0.624249,0.414327
28406,휘경2동,2022-07-01,0.048968,0.679767,2.465750,-0.614556,0.219916,-1.036563,0.116839,0.775792,...,-0.144036,0.361136,0.269136,0.003099,0.118099,0.432616,0.995827,0.153141,-0.507071,0.349329


In [24]:
group = llm_df.groupby(['Reporting Month'])
for key, df in group:
    print(str(key[0]).replace('-',''))
    display(df)
    break

20170101


Unnamed: 0,ADM_NM,Reporting Month,LLM Embeddings_0,LLM Embeddings_1,LLM Embeddings_2,LLM Embeddings_3,LLM Embeddings_4,LLM Embeddings_5,LLM Embeddings_6,LLM Embeddings_7,...,LLM Embeddings_3062,LLM Embeddings_3063,LLM Embeddings_3064,LLM Embeddings_3065,LLM Embeddings_3066,LLM Embeddings_3067,LLM Embeddings_3068,LLM Embeddings_3069,LLM Embeddings_3070,LLM Embeddings_3071
0,가락1동,2017-01-01,0.182189,0.797635,2.783483,-0.701678,0.322488,-0.958251,-0.049312,0.477725,...,-0.097656,0.215152,0.293026,-0.321801,0.168507,0.383450,1.076234,-0.122758,-0.496166,0.353151
1,가락2동,2017-01-01,0.012297,0.788067,2.737332,-0.818893,0.190079,-1.046540,-0.118247,0.573178,...,-0.084388,0.256985,0.328190,-0.304241,0.251695,0.308751,0.974102,-0.033396,-0.474156,0.405358
2,가락본동,2017-01-01,-0.188756,0.613868,2.676397,-0.363336,0.272512,-1.133036,0.080491,0.709784,...,-0.180724,0.597994,0.214895,-0.032563,0.046565,0.770207,0.793798,0.051106,-0.697922,0.204088
3,가리봉동,2017-01-01,-0.019578,0.852448,2.604572,-0.697828,0.285646,-0.979806,0.017082,0.709763,...,-0.226313,0.189473,0.370528,-0.330595,0.197076,0.346764,0.979650,0.030385,-0.429923,0.396902
4,가산동,2017-01-01,-0.112694,0.606479,2.593920,-0.665623,0.441548,-1.082860,-0.001558,0.557418,...,-0.176513,0.336681,0.450447,-0.124183,0.145434,0.386809,1.054443,0.105295,-0.655455,0.193765
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
419,효창동,2017-01-01,-0.209827,0.555203,2.690814,-0.383771,0.146396,-1.003996,-0.103601,0.638757,...,-0.072862,0.744518,0.174405,-0.045266,0.099417,0.663327,0.911730,-0.033662,-0.775468,0.109442
420,후암동,2017-01-01,-0.162344,0.632814,2.539969,-0.355085,0.503987,-0.800747,-0.469939,0.773364,...,-0.388491,0.642842,0.226201,-0.048449,0.059380,0.547762,0.668395,0.119956,-0.755786,0.512600
421,휘경1동,2017-01-01,-0.347272,0.624385,2.570642,-0.407633,0.262688,-1.072827,0.171972,0.756842,...,-0.226987,0.545958,0.381583,-0.023453,0.094286,0.634946,0.807298,-0.003554,-0.688187,0.353662
422,휘경2동,2017-01-01,-0.198812,0.617473,2.636014,-0.380618,0.269563,-0.991828,0.192307,0.750091,...,-0.194521,0.533942,0.319364,0.139239,0.148130,0.393189,1.117845,0.114787,-0.543678,0.152803


In [20]:
# airbnb 그래프 구조 불러오기
basic_path = '../../Dataset/AirBnB_Graph/llama3_no_listing_raw/'
start_year = 2017
start_month = 1
end_year = 2022
end_month = 7

# 경로들 확인
path_list = [
    basic_path+f"{year}{month:02d}01.gpickle"
    for year in range(start_year, end_year + 1)
    for month in range(1, 13)
    if not (year == end_year and month > end_month)
]

edge_index, edge_weight = get_graph(path_list[0], llm_df, 'ADM_NM')
edge_index

tensor([[27984, 27984, 27984,  ..., 28405, 28405, 28406],
        [27988, 27990, 27997,  ..., 28406, 28407, 28407]])