In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
# 현재 디렉터리 확인
!pwd

/content


In [3]:
# import해서 불러올 폴더 디렉터리로 변경 -> 즉, 폴더의 위치
# 반드시, 파일들을 같은 폴더에 해야해요!

# %cd '폴더 저장 경로'

%cd /content/drive/MyDrive/IDS/amaxon reviews 2023/최종본/import 방식/Transnet

/content/drive/MyDrive/IDS/amaxon reviews 2023/최종본/import 방식/Transnet


In [4]:
# 변경한 디렉터리의 파일 확인

!ls

config.ipynb  main.ipynb   model.py	Upload_Transnet.ipynb  utils.py
config.py     model.ipynb  __pycache__	utils.ipynb


In [5]:
# 변경한 디렉토리로 잘 되었는지 확인
!pwd

/content/drive/MyDrive/IDS/amaxon reviews 2023/최종본/import 방식/Transnet


In [6]:
!pip install torch torchvision




# main

In [7]:
import torch
from torch import nn
import inspect
import os
import pickle
import pandas as pd
import time
from torch.nn import functional as F

In [8]:
from torch.utils.data import DataLoader
from config import Config
from model import SourceNet, TargetNet, CNN, FactorizationMachine
from utils import TransNetsDataset, calculate_mse, date



In [9]:
# 학습 및 테스트 함수
def train(train_dataloader, valid_dataloader, model_S, model_T, config, model_path):
    print(f'{date()}## Start the training!')
    train_mse = calculate_mse(model_S, train_dataloader, config.device)
    valid_mse = calculate_mse(model_S, valid_dataloader, config.device)
    print(f'{date()}#### Initial train mse {train_mse:.6f}, validation mse {valid_mse:.6f}')
    start_time = time.perf_counter()

    opt_S = torch.optim.Adam(model_S.parameters(), config.learning_rate, weight_decay=config.l2_regularization)
    opt_trans = torch.optim.Adam(model_S.trans_param(), config.learning_rate, weight_decay=config.l2_regularization)
    opt_T = torch.optim.Adam(model_T.parameters(), config.learning_rate, weight_decay=config.l2_regularization)
    lr_sch_S = torch.optim.lr_scheduler.ExponentialLR(opt_S, config.learning_rate_decay)
    lr_sch_trans = torch.optim.lr_scheduler.ExponentialLR(opt_trans, config.learning_rate_decay)
    lr_sch_T = torch.optim.lr_scheduler.ExponentialLR(opt_T, config.learning_rate_decay)

    best_loss, batch_step = 100, 0
    model_T.train()
    for epoch in range(config.train_epochs):
        model_S.train()
        total_loss, total_samples = 0, 0
        for batch in train_dataloader:
            user_reviews, item_reviews, reviews, ratings, user_ids, item_ids = [x.to(config.device) for x in batch]

    ###########################################################

            # Algorithm 1. Training Transnet

                ## Step 1. Train Target Network on the actual review
            latent_T, pred_T = model_T(reviews)
            loss_T = F.l1_loss(pred_T, ratings)
            opt_T.zero_grad()
            loss_T.backward()

                ## Step 2. Learn to Transform
            latent_S, pred_S = model_S(user_reviews, item_reviews, user_ids, item_ids)
            loss_trans = F.mse_loss(latent_S, latent_T.detach())
            opt_trans.zero_grad()
            loss_trans.backward()

                ## Step 3. Train a predictor on the transform input
            loss_S = F.l1_loss(pred_S, ratings, reduction='sum')
            opt_S.zero_grad()
            loss_S.backward()

            opt_T.step()
            opt_trans.step()
            opt_S.step()

            batch_step += 1
            total_loss += loss_S.item()         # summing over all loss of source network
            total_samples += len(pred_S)

            if batch_step % 500 == 0:           # valid per 500 steps
                model_S.eval()
                valid_mse = calculate_mse(model_S, valid_dataloader, config.device)
                if best_loss > valid_mse:
                    best_loss = valid_mse
                    torch.save(model_S, model_path)
                print(f"{date()}###### Step {batch_step:3d}; validation mse {valid_mse:.6f}")
                model_S.train()

        lr_sch_S.step()
        lr_sch_trans.step()
        lr_sch_T.step()
        valid_mse = calculate_mse(model_S, valid_dataloader, config.device)
        if best_loss > valid_mse:
            best_loss = valid_mse
            torch.save(model_S, model_path)
        print(f"{date()}#### Epoch {epoch:3d}; train mse {total_loss/total_samples:.6f}; validation mse {valid_mse:.6f}")

    print(f'{date()}## End of training!')

###########################################################

# Algorithm 3. Testing using TransNet
    ## Step 3. Final Calculation

def test(dataloader, best_model, config):
    print(f'{date()}## Start the testing!')
    test_loss = calculate_mse(best_model, dataloader, config.device)
    print(f"{date()}## Test end, test mse is {test_loss:.6f}")

###########################################################

In [10]:
# 실행부
if __name__ == '__main__':
    config = Config()
    print(config)

    # 파일 경로 확인
    assert os.path.exists(config.train_file), "Train file not found"
    assert os.path.exists(config.valid_file), "Valid file not found"
    assert os.path.exists(config.test_file), "Test file not found"

    print("## Load word2vec and data...")
    word_emb = pickle.load(open('/content/drive/MyDrive/IDS/amaxon reviews 2023/Transnet/word_emb.pkl', 'rb'), encoding='iso-8859-1')
    word_dict = pickle.load(open('/content/drive/MyDrive/IDS/amaxon reviews 2023/Transnet/dict.pkl', 'rb'), encoding='iso-8859-1')

    # 데이터셋 생성
    train_dataset = TransNetsDataset(config.train_file, word_dict, config)
    valid_dataset = TransNetsDataset(config.valid_file, word_dict, config)
    test_dataset = TransNetsDataset(config.test_file, word_dict, config)

    # 데이터셋 크기 출력
    print(f"Train dataset size: {len(train_dataset)}")
    print(f"Valid dataset size: {len(valid_dataset)}")
    print(f"Test dataset size: {len(test_dataset)}")

    # 데이터 로더 생성
    train_dlr = DataLoader(train_dataset, batch_size=config.batch_size, shuffle=True)
    valid_dlr = DataLoader(valid_dataset, batch_size=config.batch_size, shuffle=True)
    test_dlr = DataLoader(test_dataset, batch_size=config.batch_size, shuffle=True)

    source_model = SourceNet(config, word_emb, extend_model=config.extension).to(config.device)
    target_model = TargetNet(config, word_emb).to(config.device)

    os.makedirs(os.path.dirname(config.model_file), exist_ok=True)

    train(train_dlr, valid_dlr, source_model, target_model, config, config.model_file)
    test(test_dlr, torch.load(config.model_file), config)


<config.Config object at 0x7f17fe946620>
## Load word2vec and data...
Train dataset size: 12795
Valid dataset size: 1599
Test dataset size: 1600
2025-01-04 07:11:43## Start the training!
2025-01-04 07:11:45#### Initial train mse 15.709378, validation mse 15.620416
2025-01-04 07:11:51#### Epoch   0; train mse 2.310313; validation mse 3.651730
2025-01-04 07:11:52#### Epoch   1; train mse 1.546060; validation mse 3.505278
2025-01-04 07:11:53#### Epoch   2; train mse 1.504590; validation mse 3.447467
2025-01-04 07:11:54#### Epoch   3; train mse 1.482420; validation mse 3.295685
2025-01-04 07:11:55###### Step 500; validation mse 2.523619
2025-01-04 07:11:55#### Epoch   4; train mse 1.467472; validation mse 3.119757
2025-01-04 07:11:56#### Epoch   5; train mse 1.453596; validation mse 3.279993
2025-01-04 07:11:57#### Epoch   6; train mse 1.440044; validation mse 3.272980
2025-01-04 07:11:58#### Epoch   7; train mse 1.421326; validation mse 3.146543
2025-01-04 07:11:59#### Epoch   8; train ms

  test(test_dlr, torch.load(config.model_file), config)
