# Conda environment specification: conda-managed packages first, then pip-only ones.
# NOTE(review): the notebook cells that follow import pandas and use XLNetTokenizer,
# but neither pandas nor sentencepiece (which XLNetTokenizer presumably needs) is
# listed here explicitly — confirm they arrive transitively or add them.
name: memit
channels:
  - pytorch
  - defaults
dependencies:
  - python=3.9.7
  - pip=21.2.4
  - cudatoolkit=11.3
  - pytorch==1.12.1
  # Everything below is installed with pip inside the conda environment.
  - pip:
    - einops==0.4.0
    - higher==0.2.1
    - hydra-core==1.2.0
    - transformers==4.23.1
    - datasets==1.18.3
    - matplotlib==3.6.1
    - spacy==3.4.1
    - scipy==1.9.2
    - scikit-learn==1.0.2
    - nltk==3.7
    - jupyter==1.0.0

In [1]:
!pip install torch==1.12.1
!pip install einops==0.4.0
!pip install higher==0.2.1
!pip install hydra-core==1.2.0
!pip install transformers==4.23.1
!pip install datasets==1.18.3
!pip install matplotlib==3.6.1
!pip install spacy==3.4.1
!pip install scipy==1.9.2
!pip install scikit-learn==1.0.2
!pip install nltk==3.7
!pip install jupyter==1.0.0
# !pip install torchdata==0.4.0
# !pip install portalocker==2.0.0
# !pip install torchtext==0.13.0

Collecting numpy<1.26.0,>=1.18.5
  Using cached numpy-1.25.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (18.3 MB)
Installing collected packages: numpy
  Attempting uninstall: numpy
    Found existing installation: numpy 1.26.4
    Uninstalling numpy-1.26.4:
      Successfully uninstalled numpy-1.26.4
ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
torchtext 0.13.0 requires torch==1.12.0, but you have torch 1.12.1 which is incompatible.
Successfully installed numpy-1.25.2


In [1]:
# List the GPUs on this machine together with the driver and CUDA versions.
!nvidia-smi

Fri May  3 11:56:53 2024       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 520.61.05    Driver Version: 520.61.05    CUDA Version: 11.8     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA RTX A6000    On   | 00000000:31:00.0 Off |                    0 |
| 30%   38C    P8    19W / 300W |      3MiB / 46068MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
|   1  NVIDIA RTX A6000    On   | 00000000:4B:00.0 Off |                    0 |
| 30%   35C    P8    20W / 300W |      3MiB / 46068MiB |      0%      Default |
|       

In [1]:
import os

# Restrict CUDA to GPU 1 only. This runs before torch is imported in the next cell.
os.environ.update(CUDA_VISIBLE_DEVICES='1')

# Read the variable back from the environment to confirm the setting.
print("Configured GPU:", os.getenv('CUDA_VISIBLE_DEVICES'))


Configured GPU: 1


In [2]:
import torch

# Sanity check of what torch can see. With CUDA_VISIBLE_DEVICES limited to a single
# GPU, the recorded output shows one device, which torch addresses as index 0
# (so selecting device 1 explicitly would not be valid here).
print("Available GPUs:", torch.cuda.device_count())
if torch.cuda.is_available():
    print("Current GPU:", torch.cuda.current_device())
else:
    # current_device() raises when CUDA cannot be initialised, so report it instead.
    print("Current GPU: none (CUDA is not available)")


Available GPUs: 1
Current GPU: 0


In [3]:
import torch
from torch.utils.data import Dataset
# from torchtext.datasets import AG_NEWS
# from torchtext.data.functional import to_map_style_dataset
from transformers import XLNetTokenizer, XLNetForSequenceClassification, Trainer, TrainingArguments
from transformers import DataCollatorWithPadding
import numpy as np
import pandas as pd


# Load the dataset
# (the torchtext-based download is disabled; local CSV files are read instead)
# train_dataset = AG_NEWS(split='train')
# test_dataset = AG_NEWS(split='test')

# The 'Description' and 'Class Index' columns of these frames are read by data_process.
train_dataset = pd.read_csv('AG_NEWS/train.csv')
test_dataset = pd.read_csv('AG_NEWS/test.csv')


# Load the XLNet tokenizer and the model (classification head with 4 labels)
tokenizer = XLNetTokenizer.from_pretrained('xlnet-base-cased')
model = XLNetForSequenceClassification.from_pretrained('xlnet-base-cased', num_labels=4)

# Preprocessing helper
def data_process(df):
    """Tokenize every description in ``df`` and build zero-based class labels.

    Reads the ``'Description'`` and ``'Class Index'`` columns of ``df``. Each
    description is passed on its own to the module-level ``tokenizer`` with
    truncation enabled and ``max_length`` padding at a length of 128.

    Returns:
        A ``(data, labels)`` pair: ``data`` is a list holding one tokenizer
        output per row, ``labels`` is a tensor holding ``int(Class Index) - 1``
        for each row.
    """
    encodings = []
    for text in df['Description']:
        encodings.append(tokenizer(text, truncation=True, padding='max_length', max_length=128))

    # Shift the class indices down by one so that labels start at 0.
    zero_based = []
    for raw_label in df['Class Index']:
        zero_based.append(int(raw_label) - 1)

    return encodings, torch.tensor(zero_based)

# Preprocess the training data and the test data.
train_data, train_labels = data_process(train_dataset)
test_data, test_labels = data_process(test_dataset)

# Dataset class definition
class AGNewsDataset(Dataset):
    """Map-style dataset pairing column-wise encodings with their labels.

    ``encodings`` maps each field name to an indexable collection with one entry
    per example; ``labels`` is an indexable collection of the same length.
    """

    def __init__(self, encodings, labels):
        self.labels = labels
        self.encodings = encodings

    def __len__(self):
        # The number of examples is defined by the labels.
        return len(self.labels)

    def __getitem__(self, idx):
        # Convert each field of the requested example to a tensor, then attach
        # the label under the 'labels' key.
        sample = {}
        for field, column in self.encodings.items():
            sample[field] = torch.tensor(column[idx])
        sample['labels'] = self.labels[idx]
        return sample

# Build the Dataset objects
def _columns_from_encodings(encodings):
    """Transpose a list of per-example encodings into one tuple per field.

    The field names are taken from the first encoding, and every value is looked
    up by key, so the result does not rely on all encodings listing their values
    in the same order.
    """
    return {field: tuple(enc[field] for enc in encodings) for field in encodings[0].keys()}


train_dataset = AGNewsDataset(_columns_from_encodings(train_data), train_labels)
test_dataset = AGNewsDataset(_columns_from_encodings(test_data), test_labels)

# TrainingArguments configuration
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=5,
    per_device_train_batch_size=256,
    per_device_eval_batch_size=8,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=100,
    # Evaluate and save a checkpoint once per epoch.
    evaluation_strategy="epoch",
    save_strategy="epoch",
    # NOTE(review): no metric_for_best_model is given. The final evaluation output
    # equals the epoch with the lowest validation loss, so the "best" checkpoint
    # appears to be chosen by loss rather than accuracy — confirm this is intended.
    load_best_model_at_end=True
)

# Metric computation
def compute_metrics(eval_pred):
    """Compute classification accuracy for an evaluation pass.

    Args:
        eval_pred: A ``(logits, labels)`` pair. ``logits`` is normally an array of
            shape ``(num_examples, num_classes)``. When the model returns several
            outputs, it may instead be a tuple whose first element is that array;
            in that case only the first element is used.

    Returns:
        ``{'accuracy': value}``, where ``value`` is the fraction of examples whose
        highest-scoring class equals the label, as a plain Python ``float``.
    """
    logits, labels = eval_pred
    # A tuple of multi-dimensional arrays means "several model outputs"; a tuple of
    # plain rows is left untouched so existing callers keep working.
    if isinstance(logits, tuple) and logits and np.ndim(logits[0]) >= 2:
        logits = logits[0]
    predictions = np.argmax(logits, axis=1)
    return {'accuracy': float((predictions == labels).mean())}

# Trainer setup
# NOTE(review): eval_dataset is the test split and training_args enables
# load_best_model_at_end, so the checkpoint is selected on the same data the final
# score is reported on — consider holding out a separate validation split.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,  # a Dataset is passed here, not a DataLoader
    eval_dataset=test_dataset,    # a Dataset is passed here, not a DataLoader
    compute_metrics=compute_metrics,
    # NOTE(review): data_process already pads every example to max_length=128, so
    # this collator presumably has nothing left to pad — confirm.
    data_collator=DataCollatorWithPadding(tokenizer)
)

# Run training
trainer.train()

# Evaluate on the test data (the eval_dataset given to the Trainer) and show the metrics
results = trainer.evaluate()
print(results)


Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'sequence_summary.summary.bias', 'logits_proj.bias', 'logits_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.

Epoch,Training Loss,Validation Loss,Accuracy
1,0.2271,0.223828,0.925132
2,0.1741,0.18231,0.9375
3,0.1197,0.183105,0.943421
4,0.0813,0.205806,0.941842
5,0.0536,0.224922,0.939079


***** Running Evaluation *****
  Num examples = 7600
  Batch size = 8
Saving model checkpoint to ./results/checkpoint-469
Configuration saved in ./results/checkpoint-469/config.json
Model weights saved in ./results/checkpoint-469/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 7600
  Batch size = 8
Saving model checkpoint to ./results/checkpoint-938
Configuration saved in ./results/checkpoint-938/config.json
Model weights saved in ./results/checkpoint-938/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 7600
  Batch size = 8
Saving model checkpoint to ./results/checkpoint-1407
Configuration saved in ./results/checkpoint-1407/config.json
Model weights saved in ./results/checkpoint-1407/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 7600
  Batch size = 8
Saving model checkpoint to ./results/checkpoint-1876
Configuration saved in ./results/checkpoint-1876/config.json
Model weights saved in ./results/checkpoint-1876/pytorch_model.bin
**

{'eval_loss': 0.18231040239334106, 'eval_accuracy': 0.9375, 'eval_runtime': 22.7431, 'eval_samples_per_second': 334.167, 'eval_steps_per_second': 41.771, 'epoch': 5.0}
