## 환경설정

1. http://braincrew2.iptime.org:8001 에 접속하여 회원가입해 주세요. (비밀번호는 아래 셀에 그대로 입력하게 되므로, 다른 서비스에서 사용하지 않는 단순한 비밀번호를 권장합니다. 예: 1234)
2. `username` 에 이메일 형식의 아이디를 기입해 주세요.
3. `password` 에 비밀번호를 기입해 주세요.


In [None]:
# Competition settings used by the download and submission cells below.
project = "BBCTEXT"  # do not modify
username = ""  # e-mail style login ID (e.g. abc@hello.com)
password = ""  # account password

아래의 코드를 순서대로 실행해 주세요.


In [None]:
import os
import requests

# Fetch the helper module once; later runs reuse the local copy.
if not os.path.exists("competition.py"):
    url = "https://link.teddynote.com/COMPT"
    file_name = "competition.py"
    # A timeout keeps the cell from hanging forever on a dead connection.
    response = requests.get(url, timeout=30)
    # Fail loudly on 4xx/5xx instead of saving an error page as competition.py,
    # which would later be imported as if it were the real module.
    response.raise_for_status()
    with open(file_name, "wb") as file:
        file.write(response.content)

In [None]:
import competition

# Download the competition files for `project`
competition.download_competition_files(
    f"https://link.teddynote.com/{project}", use_competition_url=False
)

100%|██████████| 1.80M/1.80M [00:00<00:00, 21.7MiB/s]


## 데이터 로드


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import warnings
import os

# Directory that holds the competition CSV files
DATA_DIR = "data"

# Silence all warnings from here on
warnings.filterwarnings("ignore")

# Random seed reused by the train/validation split below
SEED = 123

# Read the training and test CSV files from DATA_DIR
train = pd.read_csv(os.path.join(DATA_DIR, "train.csv"))
test = pd.read_csv(os.path.join(DATA_DIR, "test.csv"))

In [None]:
# Preview the first rows of the training set
train.head()

Unnamed: 0,content,label
0,officials respond in court row australian tenn...,sport
1,slow start to speedy net services faster broad...,tech
2,amnesty chief laments war failure the lack of ...,politics
3,dal maso in to replace bergamasco david dal ma...,sport
4,technology gets the creative bug the hi-tech a...,tech


In [None]:
# Preview the first rows of the test set
test.head()

Unnamed: 0,content
0,child access laws shake-up parents who refuse ...
1,fry set for role in hitchhiker s actor stephen...
2,palestinian economy in decline despite a short...
3,japanese banking battle at an end japan s sumi...
4,manufacturing recovery slowing uk manufactur...


## 토큰화 (Word Tokenization)

- get_tokenizer로 토크나이저 생성
- `basic_english`, `spacy`, `revtok`, `subword` 등 지정이 가능하나, 몇몇 토크나이저는 추가 라이브러리 설치가 필요합니다.


In [None]:
from torchtext.data.utils import get_tokenizer

# Create the torchtext "basic_english" tokenizer
tokenizer = get_tokenizer("basic_english")

## 단어사전 생성


In [None]:
from torchtext.vocab import build_vocab_from_iterator


def yield_tokens(sentences):
    """Lazily yield the token list of every text in ``sentences``.

    Each element is passed through the module-level ``tokenizer``; nothing is
    tokenized until the generator is consumed.
    """
    yield from (tokenizer(sentence) for sentence in sentences)

`build_vocab_from_iterator` 를 활용하여 단어 사전을 생성합니다.

- `min_freq`: 단어 사전에 포함되기 위한 토큰의 최소 등장 빈도입니다. 이보다 적게 등장한 토큰은 사전에서 제외됩니다.
- `max_tokens`: 단어 사전의 최대 크기입니다(스페셜 토큰 포함). 등장 빈도가 높은 토큰부터 순서대로 채워집니다.


In [None]:
vocab = build_vocab_from_iterator(
    yield_tokens(train["content"].tolist()),  # iterator of token lists
    # special tokens
    specials=["<UNK>"],
    min_freq=2,  # minimum token frequency
    max_tokens=1000,  # maximum vocabulary size
)

# Tokens missing from the vocabulary resolve to the <UNK> index
vocab.set_default_index(vocab["<UNK>"])

## 단어사전의 개수 출력


In [None]:
# Total number of entries in the vocabulary
len(vocab)

1000

## 라벨 맵 생성 (문자 -> 숫자 변환)


In [None]:
# Build label_map (label string -> index) and its inverse idx_to_label.
# Indices follow the order in which value_counts() lists the labels.
label_map = {v: i for i, v in enumerate(train["label"].value_counts().keys())}
idx_to_label = {idx: lbl for lbl, idx in label_map.items()}

{'business': 0, 'sport': 1, 'politics': 2, 'tech': 3, 'entertainment': 4}

In [None]:
# Add a numeric label column by mapping each label string through label_map
train["label_num"] = train["label"].map(label_map)

## Dataset 분할


In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the labelled rows. Despite the *_test names, this split is
# what the validation dataset is built from further down.
x_train, x_test, y_train, y_test = train_test_split(
    train["content"], train["label_num"], test_size=0.2, random_state=SEED
)

## Dataset 생성


In [None]:
from torch.utils.data import DataLoader, Dataset


class CustomDataset(Dataset):
    """Map-style dataset that converts raw text to vocabulary indices on access.

    Args:
        texts: Texts, indexed positionally through ``.iloc``.
        labels: Labels aligned with ``texts``; only stored when ``is_train``.
        vocab: Callable mapping a list of tokens to a list of indices.
        tokenizer: Callable mapping a text to a list of tokens.
        is_train: When true, items are ``(token_ids, label)``; otherwise
            only ``token_ids`` is returned.
    """

    def __init__(self, texts, labels, vocab, tokenizer, is_train=True):
        super().__init__()
        self.vocab = vocab
        self.tokenizer = tokenizer
        self.texts = texts
        self.is_train = is_train
        if self.is_train:
            # Labels exist only for labelled (train/validation) data.
            self.labels = labels

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        # Positional lookup, so a shuffled index on the input is irrelevant.
        raw_text = self.texts.iloc[idx]
        if not self.is_train:
            return self.vocab(self.tokenizer(raw_text))
        target = self.labels.iloc[idx]
        return self.vocab(self.tokenizer(raw_text)), target

In [None]:
# Build the datasets: train/validation carry labels, the test set does not
train_ds = CustomDataset(x_train, y_train, vocab=vocab, tokenizer=tokenizer)
valid_ds = CustomDataset(x_test, y_test, vocab=vocab, tokenizer=tokenizer)
test_ds = CustomDataset(
    test["content"], None, vocab=vocab, tokenizer=tokenizer, is_train=False
)

In [None]:
# Pull one sample and show its token count and label
text, label = next(iter(train_ds))
len(text), label

(281, 0)

## DataLoader 생성


In [None]:
import torch
import torch.nn as nn
from torch.nn.utils.rnn import pad_sequence

# Pick the compute device: Apple MPS if usable, otherwise CUDA, otherwise CPU.
# is_available() is used rather than is_built(): a PyTorch binary compiled with
# MPS support can still run on a machine where the backend cannot be used.
if torch.backends.mps.is_available():
    device = torch.device("mps")
elif torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

mps


In [None]:
from torch.nn.utils.rnn import pad_sequence


def collate_batch(batch, max_sequence_length, is_train=True):
    """Collate dataset samples into padded tensors on the module-level ``device``.

    Args:
        batch: Samples from the dataset; ``(token_ids, label)`` pairs when
            ``is_train`` is true, bare ``token_ids`` sequences otherwise.
        max_sequence_length: Tokens beyond this position are dropped.
        is_train: Selects the labelled or unlabelled sample layout.

    Returns:
        ``(padded_ids, labels)`` when ``is_train`` is true, else ``padded_ids``.
        ``padded_ids`` is batch-first and right-padded with index 0.
        NOTE(review): index 0 looks like the vocabulary's ``<UNK>`` index too,
        so padding and unknown tokens would share an embedding - confirm.
    """

    def _clip(token_ids):
        # Truncate to the maximum length and convert to an int64 tensor.
        return torch.tensor(token_ids[:max_sequence_length], dtype=torch.int64)

    sequences = []
    targets = []
    for sample in batch:
        if is_train:
            token_ids, target = sample
            sequences.append(_clip(token_ids))
            targets.append(target)
        else:
            sequences.append(_clip(sample))

    if is_train:
        targets = torch.tensor(targets, dtype=torch.int64)

    # Pad shorter sequences so every row in the batch has the same length.
    padded = pad_sequence(sequences, batch_first=True, padding_value=0)

    if not is_train:
        return padded.to(device)
    return padded.to(device), targets.to(device)

In [None]:
# Maximum number of tokens kept per text (e.g. 120 tokens)
MAX_SEQUENCE_LENGTH = 120

# Training loader: shuffled, labelled batches of 32
train_loader = DataLoader(
    train_ds,
    batch_size=32,
    shuffle=True,
    collate_fn=lambda x: collate_batch(x, MAX_SEQUENCE_LENGTH),
)

# Validation loader: same batching, fixed order
validation_loader = DataLoader(
    valid_ds,
    batch_size=32,
    shuffle=False,
    collate_fn=lambda x: collate_batch(x, MAX_SEQUENCE_LENGTH),
)

# Test loader: unlabelled, one sample per batch, fixed order
test_loader = DataLoader(
    test_ds,
    batch_size=1,
    shuffle=False,
    collate_fn=lambda x: collate_batch(x, MAX_SEQUENCE_LENGTH, is_train=False),
)

In [None]:
# Fetch one batch from the training loader
x, y = next(iter(train_loader))
x = x.to(device)
y = y.to(device)

# Check the shapes of x and y
# (batch_size, seq_length), (batch_size)
x.shape, y.shape

(torch.Size([32, 120]), torch.Size([32]))

## 모델


In [None]:
from tqdm import tqdm  # Progress Bar 출력
import numpy as np
import torch.nn as nn
import torch.optim as optim


class TextClassificationModel(nn.Module):
    """LSTM text classifier: embedding -> LSTM -> dropout -> FC + ReLU -> dropout -> logits.

    Args:
        num_classes: Number of output classes (size of the logits vector).
        vocab_size: Number of rows in the embedding table.
        embedding_dim: Size of each token embedding.
        hidden_size: LSTM hidden size per direction; also the FC layer width.
        num_layers: Number of stacked LSTM layers.
        bidirectional: Whether the LSTM runs in both directions.
        drop_prob: Dropout probability applied before and after the FC layer.
    """

    def __init__(
        self,
        num_classes,
        vocab_size,
        embedding_dim,
        hidden_size,
        num_layers,
        bidirectional=True,
        drop_prob=0.1,
    ):
        super().__init__()
        self.num_classes = num_classes
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # Stored as the number of directions (2 or 1), not as a bool.
        self.bidirectional = 2 if bidirectional else 1

        # Optional explicit initial state. None lets nn.LSTM start from zeros,
        # so calling init_hidden_and_cell_state() is no longer mandatory.
        self.hidden_and_cell = None

        self.embedding = nn.Embedding(
            num_embeddings=vocab_size, embedding_dim=embedding_dim
        )

        self.lstm = nn.LSTM(
            input_size=embedding_dim,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            bidirectional=bidirectional,
        )

        self.dropout = nn.Dropout(drop_prob)

        self.relu = nn.ReLU()

        self.fc = nn.Linear(hidden_size * self.bidirectional, hidden_size)
        self.output = nn.Linear(hidden_size, num_classes)

    def init_hidden_and_cell_state(self, batch_size, device):
        """Store an all-zero initial (hidden, cell) state for the next forward pass.

        Each tensor has shape (num_layers * num_directions, batch_size, hidden_size).
        """
        state_shape = (
            self.num_layers * self.bidirectional,
            batch_size,
            self.hidden_size,
        )
        self.hidden_and_cell = (
            torch.zeros(*state_shape).to(device),
            torch.zeros(*state_shape).to(device),
        )

    def forward(self, x):
        """Return class logits of shape (batch_size, num_classes).

        Args:
            x: Integer token indices of shape (batch_size, seq_length).
        """
        embedded = self.embedding(x)

        # Use the stored initial state only if it fits this batch; a missing or
        # stale state (other batch size / device) falls back to the LSTM's
        # built-in zero initialisation instead of raising.
        state = self.hidden_and_cell
        if state is not None and (
            state[0].size(1) != embedded.size(0)
            or state[0].device != embedded.device
        ):
            state = None

        _, (h_n, _) = self.lstm(embedded, state)

        # h_n is (num_layers * num_directions, batch_size, hidden_size). The last
        # `num_directions` entries are the final states of the top layer. Taking
        # output[:, -1, :] instead would give the backward direction's state
        # after it has read only the last token.
        top_layer_states = h_n[-self.bidirectional:]
        features = torch.cat(tuple(top_layer_states), dim=1)

        o = self.dropout(features)
        o = self.relu(self.fc(o))
        o = self.dropout(o)
        return self.output(o)

## 모델 생성


In [None]:
# Hyperparameters forwarded to TextClassificationModel as keyword arguments
config = {
    "num_classes": 5,
    "vocab_size": len(vocab),
    "embedding_dim": 30,
    "hidden_size": 32,
    "num_layers": 2,
    "bidirectional": True,
}

# Instantiate the model and move it to the selected device
model = TextClassificationModel(**config)
model.to(device)

TextClassificationModel(
  (embedding): Embedding(1000, 30)
  (lstm): LSTM(30, 32, num_layers=2, batch_first=True, bidirectional=True)
  (dropout): Dropout(p=0.1, inplace=False)
  (relu): ReLU()
  (fc): Linear(in_features=64, out_features=32, bias=True)
  (output): Linear(in_features=32, out_features=5, bias=True)
)

## 손실함수 및 옵티마이저 정의


In [None]:
# Loss: CrossEntropyLoss
loss_fn = nn.CrossEntropyLoss()

# Optimizer: Adam over model.parameters() with the learning rate set here
optimizer = optim.Adam(model.parameters(), lr=0.05)

In [None]:
from tqdm import tqdm


def fit(model, data_loader, loss_fn, optimizer, device, phase="train"):
    """Run one pass over ``data_loader`` and return ``(mean_loss, accuracy)``.

    Args:
        model: Module exposing ``init_hidden_and_cell_state(batch_size, device)``.
        data_loader: Iterable of ``(text, label)`` tensor batches.
        loss_fn: Callable ``loss_fn(output, label)`` returning a scalar tensor.
            Assumed to return the batch mean (the default reduction) - the
            per-sample weighting below relies on it.
        optimizer: Optimizer stepped once per batch when ``phase == "train"``.
        device: Device the batches are moved to.
        phase: ``"train"`` updates the weights; any other value only evaluates.

    Returns:
        Tuple of the per-sample mean loss and the fraction of correct
        predictions, both computed over the samples actually seen.

    Raises:
        ValueError: If ``data_loader`` yields no samples.
    """
    is_training = phase == "train"
    if is_training:
        # Training mode: dropout is active.
        model.train()
    else:
        # Evaluation mode: dropout is disabled.
        model.eval()

    # Accumulators for the loss sum, correct predictions and samples seen.
    loss_sum = 0.0
    correct = 0
    seen = 0

    # Wrap the loader in tqdm to show a progress bar while iterating.
    progress_bar = tqdm(
        data_loader, leave=False, unit="batch", total=len(data_loader), mininterval=1
    )

    for txt, lbl in progress_bar:
        # Move the text and label tensors to the target device.
        txt, lbl = txt.to(device), lbl.to(device)
        batch_size = len(txt)

        if is_training:
            # Clear gradients accumulated by the previous step.
            optimizer.zero_grad()

        with torch.set_grad_enabled(is_training):
            model.init_hidden_and_cell_state(batch_size, device)
            output = model(txt)
            loss = loss_fn(output, lbl)

            if is_training:
                loss.backward()
                optimizer.step()

        # Predicted class = index of the largest logit.
        pred = output.argmax(dim=1)
        correct += (lbl == pred).sum().item()

        # Weight each batch by its size so a smaller last batch does not skew
        # the epoch average.
        loss_sum += loss.item() * batch_size
        seen += batch_size

    if seen == 0:
        raise ValueError("data_loader yielded no samples")

    # Dividing by the samples seen stays correct under samplers or drop_last.
    return loss_sum / seen, correct / seen

In [None]:
import time

# Maximum number of epochs
num_epochs = 50

# Best (lowest) validation loss seen so far
min_loss = np.inf

# File the best model weights are saved to
STATE_DICT_PATH = "BBC-Text-Classification.pth"

# Train and validate once per epoch
for epoch in range(num_epochs):
    # Model Training
    # Returns the training loss and accuracy
    start = time.time()
    train_loss, train_acc = fit(
        model, train_loader, loss_fn, optimizer, device, phase="train"
    )

    # Returns the validation loss and accuracy
    val_loss, val_acc = fit(
        model, validation_loader, loss_fn, optimizer, device, phase="eval"
    )

    # If val_loss improved, update min_loss and save the model weights
    if val_loss < min_loss:
        print(
            f"[INFO] val_loss has been improved from {min_loss:.5f} to {val_loss:.5f}. Saving Model!"
        )
        min_loss = val_loss
        torch.save(model.state_dict(), STATE_DICT_PATH)

    time_elapsed = time.time() - start
    # Print the results for this epoch
    print(
        f"[Epoch{epoch+1:02d}] time: {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s \t loss: {train_loss:.5f}, acc: {train_acc:.5f} | val_loss: {val_loss:.5f}, val_acc: {val_acc:.5f}"
    )

                                                  

[INFO] val_loss has been improved from inf to 1.54804. Saving Model!
[Epoch01] time: 0m 1s 	 loss: 1.73921, acc: 0.25351 | val_loss: 1.54804, val_acc: 0.32584


                                                  

[INFO] val_loss has been improved from 1.54804 to 1.33906. Saving Model!
[Epoch02] time: 0m 1s 	 loss: 1.45371, acc: 0.33708 | val_loss: 1.33906, val_acc: 0.35112


                                                  

[INFO] val_loss has been improved from 1.33906 to 1.31649. Saving Model!
[Epoch03] time: 0m 1s 	 loss: 1.35062, acc: 0.38062 | val_loss: 1.31649, val_acc: 0.43820


                                                  

[INFO] val_loss has been improved from 1.31649 to 1.16235. Saving Model!
[Epoch04] time: 0m 1s 	 loss: 1.22537, acc: 0.38834 | val_loss: 1.16235, val_acc: 0.42697


                                                  

[Epoch05] time: 0m 1s 	 loss: 1.13782, acc: 0.45646 | val_loss: 1.20286, val_acc: 0.46629


                                                  

[Epoch06] time: 0m 1s 	 loss: 1.10192, acc: 0.47472 | val_loss: 1.16629, val_acc: 0.49438


                                                  

[INFO] val_loss has been improved from 1.16235 to 1.14814. Saving Model!
[Epoch07] time: 0m 1s 	 loss: 1.03616, acc: 0.49228 | val_loss: 1.14814, val_acc: 0.54213


                                                  

[Epoch08] time: 0m 1s 	 loss: 1.02745, acc: 0.49228 | val_loss: 1.28084, val_acc: 0.51685


                                                  

[INFO] val_loss has been improved from 1.14814 to 1.14598. Saving Model!
[Epoch09] time: 0m 1s 	 loss: 0.97822, acc: 0.53792 | val_loss: 1.14598, val_acc: 0.46067


                                                  

[Epoch10] time: 0m 1s 	 loss: 0.94439, acc: 0.54565 | val_loss: 1.26801, val_acc: 0.55337


                                                  

[INFO] val_loss has been improved from 1.14598 to 1.14480. Saving Model!
[Epoch11] time: 0m 1s 	 loss: 0.97128, acc: 0.53371 | val_loss: 1.14480, val_acc: 0.47472


                                                  

[Epoch12] time: 0m 1s 	 loss: 1.02625, acc: 0.54143 | val_loss: 1.18371, val_acc: 0.50000


                                                  

[Epoch13] time: 0m 1s 	 loss: 1.03783, acc: 0.54775 | val_loss: 1.18875, val_acc: 0.42135


                                                  

[INFO] val_loss has been improved from 1.14480 to 1.12214. Saving Model!
[Epoch14] time: 0m 1s 	 loss: 1.16165, acc: 0.48244 | val_loss: 1.12214, val_acc: 0.51124


                                                  

[Epoch15] time: 0m 1s 	 loss: 1.15152, acc: 0.48666 | val_loss: 1.25575, val_acc: 0.49438


                                                  

[INFO] val_loss has been improved from 1.12214 to 1.10257. Saving Model!
[Epoch16] time: 0m 1s 	 loss: 1.08030, acc: 0.51124 | val_loss: 1.10257, val_acc: 0.54775


                                                  

[Epoch17] time: 0m 1s 	 loss: 1.00182, acc: 0.55337 | val_loss: 1.27186, val_acc: 0.52528


                                                  

[Epoch18] time: 0m 1s 	 loss: 1.03513, acc: 0.55758 | val_loss: 1.12043, val_acc: 0.60112


                                                  

[Epoch19] time: 0m 1s 	 loss: 0.98195, acc: 0.58708 | val_loss: 1.22516, val_acc: 0.51404


                                                  

[Epoch20] time: 0m 1s 	 loss: 1.01532, acc: 0.58778 | val_loss: 1.29248, val_acc: 0.47472


                                                  

[Epoch21] time: 0m 1s 	 loss: 1.01088, acc: 0.56039 | val_loss: 1.28032, val_acc: 0.45225


                                                  

[Epoch22] time: 0m 1s 	 loss: 1.05665, acc: 0.56671 | val_loss: 1.29050, val_acc: 0.48596


                                                  

[Epoch23] time: 0m 1s 	 loss: 0.99098, acc: 0.56882 | val_loss: 1.24929, val_acc: 0.48876


                                                  

[Epoch24] time: 0m 1s 	 loss: 0.95418, acc: 0.58778 | val_loss: 1.14880, val_acc: 0.57584


                                                  

[Epoch25] time: 0m 1s 	 loss: 1.04862, acc: 0.54705 | val_loss: 1.30636, val_acc: 0.48596


                                                  

[Epoch26] time: 0m 1s 	 loss: 1.12018, acc: 0.51615 | val_loss: 1.22341, val_acc: 0.48315


                                                  

[INFO] val_loss has been improved from 1.10257 to 1.10242. Saving Model!
[Epoch27] time: 0m 1s 	 loss: 1.08787, acc: 0.53160 | val_loss: 1.10242, val_acc: 0.53090


                                                  

[INFO] val_loss has been improved from 1.10242 to 0.99721. Saving Model!
[Epoch28] time: 0m 1s 	 loss: 1.00024, acc: 0.57514 | val_loss: 0.99721, val_acc: 0.56742


                                                  

[Epoch29] time: 0m 1s 	 loss: 0.93144, acc: 0.58638 | val_loss: 1.08381, val_acc: 0.55337


                                                  

[Epoch30] time: 0m 1s 	 loss: 0.87436, acc: 0.62360 | val_loss: 1.00890, val_acc: 0.57022


                                                  

[Epoch31] time: 0m 1s 	 loss: 0.90116, acc: 0.62640 | val_loss: 1.08643, val_acc: 0.58146


                                                  

[Epoch32] time: 0m 1s 	 loss: 0.87248, acc: 0.62219 | val_loss: 1.07115, val_acc: 0.50000


                                                  

[Epoch33] time: 0m 1s 	 loss: 0.92714, acc: 0.59691 | val_loss: 1.10900, val_acc: 0.49438


                                                  

[Epoch34] time: 0m 1s 	 loss: 0.88691, acc: 0.60885 | val_loss: 1.20281, val_acc: 0.53371


                                                  

[Epoch35] time: 0m 1s 	 loss: 0.96899, acc: 0.57303 | val_loss: 1.17705, val_acc: 0.53371


                                                  

[Epoch36] time: 0m 1s 	 loss: 0.94896, acc: 0.59270 | val_loss: 1.15528, val_acc: 0.50000


                                                  

[Epoch37] time: 0m 1s 	 loss: 0.97770, acc: 0.57444 | val_loss: 1.22656, val_acc: 0.51404


                                                  

[Epoch38] time: 0m 1s 	 loss: 0.98075, acc: 0.57935 | val_loss: 1.09690, val_acc: 0.60955


                                                  

[Epoch39] time: 0m 1s 	 loss: 0.93542, acc: 0.60042 | val_loss: 1.21284, val_acc: 0.58989


                                                  

[Epoch40] time: 0m 1s 	 loss: 1.00675, acc: 0.56882 | val_loss: 1.13800, val_acc: 0.60393


                                                  

[Epoch41] time: 0m 1s 	 loss: 0.93146, acc: 0.59691 | val_loss: 1.22316, val_acc: 0.40169


                                                  

[Epoch42] time: 0m 1s 	 loss: 0.96894, acc: 0.57584 | val_loss: 1.31261, val_acc: 0.53371


                                                  

[Epoch43] time: 0m 1s 	 loss: 0.99839, acc: 0.58216 | val_loss: 1.21779, val_acc: 0.55618


                                                  

[Epoch44] time: 0m 1s 	 loss: 0.98960, acc: 0.57584 | val_loss: 1.14833, val_acc: 0.57022


                                                  

[Epoch45] time: 0m 1s 	 loss: 0.94870, acc: 0.58076 | val_loss: 1.20729, val_acc: 0.57303


                                                  

[Epoch46] time: 0m 1s 	 loss: 0.90347, acc: 0.61025 | val_loss: 1.14267, val_acc: 0.54775


                                                  

[Epoch47] time: 0m 1s 	 loss: 0.92972, acc: 0.61447 | val_loss: 1.19454, val_acc: 0.51966


                                                  

[Epoch48] time: 0m 1s 	 loss: 0.96574, acc: 0.58287 | val_loss: 1.27584, val_acc: 0.57303


                                                  

[Epoch49] time: 0m 1s 	 loss: 0.89536, acc: 0.61306 | val_loss: 1.07941, val_acc: 0.53933


                                                  

[Epoch50] time: 0m 1s 	 loss: 0.85811, acc: 0.62430 | val_loss: 1.13345, val_acc: 0.51966




## 저장한 가중치 로드


In [None]:
# Restore the best weights saved during training. map_location remaps the
# stored tensors onto the current device, so a checkpoint written on another
# accelerator can still be loaded here.
model.load_state_dict(torch.load(STATE_DICT_PATH, map_location=device))

<All keys matched successfully>

## 최종 검증 손실 및 정확도 출력


In [None]:
# Compute the final validation loss and validation accuracy
final_loss, final_acc = fit(
    model, validation_loader, loss_fn, optimizer, device, phase="eval"
)
print(
    f"\nevaluation loss: {final_loss:.5f}, evaluation accuracy: {final_acc:.5f}")

                                         


evaluation loss: 0.99721, evaluation accuracy: 0.56742




## 예측코드


In [None]:
# Predict a class index for every sample in the test loader.
predictions = []
model = model.to(device)
# Switch to evaluation mode
model.eval()

with torch.no_grad():
    for x in test_loader:
        x = x.to(device)
        model.init_hidden_and_cell_state(len(x), device)
        y_hat = model(x)
        # tolist() + extend keeps one prediction per sample for any batch
        # size; .item() only works when the batch holds a single sample.
        predictions.extend(y_hat.argmax(dim=1).tolist())

In [None]:
# Inverse mapping: class index -> label string
idx_to_label = {idx: lbl for lbl, idx in label_map.items()}
idx_to_label

{0: 'business', 1: 'sport', 2: 'politics', 3: 'tech', 4: 'entertainment'}

In [None]:
# Convert predicted indices back to label strings and preview the first five
your_answer = [idx_to_label[p] for p in predictions]
your_answer[:5]

['politics', 'politics', 'politics', 'politics', 'business']

## 결과 제출

- 느리다고 중지 후 다시 평가 코드를 실행하는 경우 제출 과정에서 패널티가 발생할 수 있습니다. (제출 횟수 이슈 발생 가능)
- 제출결과는 [대회페이지](http://braincrew2.iptime.org:8001/competitions/BBCTEXT/)의 `리더보드` 와 `제출` 탭에서 확인할 수 있습니다.


아래 Cell을 실행하여 예측 결과 업데이트


In [None]:
import competition

# Write the predictions into the submission file's "label" column
submission = pd.read_csv(os.path.join(DATA_DIR, "submission.csv"))
submission["label"] = your_answer

# Show the table, then submit it with the credentials set at the top
display(submission)
competition.submit(project, username, password, submission)

Unnamed: 0,label
0,politics
1,politics
2,politics
3,politics
4,business
...,...
440,politics
441,business
442,entertainment
443,business


아이디:  sample@test.com
파일명:  submissions/20240227-175327-submission.csv
[제출에 성공하였습니다]
제출 결과: 0.5325842696629214
