In [47]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [48]:
import tensorflow as tf

device_name = tf.test.gpu_device_name()

if device_name == '/device:GPU:0':
    print('Found GPU at: {}'.format(device_name))
else:
    raise SystemError('GPU device not found')

Found GPU at: /device:GPU:0


In [49]:
import torch

if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    print('No GPU available, using the CPU instead.')
    device = torch.device("cpu")

In [50]:
!pip install transformers



In [51]:
import json

# JSON 파일 읽기 및 데이터 추출
def extract_captions_labels(json_file_path, num_samples = None):
    with open(json_file_path, "r") as json_file:
        data = json.load(json_file)

    captions_labels = []
    annotations = data["annotations"]

    if num_samples is not None:
        annotations = annotations[:num_samples]

    for annotation in annotations:
        caption = annotation["caption"]
        label = annotation["danger_score"]
        captions_labels.append((caption, label))

    return captions_labels

#데이터 추출
file1_captions_labels = extract_captions_labels("/content/drive/MyDrive/train_abnormal_dataset.json")

captions_labels = file1_captions_labels

print(len(captions_labels))

captions_labels = list(set(captions_labels))

print(len(captions_labels))

2091
1900


In [52]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim

# 데이터 분리
sentences, labels = zip(*captions_labels)

In [53]:
from transformers import BertTokenizer

print('Loading BERT tokenizer...')
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)

Loading BERT tokenizer...


In [54]:
# 0번 문장으로 테스트
print(' Original: ', sentences[0])

print('Tokenized: ', tokenizer.tokenize(sentences[0]))

print('Token IDs: ', tokenizer.convert_tokens_to_ids(tokenizer.tokenize(sentences[0])))

 Original:  A woman wearing a bag is standing in the middle of the road.
Tokenized:  ['a', 'woman', 'wearing', 'a', 'bag', 'is', 'standing', 'in', 'the', 'middle', 'of', 'the', 'road', '.']
Token IDs:  [1037, 2450, 4147, 1037, 4524, 2003, 3061, 1999, 1996, 2690, 1997, 1996, 2346, 1012]


In [55]:
max_len = 0

for sent in sentences:

    input_ids = tokenizer.encode(sent, add_special_tokens=True)

    max_len = max(max_len, len(input_ids))

print('Max sentence length: ', max_len)

Max sentence length:  35


In [56]:
input_ids = []
attention_masks = []

for sent in sentences:

    encoded_dict = tokenizer.encode_plus(
                        sent,
                        add_special_tokens = True,
                        max_length = 64,
                        pad_to_max_length = True,
                        return_attention_mask = True,
                        return_tensors = 'pt',
                   )

    input_ids.append(encoded_dict['input_ids'])
    attention_masks.append(encoded_dict['attention_mask'])


input_ids = torch.cat(input_ids, dim=0)
attention_masks = torch.cat(attention_masks, dim=0)
labels = torch.tensor(labels)

print('Original: ', sentences[0])
print('Token IDs:', input_ids[0])

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


Original:  A woman wearing a bag is standing in the middle of the road.
Token IDs: tensor([ 101, 1037, 2450, 4147, 1037, 4524, 2003, 3061, 1999, 1996, 2690, 1997,
        1996, 2346, 1012,  102,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0])


In [57]:
from torch.utils.data import TensorDataset, random_split

dataset = TensorDataset(input_ids, attention_masks, labels)

train_size = int(0.95 * len(dataset))
val_size = len(dataset) - train_size

train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

print('{:>5,} training samples'.format(train_size))
print('{:>5,} validation samples'.format(val_size))

1,805 training samples
   95 validation samples


In [58]:
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler

batch_size = 32

train_dataloader = DataLoader(
            train_dataset,
            sampler = RandomSampler(train_dataset),
            batch_size = batch_size
        )

validation_dataloader = DataLoader(
            val_dataset,
            sampler = SequentialSampler(val_dataset),
            batch_size = batch_size
        )

In [59]:
from transformers import BertForSequenceClassification, AdamW, BertConfig

model = BertForSequenceClassification.from_pretrained(
    "bert-base-uncased", # Use the 12-layer BERT model, with an uncased vocab.
    num_labels = 8,
    output_attentions = False,
    output_hidden_states = False,
)

model.cuda()

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e

In [60]:
optimizer = AdamW(model.parameters(),
                  lr = 2e-5,
                  eps = 1e-8
                )




In [61]:
from transformers import get_linear_schedule_with_warmup

epochs = 5

total_steps = len(train_dataloader) * epochs

scheduler = get_linear_schedule_with_warmup(optimizer,
                                            num_warmup_steps = 0,
                                            num_training_steps = total_steps)

In [62]:
import numpy as np

def flat_accuracy(preds, labels):
    pred_flat = np.argmax(preds, axis=1).flatten()
    labels_flat = labels.flatten()
    return np.sum(pred_flat == labels_flat) / len(labels_flat)

In [63]:
import time
import datetime

def format_time(elapsed):
    '''
    Takes a time in seconds and returns a string hh:mm:ss
    '''
    elapsed_rounded = int(round((elapsed)))

    return str(datetime.timedelta(seconds=elapsed_rounded))


In [64]:
import random
import numpy as np
import time
import torch
from torch.utils.data import DataLoader, TensorDataset, RandomSampler, SequentialSampler
from torch.nn import functional as F
from transformers import BertForSequenceClassification, BertTokenizer, AdamW, get_linear_schedule_with_warmup
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
from sklearn.model_selection import train_test_split
import json
from collections import defaultdict

# Seed 설정
seed_val = 42
random.seed(seed_val)
np.random.seed(seed_val)
torch.manual_seed(seed_val)
torch.cuda.manual_seed_all(seed_val)

# 모델 및 토크나이저 로드
model = BertForSequenceClassification.from_pretrained('/content/drive/MyDrive/bert', num_labels=8)
tokenizer = BertTokenizer.from_pretrained('/content/drive/MyDrive/bert')

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# JSON 파일 읽기 및 데이터 추출 함수
def extract_captions_labels(json_file_path):
    with open(json_file_path, "r") as json_file:
        data = json.load(json_file)

    captions_labels = []
    annotations = data["annotations"]
    images = {image["id"]: image["file_name"] for image in data["images"]}

    for annotation in annotations:
        image_id = annotation["image_id"]
        caption = annotation["caption"]
        label = annotation["danger_score"]
        file_name = images[image_id]
        captions_labels.append((file_name, caption, label))

    return captions_labels

# 데이터 추출
file1_captions_labels = extract_captions_labels("/content/drive/MyDrive/train_abnormal_dataset.json")

# 클래스별 데이터 분리
class_to_data = defaultdict(list)

for file_name, caption, label in file1_captions_labels:
    class_name = file_name.split("000")[0]  # "fire00000001.png" -> "fire"
    class_to_data[class_name].append((caption, label))

# 라벨 맵 정의
label_map = {0: 'Label_0(정상)', 1: 'Label_1(위험)', 2: 'Label_2(위험)',
             3: 'Label_3(위험)', 4: 'Label_4(위험)', 5: 'Label_5(위험)',
             6: 'Label_6(위험)', 7: 'Label_7(위험)'}

# 데이터 분리 (클래스별로 80장씩 검증 데이터 확보)
train_data = []
val_data = []

validation_count_per_class = 80  # 각 클래스별 검증 데이터 샘플 수

for class_name, data in class_to_data.items():
    random.shuffle(data)
    val_class_data = data[:validation_count_per_class]
    train_class_data = data[validation_count_per_class:]

    train_data.extend([(class_name, *item) for item in train_class_data])
    val_data.extend([(class_name, *item) for item in val_class_data])

# 데이터 확인
print(f"Number of training samples: {len(train_data)}")
print(f"Number of validation samples: {len(val_data)}")

# 클래스별 검증 데이터 분포 확인
val_class_distribution = defaultdict(int)
for class_name, _, _ in val_data:
    val_class_distribution[class_name] += 1

print("\nValidation Data Class Distribution:")
for class_name, count in val_class_distribution.items():
    print(f"Class: {class_name}, Count: {count}")

# 학습 데이터 로더 생성
train_sentences, train_labels = zip(*[(caption, label) for _, caption, label in train_data])
train_inputs = tokenizer(list(train_sentences), padding=True, truncation=True, return_tensors="pt")
train_labels = torch.tensor(train_labels)

train_dataset = TensorDataset(train_inputs["input_ids"], train_inputs["attention_mask"], train_labels)
train_dataloader = DataLoader(train_dataset, sampler=RandomSampler(train_dataset), batch_size=16)

# 검증 데이터 로더 생성
val_sentences, val_labels = zip(*[(caption, label) for _, caption, label in val_data])
val_inputs = tokenizer(list(val_sentences), padding=True, truncation=True, return_tensors="pt")
val_labels = torch.tensor(val_labels)

val_dataset = TensorDataset(val_inputs["input_ids"], val_inputs["attention_mask"], val_labels)
validation_dataloader = DataLoader(val_dataset, sampler=SequentialSampler(val_dataset), batch_size=16)

# 옵티마이저 및 스케줄러 설정
optimizer = AdamW(model.parameters(),lr=2e-5, eps=1e-8)
epochs = 3
total_steps = len(train_dataloader) * epochs

scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=total_steps)

# 학습 루프
training_stats = []
total_t0 = time.time()

for epoch_i in range(epochs):
    print(f"\n======== Epoch {epoch_i + 1} / {epochs} ========")
    print("Training...")

    t0 = time.time()
    total_train_loss = 0

    model.train()

    for step, batch in enumerate(train_dataloader):
        if step % 40 == 0 and step > 0:
            elapsed = time.time() - t0
            print(f"  Batch {step} of {len(train_dataloader)}. Elapsed: {elapsed:.2f} seconds.")

        b_input_ids = batch[0].to(device)
        b_input_mask = batch[1].to(device)
        b_labels = batch[2].to(device)

        model.zero_grad()

        outputs = model(
            b_input_ids,
            attention_mask=b_input_mask,
            labels=b_labels,
            return_dict=True
        )
        loss = outputs.loss
        logits = outputs.logits

        total_train_loss += loss.item()

        loss.backward()

        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()
        scheduler.step()

    avg_train_loss = total_train_loss / len(train_dataloader)
    print(f"\n  Average training loss: {avg_train_loss:.2f}")

# 검증 루프
print("\nRunning Validation...")
model.eval()

metrics_per_class = {}
mismatched_samples_per_class = defaultdict(list)

for class_name, val_data_per_class in class_to_data.items():
    class_val_sentences = [caption for caption, _ in val_data_per_class]
    class_val_labels = [label for _, label in val_data_per_class]

    val_inputs = tokenizer(list(class_val_sentences), padding=True, truncation=True, return_tensors="pt")
    val_input_ids = val_inputs["input_ids"].to(device)
    val_attention_masks = val_inputs["attention_mask"].to(device)

    with torch.no_grad():
        outputs = model(val_input_ids, attention_mask=val_attention_masks)
        logits = outputs.logits

    predicted_labels = torch.argmax(logits, dim=1).cpu().numpy()
    true_labels = np.array(class_val_labels)

    precision = precision_score(true_labels, predicted_labels, average='weighted', zero_division=0)
    recall = recall_score(true_labels, predicted_labels, average='weighted', zero_division=0)
    f1 = f1_score(true_labels, predicted_labels, average='weighted', zero_division=0)
    accuracy = accuracy_score(true_labels, predicted_labels)

    metrics_per_class[class_name] = {
        "accuracy": accuracy,
        "precision": precision,
        "recall": recall,
        "f1_score": f1
    }

    for i, (true_label, pred_label) in enumerate(zip(true_labels, predicted_labels)):
        if true_label != pred_label:
            mismatched_samples_per_class[class_name].append({
                "Input": class_val_sentences[i],
                "True Label": label_map[true_label],
                "Predicted Label": label_map[pred_label]
            })

print("\nClassification Metrics per Class:")
for class_name, metrics in metrics_per_class.items():
    print(f"Class: {class_name}")
    print(f"  Accuracy: {metrics['accuracy']:.4f}")
    print(f"  Precision: {metrics['precision']:.4f}")
    print(f"  Recall: {metrics['recall']:.4f}")
    print(f"  F1 Score: {metrics['f1_score']:.4f}")


Number of training samples: 1691
Number of validation samples: 400

Validation Data Class Distribution:
Class: fire, Count: 80
Class: traffic_accident, Count: 80
Class: fight, Count: 80
Class: caution, Count: 80
Class: fall, Count: 80

Training...
  Batch 40 of 106. Elapsed: 5.44 seconds.
  Batch 80 of 106. Elapsed: 11.90 seconds.

  Average training loss: 0.44

Training...
  Batch 40 of 106. Elapsed: 6.12 seconds.
  Batch 80 of 106. Elapsed: 13.35 seconds.

  Average training loss: 0.28

Training...
  Batch 40 of 106. Elapsed: 5.28 seconds.
  Batch 80 of 106. Elapsed: 10.48 seconds.

  Average training loss: 0.19

Running Validation...

Classification Metrics per Class:
Class: fire
  Accuracy: 0.8486
  Precision: 0.8487
  Recall: 0.8486
  F1 Score: 0.8446
Class: traffic_accident
  Accuracy: 0.9811
  Precision: 0.9816
  Recall: 0.9811
  F1 Score: 0.9802
Class: fight
  Accuracy: 0.9959
  Precision: 0.9959
  Recall: 0.9959
  F1 Score: 0.9959
Class: caution
  Accuracy: 0.9732
  Precision:

In [65]:
# 라벨별 정밀도, 재현율, F1-점수 출력
print("\nClassification Metrics per Class:")
for i, label in label_map.items():
    precision = precision_score(all_labels, all_preds, labels=[i], average='weighted', zero_division=0)
    recall = recall_score(all_labels, all_preds, labels=[i], average='weighted', zero_division=0)
    f1 = f1_score(all_labels, all_preds, labels=[i], average='weighted', zero_division=0)
    print(f"Class: {label}")
    print(f"  Precision: {precision:.4f}")
    print(f"  Recall: {recall:.4f}")
    print(f"  F1 Score: {f1:.4f}")


Classification Metrics per Class:
Class: Label_0(정상)
  Precision: 0.0000
  Recall: 0.0000
  F1 Score: 0.0000
Class: Label_1(위험)
  Precision: 0.0000
  Recall: 0.0000
  F1 Score: 0.0000
Class: Label_2(위험)
  Precision: 0.6667
  Recall: 1.0000
  F1 Score: 0.8000
Class: Label_3(위험)
  Precision: 0.9474
  Recall: 0.8852
  F1 Score: 0.9153
Class: Label_4(위험)
  Precision: 0.8958
  Recall: 0.9348
  F1 Score: 0.9149
Class: Label_5(위험)
  Precision: 0.9097
  Recall: 0.9357
  F1 Score: 0.9225
Class: Label_6(위험)
  Precision: 0.7984
  Recall: 0.8512
  F1 Score: 0.8240
Class: Label_7(위험)
  Precision: 0.7895
  Recall: 0.6122
  F1 Score: 0.6897


In [71]:
# 라벨별 데이터 개수 계산
from collections import defaultdict, Counter

train_labels = [label for _, _, label in train_data]
label_counts = Counter(train_labels)

print("\nTraining Data Label Counts:")
for label, count in label_counts.items():
    label_name = label_map.get(label, f"Label_{label}")
    print(f"{label_name}: {count} samples")



Training Data Label Counts:
Label_6(위험): 430 samples
Label_7(위험): 197 samples
Label_5(위험): 591 samples
Label_3(위험): 235 samples
Label_4(위험): 211 samples
Label_0(정상): 1 samples
Label_2(위험): 26 samples


In [67]:
model.save_pretrained('/content/drive/MyDrive/bert')
tokenizer.save_pretrained('/content/drive/MyDrive/bert')

('/content/drive/MyDrive/bert/tokenizer_config.json',
 '/content/drive/MyDrive/bert/special_tokens_map.json',
 '/content/drive/MyDrive/bert/vocab.txt',
 '/content/drive/MyDrive/bert/added_tokens.json')

## 정답과 예측 값 비교 및 정답과 예측이 다른 캡션 추출

In [68]:
# 정답과 예측이 다른 샘플 통합
mismatched_samples = []  # 정답과 예측이 다른 샘플 저장

for class_name, mismatched_samples_in_class in mismatched_samples_per_class.items():
    for sample in mismatched_samples_in_class:
        mismatched_samples.append({
            "Class": class_name,
            "Input": sample["Input"],
            "True Label": sample["True Label"],
            "Predicted Label": sample["Predicted Label"]
        })

# 정답과 예측이 다른 샘플 출력
print("\nMismatched Samples:")
for idx, sample in enumerate(mismatched_samples, 1):
    print(f"[{idx}] Input: {sample['Input']}")
    print(f"  Class: {sample['Class']}")
    print(f"  True Label: {sample['True Label']} | Predicted Label: {sample['Predicted Label']}")
    print("=" * 50)

# 정답과 예측이 다른 샘플 총 개수 출력
print(f"\nTotal Mismatched Samples: {len(mismatched_samples)}")



Mismatched Samples:
[1] Input: The building across from the construction site is on fire, and there are many people at the construction site.
  Class: fire
  True Label: Label_6(위험) | Predicted Label: Label_7(위험)
[2] Input: There is a fire burning in the grass, and next to it are two men and a car.
  Class: fire
  True Label: Label_6(위험) | Predicted Label: Label_7(위험)
[3] Input: A woman in a white coat stands outside a building on fire.
  Class: fire
  True Label: Label_7(위험) | Predicted Label: Label_6(위험)
[4] Input: A fire is burning among the parked cars on the boat.
  Class: fire
  True Label: Label_7(위험) | Predicted Label: Label_6(위험)
[5] Input: The black car next to the tree is on fire, and several cars are coming behind it.
  Class: fire
  True Label: Label_7(위험) | Predicted Label: Label_6(위험)
[6] Input: A fire breaks out in the house and a man in white clothes falls to the floor.
  Class: fire
  True Label: Label_7(위험) | Predicted Label: Label_6(위험)
[7] Input: There is a fire b

## 전체 데이터셋 F1 Score 계산

In [69]:
import numpy as np

# 전체 데이터 결합
all_sentences = []
all_true_labels = []

for class_name, data in class_to_data.items():
    sentences, true_labels = zip(*data)
    all_sentences.extend(sentences)
    all_true_labels.extend(true_labels)

# Tokenizer로 전체 데이터 변환
inputs = tokenizer(list(all_sentences), padding=True, truncation=True, return_tensors="pt")
input_ids = inputs["input_ids"].to(device)
attention_mask = inputs["attention_mask"].to(device)

# 모델 예측
model.eval()
with torch.no_grad():
    outputs = model(input_ids=input_ids, attention_mask=attention_mask)
    logits = outputs.logits
    all_predicted_labels = torch.argmax(logits, dim=1).cpu().numpy()

# 전체 성능 평가 (weighted average)
precision = precision_score(all_true_labels, all_predicted_labels, average='weighted')
recall = recall_score(all_true_labels, all_predicted_labels, average='weighted')
f1 = f1_score(all_true_labels, all_predicted_labels, average='weighted')
accuracy = accuracy_score(all_true_labels, all_predicted_labels)

# 결과 출력
print("\nOverall Classification Metrics:")
print(f"  Accuracy: {accuracy:.4f}")
print(f"  Precision: {precision:.4f}")
print(f"  Recall: {recall:.4f}")
print(f"  F1 Score: {f1:.4f}")



Overall Classification Metrics:
  Accuracy: 0.9555
  Precision: 0.9553
  Recall: 0.9555
  F1 Score: 0.9546


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
