30222변지민, Llama3-8B 모델 사용

In [None]:
!pip install transformers torch gradio pillow scikit-learn

Collecting gradio
  Downloading gradio-4.36.1-py3-none-any.whl (12.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.3/12.3 MB[0m [31m24.6 MB/s[0m eta [36m0:00:00[0m
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch)
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch)
  Using cached nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl (410.6 MB)
Collecting nvidia-cufft-cu12==11.0.2.54 (from torch)
  Using cached nvidia_cufft_cu12-11.0.2.5

In [None]:

import os

import gradio as gr
import torch
from PIL import Image
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from torch.utils.data import DataLoader, random_split
from torchvision import datasets, transforms
from transformers import AutoTokenizer, LlamaModel, LlamaTokenizer


In [None]:

# Image preprocessing: resize every image to 224x224 and convert it to a tensor.
# NOTE(review): with ToTensor the dataset yields tensors, while get_embedding
# checks `image.mode` (a PIL attribute) -- confirm which input type is intended.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

# Load the dataset.
# TODO: replace 'PATH' with the image root directory before running this cell.
dataset = datasets.ImageFolder('PATH', transform=transform)

# 80/20 train/validation split; the fixed seed keeps the split reproducible
# across kernel restarts.
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(
    dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(42),
)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)


In [None]:

# Load the Llama model and its tokenizer.
model_id = 'meta-llama/Meta-Llama-3-8B-Instruct'

# Pick the device once so the model and its inputs cannot end up on different devices.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Llama 3 checkpoints ship a BPE tokenizer (tokenizer.json) rather than the
# SentencePiece model that LlamaTokenizer expects, so let AutoTokenizer resolve
# the right tokenizer class from the checkpoint.
# NOTE(review): this checkpoint is presumably gated on the Hub -- confirm that
# the environment is authenticated before running.
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = LlamaModel.from_pretrained(model_id, torch_dtype=torch.bfloat16).to(device)

# Embedding extraction helper
def get_embedding(image):
    """Return a mean-pooled `last_hidden_state` vector for one image.

    The image is converted to RGB when its mode differs, passed to the
    notebook-level `tokenizer`, and the resulting inputs are run through the
    notebook-level `model` with gradients disabled. The hidden states are
    averaged over dim 1 and squeezed.

    NOTE(review): `tokenizer` is a text tokenizer for a Llama checkpoint and
    the model looks text-only, so passing an image here is unlikely to work
    -- confirm, and consider a vision encoder instead.
    NOTE(review): `image.mode` assumes a PIL image; the dataset in this
    notebook applies ToTensor, so it presumably yields tensors -- verify.
    """
    # Convert the image to RGB
    if image.mode != 'RGB':
        image = image.convert('RGB')
    inputs = tokenizer(image, return_tensors="pt").to('cuda' if torch.cuda.is_available() else 'cpu')
    with torch.no_grad():
        outputs = model(**inputs)
    return outputs.last_hidden_state.mean(dim=1).squeeze()

# Build an embedding dataset
def create_embeddings(dataset, embed_fn=None):
    """Embed every sample of `dataset` and collect the matching labels.

    Args:
        dataset: Iterable of `(image, label)` pairs.
        embed_fn: Callable mapping one image to a torch tensor. When omitted,
            the notebook-level `get_embedding` is used.

    Returns:
        A tuple `(embeddings, labels)`. `embeddings` is a float32 CPU tensor
        with one row per sample; `labels` is a tensor built from the labels.
        Both are empty tensors when `dataset` has no samples.
    """
    if embed_fn is None:
        embed_fn = get_embedding

    embeddings, labels = [], []
    for img, label in dataset:
        # The model runs in bfloat16, which NumPy (and therefore scikit-learn)
        # cannot represent, so normalise every vector to float32 on the CPU.
        embeddings.append(embed_fn(img).detach().float().cpu())
        labels.append(label)

    if not embeddings:
        # torch.stack rejects an empty list; keep returning an empty tensor.
        return torch.tensor([]), torch.tensor(labels)

    # Stacking tensors avoids the slow list-of-ndarrays -> tensor conversion.
    return torch.stack(embeddings), torch.tensor(labels)

# Embed the training split first, then the validation split.
(train_embeddings, train_labels), (val_embeddings, val_labels) = (
    create_embeddings(split) for split in (train_dataset, val_dataset)
)


In [None]:

# Fit a logistic-regression classifier on the training embeddings.
clf = LogisticRegression(max_iter=1000).fit(train_embeddings, train_labels)

# Validation: predict on the validation embeddings and report accuracy.
val_predictions = clf.predict(val_embeddings)
accuracy = accuracy_score(val_labels, val_predictions)
print(f'Validation Accuracy: {accuracy * 100:.2f}%')


In [None]:

def predict(image):
    """Classify a single image and return its class name.

    Args:
        image: Image accepted by the notebook-level `get_embedding`.

    Returns:
        The entry of `dataset.classes` selected by the classifier's prediction.
    """
    embedding = get_embedding(image)
    # The model runs in bfloat16, which NumPy cannot represent; cast to float32
    # before handing the single-row feature matrix to scikit-learn.
    features = embedding.unsqueeze(0).float().cpu().numpy()
    prediction = clf.predict(features)
    # Index with a plain int rather than a NumPy scalar.
    return dataset.classes[int(prediction[0])]

def classify_image(image):
    """Gradio callback: delegate to `predict` and return its result unchanged."""
    label = predict(image)
    return label

# Gradio 4 removed the legacy `gr.inputs` namespace; components such as Image
# are exposed at the top level of the package.
interface = gr.Interface(
    fn=classify_image,
    inputs=gr.Image(type="pil"),
    outputs="text",
    title="예측기",
    description="이미지 업로드"
)

interface.launch()
