In [1]:
# transformers 모듈 설치
# pip install transformers
# 모델명: google/vit-base-patch16-224

In [2]:
from transformers import ViTImageProcessor, ViTForImageClassification
from PIL import Image
import requests

url = 'http://images.cocodataset.org/val2017/000000039769.jpg'
image = Image.open(requests.get(url, stream=True).raw)

processor = ViTImageProcessor.from_pretrained('google/vit-base-patch16-224')
model = ViTForImageClassification.from_pretrained('google/vit-base-patch16-224')

inputs = processor(images=image, return_tensors="pt")
outputs = model(**inputs)
logits = outputs.logits
# model predicts one of the 1000 ImageNet classes
predicted_class_idx = logits.argmax(-1).item()
print("Predicted class:", model.config.id2label[predicted_class_idx])


Predicted class: Egyptian cat


In [3]:
# 모델명: facebook/detr-resnet-50

In [4]:
from transformers import DetrImageProcessor, DetrForObjectDetection
import torch
from PIL import Image
import requests

#url = "http://images.cocodataset.org/val2017/000000039769.jpg"
url = "https://media.triple.guide/triple-cms/c_limit,f_auto,h_2048,w_2048/3af8395b-4e75-45fe-8836-32dc84569622.jpeg"
image = Image.open(requests.get(url, stream=True).raw)

# you can specify the revision tag if you don't want the timm dependency
#timm 종속성을 원하지 않는 경우 revision 태그를 지정할 수 있습니다
processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50", revision="no_timm")
model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50", revision="no_timm")

inputs = processor(images=image, return_tensors="pt")
outputs = model(**inputs)

# convert outputs (bounding boxes and class logits) to COCO API
# let's only keep detections with score > 0.9
#출력을 COCO API로 변환합니다 (바운딩 박스 및 클래스 로짓)
#점수가 0.9 이상인 검출만 유지합니다
target_sizes = torch.tensor([image.size[::-1]])
results = processor.post_process_object_detection(outputs, target_sizes=target_sizes, threshold=0.9)[0]

for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
    box = [round(i, 2) for i in box.tolist()]
    print(
            f"Detected {model.config.id2label[label.item()]} with confidence "
            f"{round(score.item(), 3)} at location {box}"
    )

    

Detected traffic light with confidence 0.984 at location [938.32, 649.8, 963.71, 695.6]
Detected person with confidence 0.992 at location [872.5, 901.74, 934.02, 1072.96]
Detected person with confidence 0.918 at location [1114.77, 916.1, 1175.01, 1079.41]
Detected person with confidence 0.93 at location [1115.2, 911.77, 1173.35, 1079.51]
Detected person with confidence 0.931 at location [1294.0, 917.98, 1347.73, 1059.47]
Detected person with confidence 0.935 at location [1172.62, 909.66, 1214.56, 1043.15]
Detected person with confidence 0.941 at location [1090.04, 730.46, 1179.47, 856.33]
Detected car with confidence 0.996 at location [677.88, 931.63, 781.33, 998.13]
Detected car with confidence 0.998 at location [356.99, 912.97, 586.5, 1047.51]
Detected person with confidence 0.976 at location [1057.9, 905.58, 1106.6, 1079.4]
Detected person with confidence 0.916 at location [924.7, 891.87, 974.19, 998.2]
Detected person with confidence 0.961 at location [927.49, 897.07, 979.31, 1073.

In [5]:
# 모델명: nvidia/segformer-b0-finetuned-ade-512-512

In [8]:
from transformers import SegformerImageProcessor, SegformerForSemanticSegmentation
from PIL import Image
import requests
import torch

# 모델과 프로세서 초기화
processor = SegformerImageProcessor.from_pretrained("nvidia/segformer-b0-finetuned-ade-512-512")
model = SegformerForSemanticSegmentation.from_pretrained("nvidia/segformer-b0-finetuned-ade-512-512")

# 이미지 로드
url = "http://images.cocodataset.org/val2017/000000039769.jpg"
image = Image.open(requests.get(url, stream=True).raw)

# 이미지 처리
inputs = processor(images=image, return_tensors="pt")

# 모델 예측
with torch.no_grad():
    outputs = model(**inputs)

# 로짓 추출
logits = outputs.logits  # shape (batch_size, num_labels, height/4, width/4)

print(logits.shape)


torch.Size([1, 150, 128, 128])


In [9]:
# 모델명: openai-community/gpt2
# TensorFlow로 해보기

In [17]:
from transformers import GPT2Tokenizer, TFGPT2LMHeadModel
import tensorflow as tf

# 토크나이저와 모델 초기화
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = TFGPT2LMHeadModel.from_pretrained('gpt2')

# 입력 텍스트
text = "My name is Jiwon Lee. I am from South Korea and "
encoded_input = tokenizer(text, return_tensors='tf')

# 텍스트 생성
output_sequences = model.generate(
    input_ids=encoded_input['input_ids'],
    max_length=50,  # 생성할 텍스트의 최대 길이
    num_return_sequences=1,  # 생성할 텍스트의 수
    no_repeat_ngram_size=2,  # 반복 방지
    top_k=50,  # 상위 k개의 단어 중에서 선택
    top_p=0.95,  # 누적 확률이 0.95 이하인 단어들 중에서 선택
    temperature=0.7  # 샘플링 온도
)

# 생성된 텍스트 디코딩
generated_text = tokenizer.decode(output_sequences[0], skip_special_tokens=True)

print(generated_text)


All PyTorch model weights were used when initializing TFGPT2LMHeadModel.

All the weights of TFGPT2LMHeadModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFGPT2LMHeadModel for predictions without further training.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


My name is Jiwon Lee. I am from South Korea and  I am a Korean American. My family is Korean Americans.
I have been living in the United States for over 10 years. In the past I have lived in a
