<a href="https://colab.research.google.com/github/creepereye1204/Crime-prevention-project/blob/renewal/FineTuning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# dataset/
#     anchor/
#         anchor1.jpg
#         anchor2.jpg
#         ...
#     positive/
#         positive1.jpg
#         positive2.jpg
#         ...
#     negative/
#         negative1.jpg
#         negative2.jpg
#         ...


In [None]:
# Run on CPU

In [None]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from facenet_pytorch import InceptionResnetV1
from torchvision import transforms
from PIL import Image

# 1. Model: InceptionResnetV1 created with the 'vggface2' pretrained weights,
#    then switched to training mode.
model = InceptionResnetV1(pretrained='vggface2')
model.train()

# 2. Triplet loss
class TripletLoss(nn.Module):
    """Margin-based triplet loss over squared Euclidean distances.

    For every row the loss is ``relu(d(a, p) - d(a, n) + margin)``, where
    ``d`` is the squared difference summed over dimension 1.  The value
    returned by ``forward`` is the mean over all rows.
    """

    def __init__(self, margin=1.0):
        super().__init__()
        self.margin = margin

    def forward(self, anchor, positive, negative):
        """Return the mean triplet loss for a batch of embeddings."""
        # Squared Euclidean distance from each anchor to its positive/negative.
        d_pos = torch.sum((anchor - positive) ** 2, dim=1)
        d_neg = torch.sum((anchor - negative) ** 2, dim=1)
        per_triplet = (d_pos - d_neg + self.margin).relu()
        return per_triplet.mean()

# Loss object used by the training loop.
criterion = TripletLoss(margin=1.0)

# 3. Optimizer: Adam over all model parameters.
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

# Dataset of (anchor, positive, negative) image triplets
class TripletDataset(Dataset):
    """Serve image triplets drawn from three parallel directories.

    Sample ``i`` is made of the ``i``-th file (in sorted filename order) of
    ``anchor_dir``, ``positive_dir`` and ``negative_dir``.

    Args:
        anchor_dir: Directory holding the anchor images.
        positive_dir: Directory holding the positive images.
        negative_dir: Directory holding the negative images.
        transform: Optional callable applied to every loaded RGB image.
    """

    def __init__(self, anchor_dir, positive_dir, negative_dir, transform=None):
        self.anchor_dir = anchor_dir
        self.positive_dir = positive_dir
        self.negative_dir = negative_dir
        self.transform = transform
        self.anchor_images = self._list_images(anchor_dir)
        self.positive_images = self._list_images(positive_dir)
        self.negative_images = self._list_images(negative_dir)

    @staticmethod
    def _list_images(directory):
        """Return the regular, non-hidden files of *directory*, sorted by name."""
        # os.listdir() gives entries in arbitrary order and also yields
        # sub-directories and hidden entries (e.g. ``.ipynb_checkpoints``).
        # Sorting keeps the index -> file pairing reproducible; filtering
        # keeps such entries from being opened as images.
        return sorted(
            name
            for name in os.listdir(directory)
            if not name.startswith('.')
            and os.path.isfile(os.path.join(directory, name))
        )

    @staticmethod
    def _load_rgb(path):
        """Open *path*, convert it to RGB and close the underlying file."""
        with Image.open(path) as img:
            return img.convert('RGB')

    def __len__(self):
        # Limited by the smallest folder so every index has a full triplet.
        return min(len(self.anchor_images), len(self.positive_images), len(self.negative_images))

    def __getitem__(self, idx):
        """Return the ``(anchor, positive, negative)`` images at *idx*."""
        anchor_image = self._load_rgb(os.path.join(self.anchor_dir, self.anchor_images[idx]))
        positive_image = self._load_rgb(os.path.join(self.positive_dir, self.positive_images[idx]))
        negative_image = self._load_rgb(os.path.join(self.negative_dir, self.negative_images[idx]))

        if self.transform:
            anchor_image = self.transform(anchor_image)
            positive_image = self.transform(positive_image)
            negative_image = self.transform(negative_image)

        return anchor_image, positive_image, negative_image

# Image preprocessing.
# The pretrained facenet_pytorch InceptionResnetV1 weights are meant to be fed
# images standardised as (pixel - 127.5) / 128 (the library's
# ``fixed_image_standardization``), not with ImageNet mean/std.  On the
# [0, 1] output of ToTensor() that is mean = 127.5 / 255, std = 128 / 255.
transform = transforms.Compose([
    transforms.Resize((160, 160)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[127.5 / 255.0] * 3, std=[128.0 / 255.0] * 3),
])

# Folder paths (replace with the real dataset location)
anchor_dir = 'path/to/dataset/anchor'
positive_dir = 'path/to/dataset/positive'
negative_dir = 'path/to/dataset/negative'

# Data loader
dataset = TripletDataset(anchor_dir, positive_dir, negative_dir, transform=transform)
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

# Training loop
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for i, data in enumerate(dataloader, 0):
        anchor, positive, negative = data
        optimizer.zero_grad()
        anchor_embedding = model(anchor)
        positive_embedding = model(positive)
        negative_embedding = model(negative)
        loss = criterion(anchor_embedding, positive_embedding, negative_embedding)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

        if i % 10 == 9:  # report the average loss every 10 mini-batches
            print(f'[Epoch {epoch + 1}, Batch {i + 1}] loss: {running_loss / 10:.3f}')
            running_loss = 0.0

print('Finished Training')

# 4. Embedding helper used for the distance-based classification below
def get_embedding(model, image):
    """Run *model* on a single image tensor and return its output.

    The model is switched to eval mode, a leading dimension of size 1 is
    added to *image*, and the forward pass runs with gradients disabled.
    """
    model.eval()
    with torch.no_grad():
        batch = torch.unsqueeze(image, 0)
        return model(batch)

# Example: load one anchor, positive and negative image and preprocess them.
anchor_image, positive_image, negative_image = [
    transform(Image.open(image_path).convert('RGB'))
    for image_path in (
        'path/to/anchor_image.jpg',
        'path/to/positive_image.jpg',
        'path/to/negative_image.jpg',
    )
]

# Embed the three images with the model.
anchor_embedding, positive_embedding, negative_embedding = [
    get_embedding(model, img)
    for img in (anchor_image, positive_image, negative_image)
]

# Euclidean (L2) distance from the anchor to each of the other two.
pos_distance = (anchor_embedding - positive_embedding).norm(p=2).item()
neg_distance = (anchor_embedding - negative_embedding).norm(p=2).item()

# Classify the positive pair against a fixed distance threshold.
threshold = 0.5
print("Positive (같은 클래스)" if pos_distance < threshold else "Negative (다른 클래스)")

print(f'Positive Distance: {pos_distance}')
print(f'Negative Distance: {neg_distance}')


In [None]:
# Run on TPU

In [None]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from facenet_pytorch import InceptionResnetV1
from torchvision import transforms
from PIL import Image
import torch_xla
import torch_xla.core.xla_model as xm
import torch_xla.distributed.parallel_loader as pl
import torch_xla.distributed.data_parallel as dp
import torch_xla.utils.serialization as xser

# 1. Select the XLA device
device = xm.xla_device()

# 2. Model: InceptionResnetV1 with the 'vggface2' pretrained weights, put in
#    training mode and moved to the device.
model = InceptionResnetV1(pretrained='vggface2')
model.train()
model = model.to(device)

# 3. Triplet loss
class TripletLoss(nn.Module):
    """Triplet margin loss computed on squared Euclidean distances.

    ``forward`` returns the batch mean of
    ``relu(d(anchor, positive) - d(anchor, negative) + margin)`` where ``d``
    sums the squared differences over dimension 1.
    """

    def __init__(self, margin=1.0):
        super().__init__()
        self.margin = margin

    def forward(self, anchor, positive, negative):
        """Return the mean triplet loss for a batch of embeddings."""
        # Squared Euclidean distances anchor<->positive and anchor<->negative.
        to_positive = torch.sum((anchor - positive) ** 2, dim=1)
        to_negative = torch.sum((anchor - negative) ** 2, dim=1)
        violation = to_positive - to_negative + self.margin
        return violation.relu().mean()

# Loss object used by the training loop.
criterion = TripletLoss(margin=1.0)

# 4. Optimizer: Adam over all model parameters.
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

# Dataset of (anchor, positive, negative) image triplets
class TripletDataset(Dataset):
    """Serve image triplets drawn from three parallel directories.

    Sample ``i`` is made of the ``i``-th file (in sorted filename order) of
    ``anchor_dir``, ``positive_dir`` and ``negative_dir``.

    Args:
        anchor_dir: Directory holding the anchor images.
        positive_dir: Directory holding the positive images.
        negative_dir: Directory holding the negative images.
        transform: Optional callable applied to every loaded RGB image.
    """

    def __init__(self, anchor_dir, positive_dir, negative_dir, transform=None):
        self.anchor_dir = anchor_dir
        self.positive_dir = positive_dir
        self.negative_dir = negative_dir
        self.transform = transform
        self.anchor_images = self._list_images(anchor_dir)
        self.positive_images = self._list_images(positive_dir)
        self.negative_images = self._list_images(negative_dir)

    @staticmethod
    def _list_images(directory):
        """Return the regular, non-hidden files of *directory*, sorted by name."""
        # os.listdir() gives entries in arbitrary order and also yields
        # sub-directories and hidden entries (e.g. ``.ipynb_checkpoints``).
        # Sorting keeps the index -> file pairing reproducible; filtering
        # keeps such entries from being opened as images.
        return sorted(
            name
            for name in os.listdir(directory)
            if not name.startswith('.')
            and os.path.isfile(os.path.join(directory, name))
        )

    @staticmethod
    def _load_rgb(path):
        """Open *path*, convert it to RGB and close the underlying file."""
        with Image.open(path) as img:
            return img.convert('RGB')

    def __len__(self):
        # Limited by the smallest folder so every index has a full triplet.
        return min(len(self.anchor_images), len(self.positive_images), len(self.negative_images))

    def __getitem__(self, idx):
        """Return the ``(anchor, positive, negative)`` images at *idx*."""
        anchor_image = self._load_rgb(os.path.join(self.anchor_dir, self.anchor_images[idx]))
        positive_image = self._load_rgb(os.path.join(self.positive_dir, self.positive_images[idx]))
        negative_image = self._load_rgb(os.path.join(self.negative_dir, self.negative_images[idx]))

        if self.transform:
            anchor_image = self.transform(anchor_image)
            positive_image = self.transform(positive_image)
            negative_image = self.transform(negative_image)

        return anchor_image, positive_image, negative_image

# Image preprocessing.
# The pretrained facenet_pytorch InceptionResnetV1 weights are meant to be fed
# images standardised as (pixel - 127.5) / 128 (the library's
# ``fixed_image_standardization``), not with ImageNet mean/std.  On the
# [0, 1] output of ToTensor() that is mean = 127.5 / 255, std = 128 / 255.
transform = transforms.Compose([
    transforms.Resize((160, 160)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[127.5 / 255.0] * 3, std=[128.0 / 255.0] * 3),
])

# Folder paths (replace with the real dataset location)
anchor_dir = 'path/to/dataset/anchor'
positive_dir = 'path/to/dataset/positive'
negative_dir = 'path/to/dataset/negative'

# Data loader
dataset = TripletDataset(anchor_dir, positive_dir, negative_dir, transform=transform)
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

# Training loop
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for i, data in enumerate(dataloader, 0):
        anchor, positive, negative = data
        anchor, positive, negative = anchor.to(device), positive.to(device), negative.to(device)

        optimizer.zero_grad()
        anchor_embedding = model(anchor)
        positive_embedding = model(positive)
        negative_embedding = model(negative)
        loss = criterion(anchor_embedding, positive_embedding, negative_embedding)
        loss.backward()
        # The batches come from a plain DataLoader rather than torch_xla's
        # ParallelLoader, so nothing else marks the end of a step; per the
        # PyTorch/XLA guide, barrier=True is needed in that setup.
        xm.optimizer_step(optimizer, barrier=True)
        running_loss += loss.item()

        if i % 10 == 9:  # report the average loss every 10 mini-batches
            print(f'[Epoch {epoch + 1}, Batch {i + 1}] loss: {running_loss / 10:.3f}')
            running_loss = 0.0

print('Finished Training')

# 4. Embedding helper used for the distance-based classification below
def get_embedding(model, image):
    """Run *model* on a single image tensor and return its output.

    The model is switched to eval mode; with gradients disabled, *image* is
    moved to the module-level ``device``, given a leading dimension of size
    1, and passed through the model.
    """
    model.eval()
    with torch.no_grad():
        batch = torch.unsqueeze(image.to(device), 0)
        return model(batch)

# Example: load one anchor, positive and negative image and preprocess them.
anchor_image, positive_image, negative_image = [
    transform(Image.open(image_path).convert('RGB'))
    for image_path in (
        'path/to/anchor_image.jpg',
        'path/to/positive_image.jpg',
        'path/to/negative_image.jpg',
    )
]

# Embed the three images with the model.
anchor_embedding, positive_embedding, negative_embedding = [
    get_embedding(model, img)
    for img in (anchor_image, positive_image, negative_image)
]

# Euclidean (L2) distance from the anchor to each of the other two.
pos_distance = (anchor_embedding - positive_embedding).norm(p=2).item()
neg_distance = (anchor_embedding - negative_embedding).norm(p=2).item()

# Classify the positive pair against a fixed distance threshold.
threshold = 0.5
print("Positive (같은 클래스)" if pos_distance < threshold else "Negative (다른 클래스)")

print(f'Positive Distance: {pos_distance}')
print(f'Negative Distance: {neg_distance}')


In [None]:
# Run on GPU

In [None]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from facenet_pytorch import InceptionResnetV1
from torchvision import transforms
from PIL import Image

# 1. Select the device: CUDA when available, otherwise the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# 2. Model: InceptionResnetV1 with the 'vggface2' pretrained weights, put in
#    training mode and moved to the device.
model = InceptionResnetV1(pretrained='vggface2')
model.train()
model = model.to(device)

# 3. Triplet loss
class TripletLoss(nn.Module):
    """Hinge-style triplet loss using squared Euclidean distances.

    Each row contributes ``relu(d(a, p) - d(a, n) + margin)`` with ``d`` the
    squared difference summed over dimension 1; ``forward`` averages the
    contributions over the batch.
    """

    def __init__(self, margin=1.0):
        super().__init__()
        self.margin = margin

    def forward(self, anchor, positive, negative):
        """Return the mean triplet loss for a batch of embeddings."""
        # Squared Euclidean distance of each anchor to its positive/negative.
        sq_pos = torch.sum((anchor - positive) ** 2, dim=1)
        sq_neg = torch.sum((anchor - negative) ** 2, dim=1)
        hinge = (sq_pos - sq_neg + self.margin).relu()
        return hinge.mean()

# Loss object used by the training loop.
criterion = TripletLoss(margin=1.0)

# 4. Optimizer: Adam over all model parameters.
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

# Dataset of (anchor, positive, negative) image triplets
class TripletDataset(Dataset):
    """Serve image triplets drawn from three parallel directories.

    Sample ``i`` is made of the ``i``-th file (in sorted filename order) of
    ``anchor_dir``, ``positive_dir`` and ``negative_dir``.

    Args:
        anchor_dir: Directory holding the anchor images.
        positive_dir: Directory holding the positive images.
        negative_dir: Directory holding the negative images.
        transform: Optional callable applied to every loaded RGB image.
    """

    def __init__(self, anchor_dir, positive_dir, negative_dir, transform=None):
        self.anchor_dir = anchor_dir
        self.positive_dir = positive_dir
        self.negative_dir = negative_dir
        self.transform = transform
        self.anchor_images = self._list_images(anchor_dir)
        self.positive_images = self._list_images(positive_dir)
        self.negative_images = self._list_images(negative_dir)

    @staticmethod
    def _list_images(directory):
        """Return the regular, non-hidden files of *directory*, sorted by name."""
        # os.listdir() gives entries in arbitrary order and also yields
        # sub-directories and hidden entries (e.g. ``.ipynb_checkpoints``).
        # Sorting keeps the index -> file pairing reproducible; filtering
        # keeps such entries from being opened as images.
        return sorted(
            name
            for name in os.listdir(directory)
            if not name.startswith('.')
            and os.path.isfile(os.path.join(directory, name))
        )

    @staticmethod
    def _load_rgb(path):
        """Open *path*, convert it to RGB and close the underlying file."""
        with Image.open(path) as img:
            return img.convert('RGB')

    def __len__(self):
        # Limited by the smallest folder so every index has a full triplet.
        return min(len(self.anchor_images), len(self.positive_images), len(self.negative_images))

    def __getitem__(self, idx):
        """Return the ``(anchor, positive, negative)`` images at *idx*."""
        anchor_image = self._load_rgb(os.path.join(self.anchor_dir, self.anchor_images[idx]))
        positive_image = self._load_rgb(os.path.join(self.positive_dir, self.positive_images[idx]))
        negative_image = self._load_rgb(os.path.join(self.negative_dir, self.negative_images[idx]))

        if self.transform:
            anchor_image = self.transform(anchor_image)
            positive_image = self.transform(positive_image)
            negative_image = self.transform(negative_image)

        return anchor_image, positive_image, negative_image

# Image preprocessing.
# The pretrained facenet_pytorch InceptionResnetV1 weights are meant to be fed
# images standardised as (pixel - 127.5) / 128 (the library's
# ``fixed_image_standardization``), not with ImageNet mean/std.  On the
# [0, 1] output of ToTensor() that is mean = 127.5 / 255, std = 128 / 255.
transform = transforms.Compose([
    transforms.Resize((160, 160)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[127.5 / 255.0] * 3, std=[128.0 / 255.0] * 3),
])

# Folder paths (replace with the real dataset location)
anchor_dir = 'path/to/dataset/anchor'
positive_dir = 'path/to/dataset/positive'
negative_dir = 'path/to/dataset/negative'

# Data loader
dataset = TripletDataset(anchor_dir, positive_dir, negative_dir, transform=transform)
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

# Training loop
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for i, data in enumerate(dataloader, 0):
        anchor, positive, negative = data
        # Move the batch to the same device as the model.
        anchor, positive, negative = anchor.to(device), positive.to(device), negative.to(device)

        optimizer.zero_grad()
        anchor_embedding = model(anchor)
        positive_embedding = model(positive)
        negative_embedding = model(negative)
        loss = criterion(anchor_embedding, positive_embedding, negative_embedding)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

        if i % 10 == 9:  # report the average loss every 10 mini-batches
            print(f'[Epoch {epoch + 1}, Batch {i + 1}] loss: {running_loss / 10:.3f}')
            running_loss = 0.0

print('Finished Training')

# 4. Embedding helper used for the distance-based classification below
def get_embedding(model, image):
    """Run *model* on a single image tensor and return its output.

    The model is switched to eval mode; with gradients disabled, *image* is
    moved to the module-level ``device``, given a leading dimension of size
    1, and passed through the model.
    """
    model.eval()
    with torch.no_grad():
        batch = torch.unsqueeze(image.to(device), 0)
        return model(batch)

# Example: load one anchor, positive and negative image and preprocess them.
anchor_image, positive_image, negative_image = [
    transform(Image.open(image_path).convert('RGB'))
    for image_path in (
        'path/to/anchor_image.jpg',
        'path/to/positive_image.jpg',
        'path/to/negative_image.jpg',
    )
]

# Embed the three images with the model.
anchor_embedding, positive_embedding, negative_embedding = [
    get_embedding(model, img)
    for img in (anchor_image, positive_image, negative_image)
]

# Euclidean (L2) distance from the anchor to each of the other two.
pos_distance = (anchor_embedding - positive_embedding).norm(p=2).item()
neg_distance = (anchor_embedding - negative_embedding).norm(p=2).item()

# Classify the positive pair against a fixed distance threshold.
threshold = 0.5
print("Positive (같은 클래스)" if pos_distance < threshold else "Negative (다른 클래스)")

print(f'Positive Distance: {pos_distance}')
print(f'Negative Distance: {neg_distance}')


In [None]:
# Crawling

In [2]:
import os
import requests
from bs4 import BeautifulSoup

# Search term (change as needed)
query = '한국 연예인'
url = f'https://search.naver.com/search.naver?where=image&sm=tab_jum&query={query}'

# Request headers (Naver may block bots by checking the User-Agent)
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}

# Seconds to wait on a server before giving up; requests applies no timeout
# by default, so a stalled connection would otherwise hang the cell.
REQUEST_TIMEOUT = 10

response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
response.raise_for_status()  # stop here rather than parse an error page
soup = BeautifulSoup(response.text, 'html.parser')

# Collect image URLs: only <img> tags carrying a data-source attribute count
image_urls = [
    img_tag['data-source']
    for img_tag in soup.find_all('img')
    if img_tag.has_attr('data-source')
]
if not image_urls:
    # Make an empty result visible instead of silently downloading nothing.
    print('No <img> tags with a data-source attribute were found')

# Directory the images are saved into
os.makedirs('korean_celeb_images', exist_ok=True)

# Download every collected image; a failed request is reported and skipped
for i, img_url in enumerate(image_urls):
    try:
        img_response = requests.get(img_url, headers=headers, timeout=REQUEST_TIMEOUT)
        img_response.raise_for_status()  # treat HTTP error statuses as failures
    except requests.exceptions.RequestException as e:
        print(f'Failed to download image {i + 1}: {e}')
        continue

    with open(f'korean_celeb_images/korean_celeb_{i}.jpg', 'wb') as file:
        file.write(img_response.content)
    print(f'Downloaded image {i + 1}')

print('이미지 다운로드 완료')


이미지 다운로드 완료
