In [1]:
# Pinned to the version this notebook was last executed with (see the recorded
# install log) so a rerun resolves the same package. %pip installs into the
# environment of the running kernel.
%pip install ultralytics==8.1.2
# %pip install torch  # if ever needed: the PyPI distribution is "torch", not "pytorch"

Collecting ultralytics
  Obtaining dependency information for ultralytics from https://files.pythonhosted.org/packages/e0/06/f07c56b8080af737ad42991fc1bb4e94da490c5227225c8337b8c248bd44/ultralytics-8.1.2-py3-none-any.whl.metadata
  Downloading ultralytics-8.1.2-py3-none-any.whl.metadata (40 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m40.2/40.2 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
Collecting thop>=0.1.1 (from ultralytics)
  Downloading thop-0.1.1.post2209072238-py3-none-any.whl (15 kB)
Downloading ultralytics-8.1.2-py3-none-any.whl (699 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m699.7/699.7 kB[0m [31m19.1 MB/s[0m eta [36m0:00:00[0m00:01[0m
[?25hInstalling collected packages: thop, ultralytics
Successfully installed thop-0.1.1.post2209072238 ultralytics-8.1.2


In [2]:
# !pip install transformers

In [3]:
#transform.py
from torchvision.transforms import Compose, Resize, ToTensor, Normalize, ToPILImage, v2, CenterCrop

# Per-channel statistics handed to Normalize by both pipelines
# (these match the commonly used ImageNet mean/std values).
_NORM_MEAN = [0.485, 0.456, 0.406]
_NORM_STD = [0.229, 0.224, 0.225]

# Training pipeline: convert to a PIL image, Resize(256), CenterCrop(224),
# flip horizontally with probability 0.5, then convert to a tensor and normalise.
train_transform = Compose(
    [
        ToPILImage(),
        Resize(256),
        CenterCrop(224),
        v2.RandomHorizontalFlip(p=0.5),
        ToTensor(),
        Normalize(mean=list(_NORM_MEAN), std=list(_NORM_STD)),
    ]
)

# Evaluation pipeline: the same steps without the random flip.
test_transform = Compose(
    [
        ToPILImage(),
        Resize(256),
        CenterCrop(224),
        ToTensor(),
        Normalize(mean=list(_NORM_MEAN), std=list(_NORM_STD)),
    ]
)



In [44]:
#config.py
# Imported here because this cell runs before the cell that imports torch;
# without it the device probe at the bottom raises NameError on a fresh kernel.
import torch

# Class names for each attribute. predict() looks a label up with the argmax
# index of the matching model's output, so the position of each name matters.
gender_list = ["female", "male"]
age_list = ["20-30s", "40-50s", "baby", "kid", "senior", "teenager"]
race_list = ["caucasian", "mongoloid", "negroid"]
skintone_list = ["dark", "light", "mid-dark", "mid-light"]
masked_list = ["masked", "unmasked"]
emotion_list = ["anger", "disgust", "fear", "happiness", "neutral", "sadness", "surprise"]

# Indexed in parallel with the model tuple inside predict():
# gender, age, race, skintone, emotion, masked.
classify_lst = [gender_list, age_list, race_list,
                skintone_list, emotion_list, masked_list]

# Device used by predict() for the input batch.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [45]:
import numpy as np
from ultralytics import YOLO
import torch
import cv2
import matplotlib.pyplot as plt

class FaceDetector:
    """Wrapper around a YOLO face-detection checkpoint.

    Detections are passed between methods as rows whose first four values are
    box corners (x1, y1, x2, y2) divided by the image width/height.
    """

    def __init__(self):
        # Stored on the instance only; it is not passed to the YOLO model here.
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.model = self.load_model()

    def load_model(self):
        """Load the detector checkpoint from its fixed path and fuse the model."""
        model = YOLO('/kaggle/input/forfdsminds/models/detect/yolov8n/latest/best.pt')
        model.fuse()
        return model

    def predict(self, img):
        """Run the detector on ``img`` and return its raw results."""
        return self.model(img)

    def get_bboxes(self, img):
        """Return the detections of ``img`` with corner coordinates normalised.

        The first result's ``boxes.data`` is divided column-wise by
        (width, height, width, height, 1, 1), so columns 0-3 become fractions
        of the image size and the last two columns are left unchanged
        (presumably confidence and class id - confirm against the
        ultralytics ``Boxes`` layout).

        Args:
            img: image array indexed as (height, width, ...).

        Returns:
            numpy array with one row per detection.
        """
        results = self.predict(img)
        height, width = img.shape[0], img.shape[1]
        scale = np.array([width, height, width, height, 1, 1])
        return results[0].boxes.data.cpu().numpy() / scale

    def crop(self, img, box):
        """Cut an enlarged box region out of ``img``.

        Args:
            img: image array of shape (H, W, channels).
            box: four normalised corner values (x1, y1, x2, y2).

        Returns:
            The slice ``img[y1:y2, x1:x2, :]`` after the adjustment below.
        """
        H, W, _ = img.shape
        x1, y1, x2, y2 = box
        # The pixel coordinates themselves are scaled (top-left by 0.95,
        # bottom-right by 1.05), so the added margin grows with the distance
        # from the image origin. The bottom-right corner is clamped to the image.
        x1 = int(x1*W*0.95)
        x2 = min(int(x2*W*1.05), int(W))
        y1 = int(y1*H*0.95)
        y2 = min(int(y2*H*1.05), int(H))
        return img[y1:y2, x1:x2, :]

    def btc_bbox(self, img):
        """Detect faces and return one record per detection.

        Returns:
            List of dicts with ``'bbox'`` as ``[x, y, width, height]`` in
            integer pixels and ``'cropped'`` as the region returned by ``crop``.
        """
        height, width = img.shape[0], img.shape[1]
        output = []
        for box in self.get_bboxes(img):
            x1 = int(box[0]*width)
            y1 = int(box[1]*height)
            x2 = int(box[2]*width)
            y2 = int(box[3]*height)
            output.append({'bbox': [x1, y1, x2-x1, y2-y1],
                           'cropped': self.crop(img, box[:4])})
        return output

    def plot_bboxes(self, img):
        """Return a copy of ``img`` with every detected box drawn and numbered."""
        height, width = img.shape[0], img.shape[1]
        output = np.copy(img)
        for number, box in enumerate(self.get_bboxes(img), start=1):
            top_left = (int(box[0]*width), int(box[1]*height))
            bottom_right = (int(box[2]*width), int(box[3]*height))
            output = cv2.rectangle(output, top_left, bottom_right, (0, 255, 0), 2)
            output = cv2.putText(output, f'text {number}', top_left,
                                 cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 255), 2)
        return output

    def show_cropped_images(self, image):
        """Print the detections of ``image`` and display every crop side by side."""
        bboxes = [list(box) for box in self.get_bboxes(image)]
        print(bboxes)
        n = len(bboxes)
        if n == 0:
            # A grid with zero columns cannot be created; nothing to draw.
            return
        # One axis per detection: drawing all crops onto a single axis would
        # leave only the last one visible. squeeze=False keeps `axes` 2-D even
        # when there is a single detection.
        fig, axes = plt.subplots(1, n, squeeze=False)
        for ax, box in zip(axes[0], bboxes):
            ax.imshow(self.crop(image, box[:4]))
        plt.show()

In [56]:
from torchvision.models import resnet50, ResNet50_Weights, efficientnet_v2_s, EfficientNet_V2_S_Weights
import torch
import torch.nn as nn
import cv2
# from transform import test_transform
# from config import classify_lst

class MyResnet(nn.Module):
    """ResNet-50 trunk followed by a linear head owned by this module.

    Parameters live under ``backbone.*`` (the trunk, with its own ``fc``
    removed) and ``fc.*`` (the ``2048 -> n_classes`` head).
    """

    # Trunk sub-modules applied, in this order, before the head.
    _STAGES = ('conv1', 'bn1', 'relu', 'maxpool',
               'layer1', 'layer2', 'layer3', 'layer4', 'avgpool')

    def __init__(self, n_classes=2):
        super().__init__()
        # Built without passing pretrained weights
        # (weights=ResNet50_Weights.DEFAULT is intentionally not used here).
        trunk = resnet50()
        # Drop the stock classifier; the replacement head is `self.fc`.
        del trunk.fc
        self.backbone = trunk
        self.fc = nn.Linear(in_features=2048, out_features=n_classes)

    def forward(self, x):
        """Run ``x`` through every trunk stage, flatten from dim 1, apply the head."""
        features = x
        for stage in self._STAGES:
            features = getattr(self.backbone, stage)(features)
        features = torch.flatten(features, 1)
        return self.fc(features)
    
class MyEffnet(nn.Module):
    """EfficientNetV2-S whose classifier entry 1 is replaced by an ``n_classes`` linear layer."""

    def __init__(self, n_classes=7):
        super().__init__()
        # Constructed with the library's default weights for this architecture.
        net = efficientnet_v2_s(weights=EfficientNet_V2_S_Weights.DEFAULT)
        # Swap the final classifier layer for a fresh 1280 -> n_classes head.
        net.classifier[1] = nn.Linear(in_features=1280, out_features=n_classes)
        self.backbone = net

    def forward(self, x):
        """Return the backbone's output for ``x``."""
        return self.backbone(x)
    
def create_classify_models():
    """Build the six attribute classifiers, load their checkpoints, set eval mode.

    Returns:
        Tuple of models in the order gender, age, race, skintone, emotion,
        masked.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # (architecture, number of classes, checkpoint path), listed in return order.
    specs = (
        (MyEffnet, 2, '/kaggle/input/forfdsminds/models/classify/efficient/gender/best.pt'),
        (MyResnet, 6, '/kaggle/input/forfdsminds/models/classify/classify_model/age/best.pt'),
        (MyEffnet, 3, '/kaggle/input/forfdsminds/models/classify/efficient/race/best.pt'),
        (MyEffnet, 4, '/kaggle/input/forfdsminds/models/classify/efficient/skintone/best.pt'),
        (MyEffnet, 7, '/kaggle/input/forfdsminds/models/classify/efficient/emotion/best.pt'),
        (MyResnet, 2, '/kaggle/input/forfdsminds/models/classify/classify_model/masked/best.pt'),
    )

    # Phase 1: instantiate every network on the target device.
    classifiers = tuple(arch(n_classes=n_out).to(device) for arch, n_out, _ in specs)

    # Phase 2: read each checkpoint onto the CPU and copy it into its network.
    # NOTE(review): torch.load unpickles the file - only load trusted checkpoints.
    for classifier, (_, _, checkpoint) in zip(classifiers, specs):
        classifier.load_state_dict(
            torch.load(checkpoint, map_location=torch.device('cpu')))

    # Phase 3: switch every network to evaluation mode.
    for classifier in classifiers:
        classifier.eval()

    return classifiers

# def predict(models, img):
#     # img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
#     skin_img = img
#     skin_img = cv2.cvtColor(skin_img, cv2.COLOR_RGB2BGR)
#     img = test_transform(img)
#     img = img.unsqueeze(0)
#     output = []
#     for i in range(6):
#         try:
#             prediction = models[i](img)
#             prediction = torch.argmax(prediction, dim=1)
#             prediction = prediction.item()
#             output.append(classify_lst[i][prediction])
#         except:
#             prediction = models[i].predict_skintone(skin_img)
#             output.append(prediction)
#     return output

def predict(models, img):
    """Classify one face crop with every attribute model.

    Args:
        models: sequence of classifiers ordered like ``classify_lst``.
        img: image accepted by ``test_transform``.

    Returns:
        List with one label string per model, in the same order.
    """
    # NOTE(review): no colour-channel conversion is applied; crops coming from
    # cv2.imread are in BGR order - confirm this matches how the models were trained.
    batch = test_transform(img).unsqueeze(0).to(device)
    labels = []
    # Inference only: disabling autograd avoids building a graph for every call.
    with torch.no_grad():
        for model, class_names in zip(models, classify_lst):
            class_index = torch.argmax(model(batch), dim=1).item()
            labels.append(class_names[class_index])
    return labels

In [57]:
# # pipeline.py
# # from detect_models import FaceDetector
# # from classify_models import create_classify_models, predict
# import cv2

# detector = FaceDetector()
# models = create_classify_models()

# def pipeline(path):
#     img = cv2.imread(path)
#     btc_bbox = detector.btc_bbox(img)
#     for box in btc_bbox:
#         cropped = box['cropped']
#         gender, age, race, skintone, emotion, masked = predict(models, cropped)
# #         box['gender'] = gender
# #         box['age'] = age
# #         box['race'] = race
# #         box['skintone'] = skintone
# #         box['emotion'] = emotion
# #         box['masked'] = masked
#     return btc_bbox

# if __name__ == '__main__':
#     path = '/kaggle/input/black-male-masked-shutterstock/kaggle/working/black-male-masked-crawl/cr_1638.jpg'
#     btc_bbox = pipeline(path)
#     print(btc_bbox)

In [58]:
import json
import os
# Location of the image index and of the image files it describes.
index_path = '/kaggle/input/jsonfile/file_name_to_image_id_private.json'
image_dir = '/kaggle/input/pixtahackathon80/data'

with open(index_path) as json_file:
    data = json.load(json_file)

# Sanity check: number of index records next to the number of files on disk.
len(data['images']), len(os.listdir(image_dir))

(12653, 12653)

In [59]:
# Per-image records from the index; pipeline() reads 'file_name' and 'id' from each.
images = data['images']
# Display the first record to show the available fields.
images[0]

{'id': 1,
 'file_name': '10003832.jpg',
 'height': 2000,
 'width': 1459,
 'license': None,
 'coco_url': None}

In [60]:
# from detect_models import FaceDetector
# from classify_models import create_classify_models, predict
import cv2
import os
from tqdm.auto import tqdm

# Build the face detector and the six attribute classifiers once;
# pipeline() reads `detector` and `models` as globals.
detector = FaceDetector()
models = create_classify_models()
# Listing of the input image directory (not referenced by the later cells shown here).
public_test_files = os.listdir('/kaggle/input/pixtahackathon80/data/') 

Model summary (fused): 168 layers, 3005843 parameters, 0 gradients, 8.1 GFLOPs


In [61]:
# !mkdir bbox

In [62]:
# def draw_bbox(file_name, img, bboxes):
# #     img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
#     for bbox in bboxes:
#         x = bbox[0]
#         y = bbox[1]
#         w = bbox[2] 
#         h = bbox[3]
#         img = cv2.rectangle(img, (x, y), (x+w, y+h), (255, 255, 0), 1)
#         file = '/kaggle/working/bbox/'+file_name
#         cv2.imwrite(file, img)

In [63]:
# def pipeline(file_name):
#     path = '/kaggle/input/pixtahackathon80/data/'+file_name
#     img = cv2.imread(path)
#     btc_bbox = detector.btc_bbox(img)
#     bboxes = []
#     for box in btc_bbox:
#         box['file_name'] = file_name
#         box['image_id'] = data[file_name]
#         cropped = box['cropped']
#         gender, age, race, skintone, emotion, masked = predict(models, cropped)
#         box['gender'] = gender
#         box['age'] = age
#         box['race'] = race
#         box['skintone'] = skintone
#         box['emotion'] = emotion
#         box['masked'] = masked
#         box['bbox'] = f'{box["bbox"]}'
#         del box['cropped']
#         output.append(box)
#         bboxes.append(box['bbox'])
#     draw_bbox(file_name, img, bboxes)
        

In [64]:
import matplotlib.pyplot as plt
def pipeline(image):
    """Detect and classify every face in one image.

    Each detected face becomes one record that is appended to the global
    ``output`` list, which must exist before this is called.

    Args:
        image: index record providing ``'file_name'`` and ``'id'``.

    Returns:
        List of the records created for this image (the same dicts that were
        appended to ``output``).

    Raises:
        FileNotFoundError: if the image file cannot be read.
    """
    file_name = image['file_name']
    path = '/kaggle/input/pixtahackathon80/data/'+file_name
    img = cv2.imread(path)
    if img is None:
        # cv2.imread reports a missing/unreadable file by returning None;
        # fail here with the path instead of deep inside the detector.
        raise FileNotFoundError(f'Could not read image: {path}')

    records = []
    for box in detector.btc_bbox(img):
        gender, age, race, skintone, emotion, masked = predict(models, box['cropped'])
        record = {
            # Stored as text, e.g. '[582, 283, 271, 355]'.
            'bbox': f'{box["bbox"]}',
            'file_name': file_name,
            'image_id': image['id'],
            'gender': gender,
            'age': age,
            'race': race,
            'skintone': skintone,
            'emotion': emotion,
            'masked': masked,
        }
        output.append(record)
        records.append(record)
    return records

In [65]:
# Smoke test on a single image: reset the shared accumulator that pipeline()
# appends to, process the first record, and inspect the result.
output = []
pipeline(images[0])
print(output)


0: 640x480 1 face, 5.8ms
Speed: 2.4ms preprocess, 5.8ms inference, 1.1ms postprocess per image at shape (1, 3, 640, 480)
[{'bbox': '[582, 283, 271, 355]', 'file_name': '10003832.jpg', 'image_id': 1, 'gender': 'male', 'age': '20-30s', 'race': 'mongoloid', 'skintone': 'light', 'emotion': 'neutral', 'masked': 'unmasked'}]


In [67]:
# Full run: reset the accumulator first so records from the smoke test are not
# kept, then process every index record (`file` is a record dict, not a path).
output = []
for file in tqdm(images):
    pipeline(file)


  0%|          | 0/12653 [00:00<?, ?it/s]


0: 640x480 1 face, 10.9ms
Speed: 3.0ms preprocess, 10.9ms inference, 1.8ms postprocess per image at shape (1, 3, 640, 480)

0: 448x640 1 face, 56.9ms
Speed: 2.3ms preprocess, 56.9ms inference, 1.1ms postprocess per image at shape (1, 3, 448, 640)

0: 512x640 1 face, 56.7ms
Speed: 2.6ms preprocess, 56.7ms inference, 1.2ms postprocess per image at shape (1, 3, 512, 640)

0: 448x640 1 face, 6.5ms
Speed: 2.9ms preprocess, 6.5ms inference, 1.1ms postprocess per image at shape (1, 3, 448, 640)

0: 512x640 1 face, 6.3ms
Speed: 2.5ms preprocess, 6.3ms inference, 1.1ms postprocess per image at shape (1, 3, 512, 640)

0: 448x640 1 face, 6.9ms
Speed: 2.2ms preprocess, 6.9ms inference, 1.1ms postprocess per image at shape (1, 3, 448, 640)

0: 384x640 1 face, 93.2ms
Speed: 2.6ms preprocess, 93.2ms inference, 1.2ms postprocess per image at shape (1, 3, 384, 640)

0: 448x640 1 face, 6.4ms
Speed: 2.2ms preprocess, 6.4ms inference, 1.1ms postprocess per image at shape (1, 3, 448, 640)

0: 256x640 1 fa

In [68]:
import pandas as pd
# def make_csv(df):
#     order = ['file_name', 'bbox', 'race', 'age', 'emotion', 'gender', 'skintone', 'masked']
#     df = pd.DataFrame(output, columns=order)
#     df['race'] = df['race'].apply(lambda x: x.title())
#     df['age'] = df['age'].apply(lambda x: x.title())
#     df['emotion'] = df['emotion'].apply(lambda x: x.title())
#     df['gender'] = df['gender'].apply(lambda x: x.title())
#     df.to_csv('labels_new.csv', index=False)

In [69]:
# import re
# sorted_df = df.copy()
# pattern = r'[_;.]'
# sorted_df['id'] = [re.split(pattern, i)[1] for i in sorted_df['file_name']]
# # sorted_df.astype('int')
# sorted_df['id'] = [int(i) for i in sorted_df['id']]
# sorted_df.sort_values(by=['id'],inplace=True)
# sorted_df=sorted_df.drop(columns=['id'])
# sorted_df.head(10)

In [71]:
# Column order of the exported file.
order = ['file_name', 'bbox', 'image_id','race', 'age', 'emotion', 'gender', 'skintone', 'masked']
sorted_df = pd.DataFrame(output, columns=order)

# Title-case these label columns; 'skintone' and 'masked' are left as predicted.
# NOTE(review): str.title() turns '20-30s' into '20-30S' and '40-50s' into
# '40-50S' - confirm that is the label format expected downstream.
for column in ('race', 'age', 'emotion', 'gender'):
    sorted_df[column] = sorted_df[column].apply(str.title)

sorted_df.to_csv('answer.csv', index=False)

In [None]:
# Writes the same frame again under a second file name; the cell that would
# have sorted it is commented out, so the content matches answer.csv.
sorted_df.to_csv('sorted_output.csv', index=False)

In [None]:
# Archive the bbox directory recursively.
# NOTE(review): the cells that create and fill /kaggle/working/bbox are
# commented out in this notebook, so the directory may not exist - confirm before running.
!zip -r bbox.zip /kaggle/working/bbox

In [None]:
# Same archive command without -r, i.e. without recursing into the directory.
# NOTE(review): looks redundant with the recursive variant - confirm which one is intended.
!zip bbox.zip /kaggle/working/bbox