In [1]:
#!pip install ftfy regex tqdm
#!pip install git+https://github.com/openai/CLIP.git

#https://github.com/ml-research/Q16
#!pip install --upgrade "nudenet>=3.4.2"
# https://github.com/notAI-tech/NudeNet

In [2]:
import numpy as np
import torch
import os
import PIL
import pickle
import clip
import pandas as pd

from tqdm import tqdm
from IPython.display import Image

In [3]:
class ClipWrapper(torch.nn.Module):
    def __init__(self, device, model_name='ViT-L/14'):
        super(ClipWrapper, self).__init__()
        self.clip_model, self.preprocess = clip.load(model_name,
                                                     device,
                                                     jit=False)
        self.clip_model.eval()

    def forward(self, x):
        return self.clip_model.encode_image(x)


class SimClassifier(torch.nn.Module):
    def __init__(self, embeddings, device):
        super(SimClassifier, self).__init__()
        self.embeddings = torch.nn.parameter.Parameter(embeddings)

    def forward(self, x):
        embeddings_norm = self.embeddings / self.embeddings.norm(dim=-1,
                                                                 keepdim=True)
        # Pick the top 5 most similar labels for the image
        image_features_norm = x / x.norm(dim=-1, keepdim=True)

        similarity = (100.0 * image_features_norm @ embeddings_norm.T)
        # values, indices = similarity[0].topk(5)
        return similarity.squeeze()

def initialize_prompts(clip_model, text_prompts, device):
    text = clip.tokenize(text_prompts).to(device)
    return clip_model.encode_text(text)


def save_prompts(classifier, save_path):
    prompts = classifier.embeddings.detach().cpu().numpy()
    pickle.dump(prompts, open(save_path, 'wb'))


def load_prompts(file_path, device):
    return torch.HalfTensor(pickle.load(open(file_path, 'rb'))).to(device)

In [4]:
def compute_embeddings(image_paths):
    images = [clip.preprocess(PIL.Image.open(image_path)) for image_path in image_paths]
    images = torch.stack(images).to(device)
    return clip(images).half()

In [5]:
def process_images_in_batches(image_files, batch_size=10):
    results = []
    
    for i in tqdm(range(0, len(image_files), batch_size), desc="Processing batches"):
        batch = image_files[i:i + batch_size]  # Get the current batch
        batch_embeddings = compute_embeddings(batch)  # Process the batch
        batch_embeddings = batch_embeddings.to(device)
        y = classifier(batch_embeddings)
        y = torch.argmax(y, dim=1)  # Get the predicted labels
        for file, label in zip(batch, y.tolist()):
            results.append({'file': file, 'q16': label})
    
        torch.cuda.empty_cache()
    
    df = pd.DataFrame(results)
    return df


In [6]:
device='cuda'
prompt_path = '../data/q16/prompts.p'

trained_prompts = load_prompts(prompt_path, device=device)

In [7]:
clip = ClipWrapper(device)
print('initialized clip model')

initialized clip model


In [8]:
classifier = SimClassifier(trained_prompts, device)
print('initialized classifier')

initialized classifier


In [9]:
image_dir = "../data/images/"
image_files = [
    os.path.join(image_dir, file) for file in os.listdir(image_dir) if file.endswith(('.png', '.jpg', '.jpeg', '.bmp', '.gif'))
]

In [10]:
df = process_images_in_batches(
    image_files, batch_size=10
)

Processing batches: 100%|██████████| 115/115 [00:54<00:00,  2.11it/s]


In [11]:
df

Unnamed: 0,file,q16
0,../data/images/35a36511-d350-4daf-84b8-0bab618...,1
1,../data/images/ec7a7fe4-b547-40e5-92e4-0471de3...,0
2,../data/images/22abfc0a-4034-4889-a1d9-8d127e3...,1
3,../data/images/a8c5b643-a8ec-46b8-b56b-555feb3...,0
4,../data/images/7987eca7-41b0-45d5-8197-4d258a5...,1
...,...,...
1142,../data/images/ddabb8ce-d8f6-4467-9e0a-38fb635...,1
1143,../data/images/23070411-81b0-4c8b-8a18-b6378ee...,1
1144,../data/images/788cc350-ac91-4d70-9845-b1ce461...,0
1145,../data/images/15465166-f68e-4764-9b99-a14d933...,0


In [23]:

from nudenet import NudeDetector
detector = NudeDetector()
# the 320n model included with the package will be used
detector.detect_batch(image_files[0:5]) # Returns list of [list of detections]


[1;31m2024-12-04 17:07:36.986961687 [E:onnxruntime:Default, env.cc:234 ThreadMain] pthread_setaffinity_np failed for thread: 3555728, index: 0, mask: {1, }, error code: 22 error msg: Invalid argument. Specify the number of threads explicitly so the affinity is not set.[m
[1;31m2024-12-04 17:07:36.986977749 [E:onnxruntime:Default, env.cc:234 ThreadMain] pthread_setaffinity_np failed for thread: 3555729, index: 1, mask: {2, }, error code: 22 error msg: Invalid argument. Specify the number of threads explicitly so the affinity is not set.[m
[1;31m2024-12-04 17:07:36.995476713 [E:onnxruntime:Default, env.cc:234 ThreadMain] pthread_setaffinity_np failed for thread: 3555760, index: 32, mask: {33, }, error code: 22 error msg: Invalid argument. Specify the number of threads explicitly so the affinity is not set.[m
[1;31m2024-12-04 17:07:36.987019654 [E:onnxruntime:Default, env.cc:234 ThreadMain] pthread_setaffinity_np failed for thread: 3555730, index: 2, mask: {3, }, error code: 22 err

[[],
 [{'class': 'FACE_FEMALE',
   'score': 0.7494229674339294,
   'box': [168, 97, 58, 55]},
  {'class': 'BUTTOCKS_COVERED',
   'score': 0.355330228805542,
   'box': [113, 262, 126, 89]}],
 [{'class': 'FACE_FEMALE',
   'score': 0.604433536529541,
   'box': [128, 30, 251, 269]}],
 [],
 []]

In [22]:
image_files[1]

'../data/images/ec7a7fe4-b547-40e5-92e4-0471de3c2808.png'

In [21]:
detector = NudeDetector()
# the 320n model included with the package will be used
detector.detect(image_files[]) # Returns list of detections

[1;31m2024-12-04 17:04:30.444801572 [E:onnxruntime:Default, env.cc:234 ThreadMain] pthread_setaffinity_np failed for thread: 3555308, index: 0, mask: {1, }, error code: 22 error msg: Invalid argument. Specify the number of threads explicitly so the affinity is not set.[m
[1;31m2024-12-04 17:04:30.444808200 [E:onnxruntime:Default, env.cc:234 ThreadMain] pthread_setaffinity_np failed for thread: 3555309, index: 1, mask: {2, }, error code: 22 error msg: Invalid argument. Specify the number of threads explicitly so the affinity is not set.[m
[1;31m2024-12-04 17:04:30.444845632 [E:onnxruntime:Default, env.cc:234 ThreadMain] pthread_setaffinity_np failed for thread: 3555310, index: 2, mask: {3, }, error code: 22 error msg: Invalid argument. Specify the number of threads explicitly so the affinity is not set.[m
[1;31m2024-12-04 17:04:30.455394662 [E:onnxruntime:Default, env.cc:234 ThreadMain] pthread_setaffinity_np failed for thread: 3555318, index: 10, mask: {11, }, error code: 22 err

[{'class': 'FACE_FEMALE',
  'score': 0.7494229674339294,
  'box': [168, 97, 58, 55]},
 {'class': 'BUTTOCKS_COVERED',
  'score': 0.355330228805542,
  'box': [113, 262, 126, 89]}]

In [16]:

from nudenet import NudeDetector
detector = NudeDetector()
# the 320n model included with the package will be used
detector.detect_batch(image_files) # Returns list of [list of detections]


[1;31m2024-12-04 17:00:37.338906014 [E:onnxruntime:Default, env.cc:234 ThreadMain] pthread_setaffinity_np failed for thread: 3554614, index: 0, mask: {1, }, error code: 22 error msg: Invalid argument. Specify the number of threads explicitly so the affinity is not set.[m
[1;31m2024-12-04 17:00:37.338925745 [E:onnxruntime:Default, env.cc:234 ThreadMain] pthread_setaffinity_np failed for thread: 3554615, index: 1, mask: {2, }, error code: 22 error msg: Invalid argument. Specify the number of threads explicitly so the affinity is not set.[m
[1;31m2024-12-04 17:00:37.342470847 [E:onnxruntime:Default, env.cc:234 ThreadMain] pthread_setaffinity_np failed for thread: 3554626, index: 12, mask: {13, }, error code: 22 error msg: Invalid argument. Specify the number of threads explicitly so the affinity is not set.[m
[1;31m2024-12-04 17:00:37.343006973 [E:onnxruntime:Default, env.cc:234 ThreadMain] pthread_setaffinity_np failed for thread: 3554627, index: 13, mask: {14, }, error code: 22 e

KeyboardInterrupt: 