## Clone repositories

In [None]:
!git clone https://github.com/camilalaranjeira/seeing_without_looking.git
!git clone https://github.com/WongKinYiu/yolov7.git seeing_without_looking/feature_extraction/

## Install dependencies

This step may take a while.

In [None]:
!pip install thop facenet-pytorch piq opennsfw2 tensorflow==2.11

## Download models

In [25]:
import os

%cd /content/seeing_without_looking/feature_extraction

if not os.path.isfile('yolov7/yolov7.pt'):
    !wget https://github.com/WongKinYiu/yolov7/releases/download/v0.1/yolov7.pt -P yolov7/

if not os.path.isfile('scenes/whole_wideresnet18_places365.pth.tar'):
    !gdown 1D6bGoJHuzXJhnr5KI70Zj1PkbGWWtWij -O scenes/ # whole_wideresnet18_places365.pth.tar

if not os.path.isfile('nsfw_model/nsfw_mobilenet2.224x224.h5'):
    !gdown 1t8cAnS8rNBQU8vo16CDAiBL0RuTJdesi -O nsfw_model/ #nsfw_mobilenet2.224x224.h5

if not os.path.isdir('fitzpatrick'):
    os.mkdir('fitzpatrick')
    !gdown 1AEtQ2s4k5R7IKdrK6vs_zqH_DvXIeFUK -O fitzpatrick/ # shape_predictor_68_face_landmarks.dat

if not os.path.isfile('model_age/vgg16_agegender.hdf5'):
    !gdown 1ZF33ousEHhAwK8MmNXpuwmvVtXilVAJ_ -O model_age/

%cd ../..

/content/seeing_without_looking/feature_extraction
Downloading...
From (original): https://drive.google.com/uc?id=1ZF33ousEHhAwK8MmNXpuwmvVtXilVAJ_
From (redirected): https://drive.google.com/uc?id=1ZF33ousEHhAwK8MmNXpuwmvVtXilVAJ_&confirm=t&uuid=4a48f442-1b29-41c8-b7da-59e9cd31f319
To: /content/seeing_without_looking/feature_extraction/model_age/vgg16_agegender.hdf5
100% 521M/521M [00:07<00:00, 70.3MB/s]
/content


## Load data

In [None]:
import os
# Download and unpack the image archive only when no `images` directory exists
# yet, so re-running the cell does not fetch the archive again.
# NOTE(review): assumes images.zip extracts into `images/` -- confirm.
if not os.path.isdir('images'):
    !gdown 1yxBrC9d6Hfun9sxgePSr5hlzQ8ErNGbE #SOD images
    !unzip -q images.zip
    # The archive is no longer needed once extracted.
    !rm images.zip

Downloading...
From (original): https://drive.google.com/uc?id=1yxBrC9d6Hfun9sxgePSr5hlzQ8ErNGbE
From (redirected): https://drive.google.com/uc?id=1yxBrC9d6Hfun9sxgePSr5hlzQ8ErNGbE&confirm=t&uuid=35c8290f-d6d4-4ef7-a558-b108a1b8397d
To: /content/images.zip
100% 1.00G/1.00G [00:11<00:00, 88.4MB/s]


## Run inferences

Imports.

In [None]:
import sys, os
import pandas as pd

# Settings shared by all feature-extraction cells below.
args = dict(
    device='0',  # device identifier handed to the detectors
    data_source='images',  # path to dataset
    rootpath='/content/seeing_without_looking/feature_extraction/',  # path to third party methods
)

############## Objects ##############
# https://github.com/WongKinYiu/yolov7
sys.path.append(os.path.join(args['rootpath'], 'yolov7'))
from models.experimental import attempt_load
from utils.general import check_img_size, non_max_suppression, scale_coords
from utils.datasets import LoadImages
from utils.torch_utils import select_device, TracedModel
import torch
import shutil
#####################################

############## Pornography ##############
# Yahoo OpenNSFW
import opennsfw2 as n2

# NSFW-JS
sys.path.append(args['rootpath'])
from nsfw_model.nsfw_detector import predict
#####################################

######### DEMOGRAPHICS #########
from skimage import io, transform
import cv2, gc, time

sys.path.append(os.path.join(args['rootpath'], 'model_age'))
from faces import get_faces_mtcnn
from configcnn import ConfigCNN
from keras.models import model_from_json
from fitzpatrick import Segmentation, SkinTone
from tensorflow.python.keras import backend as K
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
#####################################

######### SCENES #########
# https://github.com/CSAILVision/places365/blob/master/demo_pytorch_CAM.py
sys.path.append(args['rootpath'])
from scenes import places_torch as places
from torch.autograd import Variable as V
from torch.nn import functional as F
from torch import nn
import numpy as np
#####################################

######### QUALITY #########
from piq import brisque
from torchvision import transforms
#####################################

### 1. Store filenames into final structure

In [2]:
# Index every image of the dataset. The per-model cells below add one column
# per signal to `samples`, aligned with `samples['filenames']`.
IMAGE_EXTENSIONS = {'jpg', 'png', 'jpeg', 'gif', 'tiff', 'bmp', 'webp'}

# Same folder the inference cells read from.
datapath = args['data_source']
samples = {'filenames': []}
# Sorted so the row order is reproducible (os.listdir order is arbitrary).
for filename in sorted(os.listdir(datapath)):
    # splitext gives '' for names without a dot (so a file literally named
    # 'jpg' is not mistaken for an image) and lower() accepts 'JPG', 'Png', ...
    ext = os.path.splitext(filename)[1][1:].lower()
    if ext in IMAGE_EXTENSIONS:
        samples['filenames'].append(filename)

# Reverse lookup: filename -> row position in every `samples` column.
filename_idx = {filename: idx for idx, filename in enumerate(samples['filenames'])}

### 2. Object detection
Uses the YOLOv7 implementation from its GitHub repository. Produces three new columns:
* object_name
* object_bbox
* object_conf

In [None]:
rootpath = args['rootpath']
if not os.path.isdir(rootpath+'objects'):
    os.mkdir(rootpath+'objects')
    !gdown 1SAUaYzNJdQeZ2r1gmf6tLFB6oRclB4RA
    shutil.move('coco_categories.csv', rootpath+'objects/coco_categories.csv')

coco_categories = pd.read_csv(rootpath+'objects/coco_categories.csv')
args['weights'] = 'yolov7/yolov7.pt'
args['conf_thres'] = 0.25
args['iou_thres'] =  0.5
args['img_size'] =  640

device = select_device(args['device'])
half = device.type != 'cpu'  # half precision only supported on CUDA
imgsz = args['img_size']

# Load model
model = attempt_load(args['weights'], map_location=device)  # load FP32 model
stride = int(model.stride.max())  # model stride
imgsz = check_img_size(imgsz, s=stride)  # check img_size

model = TracedModel(model, device, imgsz)
model.half()

dataset = LoadImages(args['data_source'], img_size=imgsz, stride=stride)
names = model.module.names if hasattr(model, 'module') else model.names

# Run inference
if device.type != 'cpu':
    model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters())))  # run once
old_img_w = old_img_h = imgsz
old_img_b = 1

###########################
samples['object_name'] = [None]*len(samples['filenames'])
samples['object_bbox'] = [None]*len(samples['filenames'])
samples['object_conf'] = [None]*len(samples['filenames'])
###########################

for en, (path, img, im0s, vid_cap) in enumerate(dataset):
    if en % 100 == 0:
        print(f'\r{en}', flush=True, end='')

    img = torch.from_numpy(img).to(device)
    img = img.half() if half else img.float()  # uint8 to fp16/32
    img /= 255.0  # 0 - 255 to 0.0 - 1.0
    if img.ndimension() == 3:
        img = img.unsqueeze(0)

    # Warmup
    if device.type != 'cpu' and (old_img_b != img.shape[0] or old_img_h != img.shape[2] or old_img_w != img.shape[3]):
        old_img_b = img.shape[0]
        old_img_h = img.shape[2]
        old_img_w = img.shape[3]
        for i in range(3):
            model(img, augment=False)[0]

    # Inference
    with torch.no_grad():   # Calculating gradients would cause a GPU memory leak
        pred = model(img, augment=False)[0]

    # Apply NMS
    pred = non_max_suppression(pred, args['conf_thres'], args['iou_thres'])

    for i, det in enumerate(pred):  # detections per image
        if len(det):
            # Rescale boxes from img_size to im0 size
            det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0s.shape).round()
            det[:, :4:2]  /= im0s.shape[0] # normalize
            det[:, 1:4:2] /= im0s.shape[1] # normalize

            xyxy    = det[:, :4].detach().cpu().tolist()
            conf    = det[:, 4].detach().cpu().tolist()
            classes = det[:, 5].detach().cpu().tolist()

            class_names = []
            for c in classes:
                row = coco_categories.iloc[int(c)]
                classname = '/'.join( (row['supercategory'], row['category']) )
                class_names.append(classname)

            idx = filename_idx[os.path.basename(path)]
            samples['object_name'][idx] = class_names
            samples['object_bbox'][idx] = xyxy
            samples['object_conf'][idx] = conf

Fusing layers... 
RepConv.fuse_repvgg_block
RepConv.fuse_repvgg_block
RepConv.fuse_repvgg_block


  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


 Convert model to Traced-model... 


  if param.grad is not None:


 traced_script_module saved! 
 model is traced! 

0

### 3. Pornography

Two models:
* Yahoo OpenNSFW: https://github.com/bhky/opennsfw2
* NSFW-JS: https://github.com/infinitered/nsfwjs



In [None]:
# Yahoo OpenNSFW: one NSFW probability per image, in `samples['filenames']` order.
samples['porn'] = [
    n2.predict_image(os.path.join(args['data_source'], name))
    for name in samples['filenames']
]

/content
Pre-trained weights will be downloaded.


Downloading...
From: https://github.com/bhky/opennsfw2/releases/download/v0.1.0/open_nsfw_weights.h5
To: /root/.opennsfw2/weights/open_nsfw_weights.h5
100%|██████████| 24.2M/24.2M [00:00<00:00, 254MB/s]


0.999843955039978


In [15]:
rootpath = args['rootpath']
model = predict.load_model(f'{rootpath}/nsfw_model/nsfw_mobilenet2.224x224.h5')

# Order in which the five NSFW-JS class scores are stored for every image.
nsfw_classes = ('neutral', 'drawings', 'hentai', 'sexy', 'porn')

samples['porn_2019'] = []
for name in samples['filenames']:
    image_path = os.path.join(args['data_source'], name)
    # classify() returns a dict; only one image is passed, so take its first
    # (and presumably only) value, which holds the per-class scores.
    scores = list(predict.classify(model, image_path).values())[0]
    samples['porn_2019'].append([scores[label] for label in nsfw_classes])

/content




{'drawings': 0.0001275123649975285, 'hentai': 0.0366964191198349, 'neutral': 0.0009582224884070456, 'porn': 0.7896934747695923, 'sexy': 0.17252439260482788}

NSFW JS running time: 1.2891



### 4. Scenes

In [None]:
rootpath = os.path.join(args['rootpath'], 'scenes')

# load the labels
classes, classes_macro = places.load_labels(rootpath)
# First macro field of each class, ordered by class name (0: indoor, 1: outdoor).
labels_IO = np.array([classes_macro[key][0] for key in sorted(classes_macro.keys())])

# load the model
model = places.load_model(rootpath)
model.avgpool = nn.AvgPool2d(14)

# load the transformer
transf = places.returnTF() # image transformer

# get the softmax weight (carried over from the Places365 CAM demo; not used in this cell)
params = list(model.parameters())
weight_softmax = params[-2].data.numpy()

samples['scene'] = []
samples['scene_conf'] = []
samples['scene_io'] = []

for imgfile in samples['filenames']:
    imgfile = os.path.join(args['data_source'], imgfile)
    img = places.imreadRotate(imgfile)
    with torch.no_grad():
        input_img = V(transf(img).unsqueeze(0))

        # forward pass
        logit = model.forward(input_img)
        # Explicit dim: relying on the implicit dimension is deprecated and
        # emits a warning for every image. The input is a batch of one, so the
        # class scores lie along dim 1 (the dimension the implicit rule picked
        # for this 2-D output).
        h_x = F.softmax(logit, dim=1).data.squeeze()
        probs, idx = h_x.sort(0, True)  # descending

    # < 0.5: indoor
    io_image = np.mean(labels_IO[idx[:10].numpy()]) # vote for the indoor or outdoor
    samples['scene_io'].append(io_image) # 0: indoor, 1: outdoor

    out = classes[idx[0]] # topcategory
    prob = probs[0] # conf

    # Build the 'indoor|outdoor/<macro class>/<scene>' label.
    macro = classes_macro[out]
    macro_lst = []
    if macro[0] == 0: # INDOOR
        macro_lst.append('indoor')
        macro_lst.append('residential' if macro[1]==0 else 'commercial')
    else:
        macro_lst.append('outdoor')
        macro_lst.append('nature' if macro[1]==0 else 'urban')

    macro_lst.append(out)
    samples['scene'].append('/'.join(macro_lst))
    samples['scene_conf'].append(prob.item())



dont rotate


  h_x = F.softmax(logit).data.squeeze()


### 5. Demographics

In [49]:
## model for age, child and gender
model_age  = model_from_json(
    open(os.path.join(args['rootpath'], 'model_age', 'vgg16_agegender_model.json')).read()
)
device = args['device']

config = tf.ConfigProto(device_count = {'GPU': 0})
sess = tf.Session(config=config)
K.set_session(sess)

## model for segmentation + ITA calculation
skin_tone = SkinTone(modelpath=os.path.join(
                    args['rootpath'], 'fitzpatrick',
                    'shape_predictor_68_face_landmarks.dat'))

attributes = ['face_name', 'face_bbox', 'face_conf',
             'age', 'child', 'gender', 'skin_ita']
for attr in attributes:
    samples[attr] = []

with sess:
    model_age.load_weights(os.path.join(args['rootpath'], 'model_age', 'vgg16_agegender.hdf5'))

    for k, filename in enumerate(samples['filenames']):
        if k % 100 == 0:
            print(f'\rExtracting signals from MTCNN + Skin + Age + Child + Gender: {k}/{len(samples)}', end='', flush=True)

        imgpath = os.path.join(args['data_source'], filename)

        results = get_faces_mtcnn(imgpath, device)
        if len(results) == 0:
            for attr in attributes:
                samples[attr].append(np.nan)
            continue

        samples['face_name'].append('has_face')
        samples['face_bbox'].append([res[1] for res in results])
        samples['face_conf'].append([res[2] for res in results])

        age    = []
        child  = []
        gender = []
        skin_ita = []

        for res in results:
            ita, patch = skin_tone.ITA(res[0])
            skin_ita.append(ita)

            face = transform.resize(res[0], (128, 128))
            predictions = model_age.predict(face[None,:,:,:])

            age.append(predictions[0][0].tolist())
            child.append(predictions[1][0][0].item())
            gender.append(predictions[2][0][0].item())

        samples['age'].append(age)
        samples['child'].append(child)
        samples['gender'].append(gender)
        samples['skin_ita'].append(skin_ita)


Extracting signals from MTCNN + Skin + Age + Child + Gender: 0/8

  updates=self.state_updates,


FACE running time: 0.5246

