In [1]:
import pandas as pd
import numpy as np
import spacy
from rake_nltk import Rake
import nltk
from flask import jsonify
from cnn import CelebADataset, cnn
import torch
import imageio as io
from torchvision import transforms

In [2]:
# Fetch the NLTK "stopwords" corpus and "punkt" tokenizer data.
# Presumably these are what rake_nltk's Rake needs at runtime — TODO confirm.
nltk.download('stopwords')
nltk.download('punkt')

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/acabrera/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package punkt to /Users/acabrera/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

In [3]:
# Build the CNN and restore its trained weights.
# map_location="cpu" lets the checkpoint load on machines without CUDA even if
# it was saved from a GPU; the freshly constructed model lives on the CPU anyway.
model = cnn()
model.load_state_dict(torch.load("../model3epoch.ckpt", map_location="cpu"))
# Switch to inference mode (the network contains BatchNorm layers).
model.eval()

cnn(
  (layer1): Sequential(
    (0): Conv2d(3, 8, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2))
    (1): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer2): Sequential(
    (0): Conv2d(8, 16, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2))
    (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer3): Sequential(
    (0): Conv2d(16, 32, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2))
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc): Linear(in_features=288, out_features=1, bias=True)
  (final): Sigmoid()
)

In [4]:
# Shared RAKE keyword extractor used by get_keywords.
# max_length=5 presumably caps the number of words per extracted phrase —
# TODO confirm against the rake_nltk documentation.
r = Rake(max_length=5)
# Shared spaCy pipeline; its `.vector` attribute is used below for embeddings.
nlp = spacy.load("en_core_web_md")

In [5]:
# Load the pipe-delimited report table and preview its first rows.
df = pd.read_csv("reports-glasses-v3.csv", sep="|")
df.head()

Unnamed: 0.1,Unnamed: 0,index,image,description,confidence,label,pred
0,0,1,191326,the glasses do not have frames,0,1,-1
1,1,2,181854,The helmet is obscuring the face,1,1,-1
2,2,4,166401,Maybe the color of the glasses caused it to fail.,3,1,-1
3,3,5,177531,Probably because the rim of the glasses are so...,4,1,-1
4,4,9,173924,they can't see the front image of the face.,3,1,-1


In [6]:
# Drop the leftover "Unnamed: 0" column.
# errors="ignore" keeps this cell idempotent: re-running it after the column
# has already been removed no longer raises KeyError.
df = df.drop(columns=["Unnamed: 0"], errors="ignore")

In [7]:
# Number of report rows loaded.
len(df)

163

In [8]:
def get_keywords(sen):
    """Return the RAKE key phrases found in ``sen``.

    The text is fed to the shared module-level ``Rake`` instance ``r`` and the
    phrases it reports through ``get_ranked_phrases()`` are returned.
    """
    r.extract_keywords_from_text(sen)
    ranked_phrases = r.get_ranked_phrases()
    return ranked_phrases

In [9]:
def get_vecs(keywords):
    """Embed every key phrase with the shared spaCy pipeline ``nlp``.

    Returns one list per entry of ``keywords`` (in the same order), each built
    from the ``.vector`` of the parsed phrase.
    """
    return [list(nlp(phrase).vector) for phrase in keywords]

In [15]:
def get_pred(img, img_dir="../../img_align_celeba/"):
    """Run the CNN on one image and return its score as a string.

    Parameters
    ----------
    img
        Identifier of the image; the file read is ``img_dir + str(img) + ".jpg"``.
    img_dir : str, optional
        Directory prefix holding the images. Defaults to the path that was
        previously hard-coded, so existing callers are unaffected.

    Returns
    -------
    str
        The model's scalar output rounded to 3 decimals, converted to ``str``.
    """
    image = io.imread(img_dir + str(img) + ".jpg")
    # A Compose wrapping a single ToTensor is equivalent to ToTensor itself.
    tensor = transforms.ToTensor()(image)
    # Inference only: skip autograd bookkeeping for the forward pass.
    with torch.no_grad():
        score = model(tensor.unsqueeze(0)).item()
    return str(round(score, 3))
    

In [16]:
# Extract RAKE key phrases from each report description.
df["keywords"] = df["description"].apply(get_keywords)

In [17]:
# Embed each key phrase: "vecs" holds one vector per phrase in "keywords".
df["vecs"] = df["keywords"].apply(get_vecs)

In [18]:
def _description_vector(text):
    """Return the spaCy ``.vector`` of ``text`` converted to a list."""
    return list(nlp(text).vector)


# One embedding per report, computed from the full description text.
df["vec"] = df["description"].apply(_description_vector)

In [19]:
# Overwrite the "pred" column with the CNN's score for each image
# (get_pred returns the rounded score as a string).
df["pred"] = df["image"].apply(get_pred)

In [20]:
# Preview the frame with the newly added keywords / vecs / vec / pred columns.
df.head()

Unnamed: 0,index,image,description,confidence,label,pred,keywords,vecs,vec
0,1,191326,the glasses do not have frames,0,1,0.041,"[glasses, frames]","[[-0.25882, -0.99271, -0.21225, -0.2585, -0.30...","[0.05178317, -0.18900181, -0.43239498, -0.2266..."
1,2,181854,The helmet is obscuring the face,1,1,0.032,"[obscuring, helmet, face]","[[-0.476, -0.25002, -0.078249, 0.17018, -0.158...","[0.060249835, -0.17829168, -0.1702378, 0.05297..."
2,4,166401,Maybe the color of the glasses caused it to fail.,3,1,0.015,"[glasses caused, maybe, fail, color]","[[-0.34338498, -0.36056, -0.291215, -0.1524235...","[-0.03408092, 0.06473809, -0.17509338, -0.1664..."
3,5,177531,Probably because the rim of the glasses are so...,4,1,0.132,"[looks liek, wearing, thin, rim, probably, gla...","[[-0.083631, -0.033243, 0.0559185, -0.5001625,...","[-0.0041016345, 0.04782558, -0.17182252, -0.16..."
4,9,173924,they can't see the front image of the face.,3,1,0.137,"[front image, see, face]","[[0.080575004, -0.062069997, -0.090965, -0.190...","[0.0075879116, 0.057661, -0.19230819, -0.14730..."


In [21]:
# Export the enriched table as a pipe-delimited CSV for the client app.
# NOTE(review): index=False is not passed, so the row index is presumably
# written as an extra unnamed first column — the likely origin of the
# "Unnamed: 0" columns seen when such files are read back. Confirm the
# client expects that column before changing it.
df.to_csv("../client/public/data.csv", sep="|")