In [34]:
import pandas as pd
import numpy as np
import spacy
from rake_nltk import Rake
import nltk
from flask import jsonify
from cnn import CelebADataset, cnn
import torch
import imageio as io
from torchvision import transforms

In [28]:
# Fetch the NLTK resources before building the keyword extractor below.
# Presumably Rake relies on the stopword list and the punkt tokenizer —
# TODO confirm against rake_nltk's requirements.
nltk.download('stopwords')
nltk.download('punkt')

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/acabrera/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /Users/acabrera/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [22]:
# Build the network and restore its trained weights from the checkpoint file.
model = cnn()
# map_location="cpu" makes the load independent of the device the checkpoint
# was saved from: `model` is never moved off the CPU in this notebook, and a
# checkpoint written on a GPU would otherwise fail to load on a CPU-only host.
model.load_state_dict(torch.load("../model3epoch.ckpt", map_location="cpu"))
# Switch to inference mode (the BatchNorm layers then use their running
# statistics); as the last expression this also displays the module summary.
model.eval()

cnn(
  (layer1): Sequential(
    (0): Conv2d(3, 8, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2))
    (1): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer2): Sequential(
    (0): Conv2d(8, 16, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2))
    (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer3): Sequential(
    (0): Conv2d(16, 32, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2))
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc): Linear(in_features=288, out_features=1, bias=True)
  (final): Sigmoid()
)

In [3]:
# spaCy pipeline whose `.vector` attribute supplies the embeddings used below.
nlp = spacy.load("en_core_web_md")
# Shared keyword extractor used by get_keywords; max_length=5 presumably caps
# the number of words per extracted phrase — confirm in the rake_nltk docs.
r = Rake(max_length=5)

In [6]:
# Load the pipe-delimited report table and discard the two leftover
# "Unnamed" columns in one step.
df = pd.read_csv("reports-glasses-v2.csv", sep="|").drop(
    columns=["Unnamed: 0", "Unnamed: 0.1"]
)

In [7]:
# Preview the first rows after the two "Unnamed" columns have been dropped.
df.head()

Unnamed: 0,confidence,description,image,label
0,1,His eye region is dark which may cause confusion.,194653,-1
1,2,Her face is at an angle rather than straight on.,184352,1
2,3,"His glasses are clear and have a very thin, ba...",181108,1
3,4,There is very little contrast between the glas...,177130,1
4,3,There isn't enough light for the algorithm to ...,174902,1


In [8]:
# Number of rows in the loaded report table.
len(df)

228

In [9]:
def get_keywords(sen):
    """Extract keyword phrases from the text `sen` with the shared Rake instance.

    The text is fed to the module-level extractor `r`, and the phrases it
    reports through `get_ranked_phrases()` are returned unchanged.
    """
    r.extract_keywords_from_text(sen)
    ranked_phrases = r.get_ranked_phrases()
    return ranked_phrases

In [10]:
def get_vecs(keywords):
    """Embed every phrase in `keywords` with the spaCy pipeline `nlp`.

    Returns a list with one entry per phrase, in input order; each entry is
    `nlp(phrase).vector` converted to a plain Python list of its elements.
    """
    return [list(nlp(phrase).vector) for phrase in keywords]

In [32]:
def get_pred(img):
    """Score one image with the loaded `model` and return the result as text.

    Parameters
    ----------
    img
        Image identifier; it is converted with `str()` and the file
        ``../img_align_celeba/<img>.jpg`` is read.

    Returns
    -------
    str
        The model's scalar output rounded to three decimals, as a string.
    """
    image = io.imread("../img_align_celeba/" + str(img) + ".jpg")
    # A single ToTensor needs no Compose wrapper; it turns the decoded image
    # array into a tensor the network can consume.
    tensor = transforms.ToTensor()(image)
    # Inference only: disable autograd so no graph is built for each image.
    with torch.no_grad():
        # unsqueeze(0) adds a leading dimension of size 1 (presumably the
        # batch axis the network expects).
        score = model(tensor.unsqueeze(0)).item()
    return str(round(score, 3))

In [12]:
# One list of extracted keyword phrases per report description.
df["keywords"] = df["description"].map(get_keywords)

In [13]:
# One embedding per keyword phrase (a list of vectors for every row).
df["vecs"] = df["keywords"].map(get_vecs)

In [14]:
# Embedding of the whole description text (a single vector per row).
df["vec"] = df["description"].apply(lambda text: list(nlp(text).vector))

In [35]:
# Model score for every referenced image, stored as the string get_pred returns.
df["pred"] = df["image"].map(get_pred)

In [36]:
# Inspect the frame again now that keywords, vecs, vec and pred have been added.
df.head()

Unnamed: 0,confidence,description,image,label,keywords,vecs,vec,pred
0,1,His eye region is dark which may cause confusion.,194653,-1,"[may cause confusion, eye region, dark]","[[-0.2799537, 0.13365866, -0.25872934, -0.0339...","[-0.038665097, 0.076024495, -0.08534257, -0.07...",0.694
1,2,Her face is at an angle rather than straight on.,184352,1,"[angle rather, straight, face]","[[-0.019873999, 0.2108075, -0.25791, 0.0650215...","[-0.071306005, 0.17473164, -0.15770243, 0.1243...",0.43
2,3,"His glasses are clear and have a very thin, ba...",181108,1,"[barely visible frame, thin, glasses, clear]","[[0.12441834, -0.122789346, -0.38401666, 0.131...","[-0.04534092, 0.032248147, -0.28398114, 0.1017...",0.602
3,4,There is very little contrast between the glas...,177130,1,"[little contrast, glasses, face]","[[-0.325835, 0.33699, -0.25858998, 0.084259994...","[-0.06288666, 0.05702166, -0.19873732, -0.0624...",0.995
4,3,There isn't enough light for the algorithm to ...,174902,1,"[enough light, glasses, detect, algorithm]","[[-0.12063425, 0.241955, 0.0317, 0.166229, -0....","[-0.008412112, 0.14015076, -0.14917213, -0.089...",0.894


In [37]:
# Export the enriched table for the client as a pipe-delimited file.
# index=True is the pandas default, spelled out here: the row index is written
# as an unnamed first column, which shows up as "Unnamed: 0" if the file is
# read back with pd.read_csv.
# NOTE(review): switch to index=False only after confirming the client does not
# read that column.
df.to_csv("../client/public/data.csv", sep="|", index=True)