# Image recognition with TTR


## Bridging between perceptual and conceptual domains

Let's apply the object detection representation proposed in Dobnik & Cooper's *Interfacing language, spatial perception and cognition in TTR* to image recognition.

![Fig 8](fig/lspc-fig8.png)

Here we use `Image` instead of `PointMap` for the whole scene, and instead of `reg:PointMap` we use a separate type under a new label, `seg:Segment`. In Dobnik & Cooper's formulation the same type can represent both the region and the whole, because a `PointMap` is a set of absolute positions. With `Image`, positions are relative to an origin, which needs to be specified when cropping.

I guess in the general case, the domain of an `ObjectDetector` function need not be the same as the `reg` fields in the output elements.

In [1]:
import sys
sys.path.append('pyttr')
from pyttr.ttrtypes import *
from pyttr.utils import *
import PIL.Image

# NOTE(review): ttrace() comes from the pyttr star-imports; presumably it
# switches on pyttr's tracing output -- confirm against pyttr.utils.
ttrace()

# Basic types.

# Ind: the type of individuals. No witness condition is attached here.
Ind = BType('Ind')

# Int: witnessed by Python ints. Note that isinstance(x, int) is also true
# for bool values, so True/False are accepted as witnesses as well.
Int = BType('Int')
Int.learn_witness_condition(lambda x: isinstance(x, int))
print(Int.query(365))

# Image: witnessed by any PIL image object; `img` is the sample picture that
# every later cell works on.
Image = BType('Image')
Image.learn_witness_condition(lambda x: isinstance(x, PIL.Image.Image))
img = PIL.Image.open('res/dogcar.jpg')
print(Image.query(img))

# Segment type: a rectangular area of a given image.
# It is described by its centre (cx, cy), width w and height h. The field
# referring back to the source image ('i') is currently commented out.

Segment = RecType({#'i': Image,
    'cx': Int, 'cy': Int, 'w': Int, 'h': Int})
print(Segment.query(Rec({#'i': img,
    'cx': 100, 'cy': 150, 'w': 40, 'h': 20})))

# PIL images are given a show() that returns text, so that they work with
# Rec.show() (and the generic show() helper used below).
def image_show(self):
    '''Return the string form of the image.'''
    return str(self)

setattr(PIL.Image.Image, 'show', image_show)
show(img)

True
True
True


'<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=1080x1080 at 0x7F5F2C32F0B8>'

In [2]:
def latex(*objs):
    '''Print the LaTeX source of each object and return it for rich display.

    Each object is converted with to_ipython_latex(); the pieces are joined
    by a blank line, printed as plain text, and wrapped in a Latex object so
    that the notebook renders them when this is the last cell expression.
    '''
    fragments = [to_ipython_latex(obj) for obj in objs]
    texcode = '\n\n'.join(fragments)
    print(texcode)
    return Latex(texcode)

In [3]:
# Print and render the Segment record type defined above.
latex(Segment)

\begin{equation}\left[\begin{array}{rcl}
\text{cy} &:& Int\\
\text{h} &:& Int\\
\text{cx} &:& Int\\
\text{w} &:& Int
\end{array}\right]\end{equation}


<IPython.core.display.Latex object>

In [4]:
# Ppty: a property, i.e. a function from individuals (Ind) to types (Ty).
Ppty = FunType(Ind, Ty)
# ImageDetection: one detection -- the segment where something was found
# ('seg') together with the property it was recognised as ('pfun').
ImageDetection = RecType({'seg': Segment, 'pfun': Ppty})
ImageDetections = ListType(ImageDetection)
# ObjectDetector: a function from an image to a list of detections.
ObjectDetector = FunType(Image, ImageDetections)

latex(Ppty, ObjectDetector)

\begin{equation}\left(\begin{array}{rcl}
Ind\rightarrow Ty
\end{array}\right)\end{equation}

\begin{equation}\left(\begin{array}{rcl}
Image\rightarrow \left[\begin{array}{rcl}
\left[\begin{array}{rcl}
\text{seg} &:& \left[\begin{array}{rcl}
\text{cy} &:& Int\\
\text{h} &:& Int\\
\text{cx} &:& Int\\
\text{w} &:& Int
\end{array}\right]\\
\text{pfun} &:& \left(\begin{array}{rcl}
Ind\rightarrow Ty
\end{array}\right)
\end{array}\right]
\end{array}\right]
\end{array}\right)\end{equation}


<IPython.core.display.Latex object>

## Object detection model YOLO

Requires OpenCV and [Darkflow](https://github.com/thtrieu/darkflow). `yolo.weights` is from [Yolo](https://pjreddie.com/darknet/yolo/).

In [5]:
from darkflow.net.build import TFNet
import numpy as np

# Darkflow network built from the YOLO config and pre-trained weights.
# The threshold of 0.1 is what is passed to darkflow as 'threshold'.
tfnet = TFNet({
    'model': 'yolo/yolo.cfg',
    'load': 'yolo/yolo.weights',
    'config': 'yolo',
    'threshold': 0.1,
})

Parsing yolo/yolo.cfg
Loading yolo/yolo.weights ...
Successfully identified 203934260 bytes
Finished in 0.030837535858154297s
Model has a coco model name, loading coco labels.

Building net ...
Source | Train? | Layer description                | Output size
-------+--------+----------------------------------+---------------
       |        | input                            | (?, 608, 608, 3)
 Load  |  Yep!  | conv 3x3p1_1  +bnorm  leaky      | (?, 608, 608, 32)
 Load  |  Yep!  | maxp 2x2p0_2                     | (?, 304, 304, 32)
 Load  |  Yep!  | conv 3x3p1_1  +bnorm  leaky      | (?, 304, 304, 64)
 Load  |  Yep!  | maxp 2x2p0_2                     | (?, 152, 152, 64)
 Load  |  Yep!  | conv 3x3p1_1  +bnorm  leaky      | (?, 152, 152, 128)
 Load  |  Yep!  | conv 1x1p0_1  +bnorm  leaky      | (?, 152, 152, 64)
 Load  |  Yep!  | conv 3x3p1_1  +bnorm  leaky      | (?, 152, 152, 128)
 Load  |  Yep!  | maxp 2x2p0_2                     | (?, 76, 76, 128)
 Load  |  Yep!  | conv 3x3p1_1  +b

In [6]:
# Make preds and ptypes identifiable by their predicate names.
# From now on, use mkptype() instead of building PType/Pred objects directly.
ptypes = dict()
def mkptype(sym, types=None, vars=None):
    '''Return the cached PType for a predicate, creating it on first use.

    sym   -- predicate name, e.g. 'dog' or 'left'.
    types -- list of argument types of the predicate; defaults to [Ind].
    vars  -- list of variable names the ptype is built over; defaults to ['v'].

    Two calls with the same name, argument types and variables return the
    very same PType object, so a witness condition learnt on it is shared
    by every user of that predicate.
    '''
    # Fresh lists per call: the previous list literals in the signature were
    # single objects shared by every PType built with the defaults.
    if types is None:
        types = [Ind]
    if vars is None:
        vars = ['v']
    # The cache key spells out name, argument types and variables.
    key = '/'.join([sym, ','.join(show(t) for t in types), ','.join(vars)])
    if key not in ptypes:
        ptypes[key] = PType(Pred(sym, types), vars)
    return ptypes[key]

print(show(mkptype('rabbit') is mkptype('rabbit')))

True


In [7]:
def xy1xy2_to_cwh(x1, y1, x2, y2):
    '''Convert a box given by two corners into centre point and size.

    (x1, y1) and (x2, y2) are the two corners. Returns a dict with the keys
    'cx', 'cy' (centre, truncated by int()), 'w' (x2 - x1) and 'h' (y2 - y1).
    '''
    center_x = int(x1/2 + x2/2)
    center_y = int(y1/2 + y2/2)
    width = x2 - x1
    height = y2 - y1
    return dict(cx=center_x, cy=center_y, w=width, h=height)

In [8]:
def yolo_detector(i):
    '''Run the YOLO network on an image and return the detections as records.

    i -- the image; it is converted with np.array() before being handed to
         tfnet.return_predict().

    Returns a list with one Rec per detection, each holding
      'seg'  -- a Rec with the box as cx, cy, w, h (see xy1xy2_to_cwh), and
      'pfun' -- a function from an individual to the ptype named after the
                detected label.
    '''
    return [Rec({
        'seg': Rec({
            #'i': i,
            **xy1xy2_to_cwh(o['topleft']['x'], o['topleft']['y'], o['bottomright']['x'], o['bottomright']['y']),
        }),
        # mkptype()'s second positional parameter is `types`, not the variable
        # name, so only the label is passed: the defaults give a one-place
        # predicate over Ind with variable 'v', and the same cached ptype that
        # mkptype(label) returns elsewhere in the notebook.
        'pfun': Fun('v', Ind, mkptype(o['label'])),
    # TODO: RGB/BGR? np.array() of a PIL image is in RGB order, while
    # darkflow's own examples pass arrays read with cv2.imread() (BGR).
    # Confirm which channel order return_predict() expects.
    }) for o in tfnet.return_predict(np.array(i))]

# Run the detector on the sample image.
image_detections = yolo_detector(img)

# Sanity checks: the whole list, one detection, and its two fields should each
# be a witness of the corresponding type declared earlier.
print(ImageDetections.query(image_detections))
print(ImageDetection.query(image_detections[0]))
print(Ppty.query(image_detections[0].pfun))
print(Segment.query(image_detections[0].seg))

# Plain-text dump of every detection, then the first one rendered as LaTeX.
for image_detection in image_detections:
    print(show(image_detection))
latex(image_detections[0])

True
True
True
True
{seg = {cy = 654, h = 809, cx = 138, w = 276}, pfun = lambda v:Ind . person(v)}
{seg = {cy = 657, h = 796, cx = 714, w = 706}, pfun = lambda v:Ind . person(v)}
{seg = {cy = 888, h = 381, cx = 194, w = 380}, pfun = lambda v:Ind . person(v)}
{seg = {cy = 589, h = 979, cx = 490, w = 774}, pfun = lambda v:Ind . car(v)}
{seg = {cy = 714, h = 718, cx = 704, w = 687}, pfun = lambda v:Ind . dog(v)}
{seg = {cy = 541, h = 210, cx = 757, w = 219}, pfun = lambda v:Ind . chair(v)}
{seg = {cy = 687, h = 783, cx = 547, w = 778}, pfun = lambda v:Ind . chair(v)}
{seg = {cy = 677, h = 803, cx = 486, w = 957}, pfun = lambda v:Ind . sofa(v)}
{seg = {cy = 588, h = 423, cx = 93, w = 187}, pfun = lambda v:Ind . cell phone(v)}
{seg = {cy = 544, h = 107, cx = 44, w = 71}, pfun = lambda v:Ind . clock(v)}
\begin{equation}\left[\begin{array}{rcl}
\text{seg} &=& \left[\begin{array}{rcl}
\text{cy} &=& 654\\
\text{h} &=& 809\\
\text{cx} &=& 138\\
\text{w} &=& 276
\end{array}\right]\\
\text{pfun} 

<IPython.core.display.Latex object>

Here's a version where individuals are created too.

In [20]:
# DetectedInd: like ImageDetection, plus the individual ('ind') that the
# detection introduces.
DetectedInd = RecType({'seg': Segment, 'pfun': Ppty, 'ind': Ind})
DetectedInds = ListType(DetectedInd)

def yolo_detector_ind(i):
    '''Run the YOLO network on an image, creating an individual per detection.

    i -- the image; it is converted with np.array() before being handed to
         tfnet.return_predict().

    Returns a list with one Rec per detection, each holding
      'seg'  -- a Rec with the box as cx, cy, w, h (see xy1xy2_to_cwh),
      'pfun' -- a function from an individual to the ptype named after the
                detected label (spaces replaced by underscores), and
      'ind'  -- a newly created individual of type Ind.
    '''
    return [Rec({
        'seg': Rec(xy1xy2_to_cwh(o['topleft']['x'], o['topleft']['y'], o['bottomright']['x'], o['bottomright']['y'])),
        # mkptype()'s second positional parameter is `types`, not the variable
        # name, so only the label is passed: the defaults give a one-place
        # predicate over Ind with variable 'v', and the same cached ptype that
        # mkptype(label) returns elsewhere in the notebook.
        'pfun': Fun('v', Ind, mkptype(o['label'].replace(' ', '_'))),
        'ind': Ind.create(),
    }) for o in tfnet.return_predict(np.array(i))]

# Detect again, this time with individuals, check the result against the
# list type, and render the record type next to the first detection.
ind_detections = yolo_detector_ind(img)
print(DetectedInds.query(ind_detections))
latex(DetectedInd, ind_detections[0])

True
\begin{equation}\left[\begin{array}{rcl}
\text{ind} &:& Ind\\
\text{seg} &:& \left[\begin{array}{rcl}
\text{cy} &:& Int\\
\text{h} &:& Int\\
\text{cx} &:& Int\\
\text{w} &:& Int
\end{array}\right]\\
\text{pfun} &:& \left(\begin{array}{rcl}
Ind\rightarrow Ty
\end{array}\right)
\end{array}\right]\end{equation}

\begin{equation}\left[\begin{array}{rcl}
\text{ind} &=& a_{10}\\
\text{seg} &=& \left[\begin{array}{rcl}
\text{cy} &=& 654\\
\text{h} &=& 809\\
\text{cx} &=& 138\\
\text{w} &=& 276
\end{array}\right]\\
\text{pfun} &=& \lambda v:Ind\ .\ \text{person}(v)
\end{array}\right]\end{equation}


<IPython.core.display.Latex object>

## Spatial relations

In [10]:
# Index of the DetectedInd records, keyed by their individual.
ind_dets = {det.ind: det for det in ind_detections}

def _left_of(pair):
    '''Witness condition for left(a, b): a's segment centre has the smaller cx.'''
    return ind_dets[pair[0]].seg.cx < ind_dets[pair[1]].seg.cx

Left = mkptype('left', [Ind, Ind], ['a', 'b'])
Left.learn_witness_condition(_left_of)
print(show(Left))

# Try the relation on two pairs of detected individuals.
for first, second in ((0, 1), (1, 2)):
    print(Left.query((ind_detections[first].ind, ind_detections[second].ind)))

left(a, b)
True
False


In [11]:
# Two smaller views of a detected individual: what it is (ClassifiedInd) and
# where it is (LocatedInd).
ClassifiedInd = RecType({'ind': Ind, 'pfun': Ppty})
ClassifiedInds = ListType(ClassifiedInd)
LocatedInd = RecType({'ind': Ind, 'seg': Segment})
LocatedInds = ListType(LocatedInd)
# The DetectedInd records carry both sets of fields; both queries print True.
print(ClassifiedInds.query(ind_detections))
print(LocatedInds.query(ind_detections))
latex(ClassifiedInd, LocatedInd)

True
True
\begin{equation}\left[\begin{array}{rcl}
\text{ind} &:& Ind\\
\text{pfun} &:& \left(\begin{array}{rcl}
Ind\rightarrow Ty
\end{array}\right)
\end{array}\right]\end{equation}

\begin{equation}\left[\begin{array}{rcl}
\text{ind} &:& Ind\\
\text{seg} &:& \left[\begin{array}{rcl}
\text{cy} &:& Int\\
\text{h} &:& Int\\
\text{cx} &:& Int\\
\text{w} &:& Int
\end{array}\right]
\end{array}\right]\end{equation}


<IPython.core.display.Latex object>

### Combining observed situations

In [12]:
# WhatIs: a function from a DetectedInd record r to a record type with
#   'a' -- an individual, and
#   'c' -- a dependent field: r.pfun applied to the value of 'a'.
# LazyObj(['r', '.', 'pfun']) stands for the path r.pfun; the rendering below
# shows it unresolved until the function is applied to a concrete record.
WhatIs = Fun('r', DetectedInd, RecType({
    'a': Ind,
    'c': (LazyObj(['r', '.', 'pfun']), ['a'])
}))
latex(WhatIs)

\begin{equation}\lambda r:\left[\begin{array}{rcl}
\text{ind} &:& Ind\\
\text{seg} &:& \left[\begin{array}{rcl}
\text{cy} &:& Int\\
\text{h} &:& Int\\
\text{cx} &:& Int\\
\text{w} &:& Int
\end{array}\right]\\
\text{pfun} &:& \left(\begin{array}{rcl}
Ind\rightarrow Ty
\end{array}\right)
\end{array}\right]\ .\ \left[\begin{array}{rcl}
\text{c} &:& \langle [r, ., pfun], [a]\rangle\\
\text{a} &:& Ind
\end{array}\right]\end{equation}


<IPython.core.display.Latex object>

In [21]:
# Apply WhatIs to every detected individual: one record type per detection.
whats = [WhatIs.app(r) for r in ind_detections]
latex(whats)

\begin{equation}\left[\begin{array}{rcl} \left[\begin{array}{rcl}
\text{c} &:& \langle \lambda v:Ind\ .\ \text{person}(v), [a]\rangle\\
\text{a} &:& Ind
\end{array}\right]\\
\left[\begin{array}{rcl}
\text{c} &:& \langle \lambda v:Ind\ .\ \text{person}(v), [a]\rangle\\
\text{a} &:& Ind
\end{array}\right]\\
\left[\begin{array}{rcl}
\text{c} &:& \langle \lambda v:Ind\ .\ \text{person}(v), [a]\rangle\\
\text{a} &:& Ind
\end{array}\right]\\
\left[\begin{array}{rcl}
\text{c} &:& \langle \lambda v:Ind\ .\ \text{car}(v), [a]\rangle\\
\text{a} &:& Ind
\end{array}\right]\\
\left[\begin{array}{rcl}
\text{c} &:& \langle \lambda v:Ind\ .\ \text{dog}(v), [a]\rangle\\
\text{a} &:& Ind
\end{array}\right]\\
\left[\begin{array}{rcl}
\text{c} &:& \langle \lambda v:Ind\ .\ \text{chair}(v), [a]\rangle\\
\text{a} &:& Ind
\end{array}\right]\\
\left[\begin{array}{rcl}
\text{c} &:& \langle \lambda v:Ind\ .\ \text{chair}(v), [a]\rangle\\
\text{a} &:& Ind
\end{array}\right]\\
\left[\begin{array}{rcl}
\text{c} &:

<IPython.core.display.Latex object>

In [14]:
from functools import reduce
def combine_prev(*ts):
    '''Chain record types left to right, nesting the earlier ones under 'prev'.

    Starting from the first argument, each further (truthy) record type is
    merged into a record type whose 'prev' field holds everything combined so
    far; falsy arguments are skipped. A single argument is returned as is.
    '''
    def nest(earlier, later):
        if not later:
            return earlier
        return RecType({'prev': earlier}).merge(later)

    return reduce(nest, ts)
# Small demonstration: the first record type ends up under 'prev' of the second.
latex(combine_prev(RecType({'a': 'A'}), RecType({'b': 'B'})))

\begin{equation}\left[\begin{array}{rcl}
\text{prev} &:& \left[\begin{array}{rcl}
\text{a} &:& A
\end{array}\right]\\
\text{b} &:& B
\end{array}\right]\end{equation}


<IPython.core.display.Latex object>

In [28]:
# Chain all the WhatIs record types into one nested record type.
comb = combine_prev(*whats)
latex(comb)

\begin{equation}\left[\begin{array}{rcl}
\text{c} &:& \langle \lambda v:Ind\ .\ \text{clock}(v), [a]\rangle\\
\text{prev} &:& \left[\begin{array}{rcl}
\text{c} &:& \langle \lambda v:Ind\ .\ \text{cell_phone}(v), [a]\rangle\\
\text{prev} &:& \left[\begin{array}{rcl}
\text{c} &:& \langle \lambda v:Ind\ .\ \text{sofa}(v), [a]\rangle\\
\text{prev} &:& \left[\begin{array}{rcl}
\text{c} &:& \langle \lambda v:Ind\ .\ \text{chair}(v), [a]\rangle\\
\text{prev} &:& \left[\begin{array}{rcl}
\text{c} &:& \langle \lambda v:Ind\ .\ \text{chair}(v), [a]\rangle\\
\text{prev} &:& \left[\begin{array}{rcl}
\text{c} &:& \langle \lambda v:Ind\ .\ \text{dog}(v), [a]\rangle\\
\text{prev} &:& \left[\begin{array}{rcl}
\text{c} &:& \langle \lambda v:Ind\ .\ \text{car}(v), [a]\rangle\\
\text{prev} &:& \left[\begin{array}{rcl}
\text{c} &:& \langle \lambda v:Ind\ .\ \text{person}(v), [a]\rangle\\
\text{prev} &:& \left[\begin{array}{rcl}
\text{c} &:& \langle \lambda v:Ind\ .\ \text{person}(v), [a]\rangle\\
\text{pr

<IPython.core.display.Latex object>

In [29]:
# Flatten the nesting: the rendering shows dotted path labels such as 'prev.prev.a'.
combflat = comb.flatten()
latex(combflat)

\begin{equation}\left[\begin{array}{rcl}
\text{prev.prev.prev.prev.prev.a} &:& Ind\\
\text{prev.a} &:& Ind\\
\text{prev.prev.prev.a} &:& Ind\\
\text{prev.prev.a} &:& Ind\\
\text{prev.prev.prev.prev.prev.prev.prev.prev.prev.a} &:& Ind\\
\text{prev.prev.prev.prev.prev.c} &:& \langle \lambda v:Ind\ .\ \text{dog}(v), [prev.prev.prev.prev.prev.a]\rangle\\
\text{prev.prev.prev.prev.prev.prev.prev.a} &:& Ind\\
\text{prev.prev.c} &:& \langle \lambda v:Ind\ .\ \text{sofa}(v), [prev.prev.a]\rangle\\
\text{a} &:& Ind\\
\text{prev.prev.prev.prev.prev.prev.prev.prev.prev.c} &:& \langle \lambda v:Ind\ .\ \text{person}(v), [prev.prev.prev.prev.prev.prev.prev.prev.prev.a]\rangle\\
\text{prev.prev.prev.prev.prev.prev.a} &:& Ind\\
\text{prev.prev.prev.prev.prev.prev.prev.c} &:& \langle \lambda v:Ind\ .\ \text{person}(v), [prev.prev.prev.prev.prev.prev.prev.a]\rangle\\
\text{prev.prev.prev.prev.a} &:& Ind\\
\text{prev.prev.prev.prev.c} &:& \langle \lambda v:Ind\ .\ \text{chair}(v), [prev.prev.prev.prev.a

<IPython.core.display.Latex object>

In [30]:
def rec_merge(r, t):
    '''Return a new Rec holding the attributes of `t` and `r`; `r` wins on clashes.'''
    return Rec({**t.__dict__, **r.__dict__})
def combine_prev_rec(*rs):
    '''Record counterpart of combine_prev().

    Starting from the first argument, each further (truthy) record is merged
    with a record whose 'prev' field holds everything combined so far; falsy
    arguments are skipped. A single argument is returned as is.
    '''
    def nest(earlier, later):
        if not later:
            return earlier
        return rec_merge(Rec({'prev': earlier}), later)

    return reduce(nest, rs)

# Chain the detection records themselves the same way and flatten the result.
combflat_rec = combine_prev_rec(*ind_detections).flatten()
latex(combflat_rec)

\begin{equation}\left[\begin{array}{rcl}
\text{prev.prev.prev.prev.prev.prev.prev.prev.prev.pfun} &=& \lambda v:Ind\ .\ \text{person}(v)\\
\text{prev.seg.w} &=& 187\\
\text{prev.prev.prev.prev.prev.prev.prev.seg.h} &=& 381\\
\text{seg.w} &=& 71\\
\text{prev.seg.h} &=& 423\\
\text{prev.prev.prev.prev.prev.prev.prev.prev.prev.seg.h} &=& 809\\
\text{prev.prev.prev.prev.prev.prev.prev.prev.seg.w} &=& 706\\
\text{prev.prev.prev.prev.prev.prev.prev.seg.cx} &=& 194\\
\text{prev.prev.prev.prev.prev.prev.pfun} &=& \lambda v:Ind\ .\ \text{car}(v)\\
\text{prev.prev.prev.prev.prev.prev.seg.cx} &=& 490\\
\text{prev.ind} &=& a_{18}\\
\text{prev.prev.prev.prev.prev.prev.prev.ind} &=& a_{12}\\
\text{prev.prev.prev.prev.prev.seg.cy} &=& 714\\
\text{prev.prev.prev.prev.prev.prev.prev.prev.prev.seg.cx} &=& 138\\
\text{prev.seg.cy} &=& 588\\
\text{prev.prev.prev.prev.prev.prev.prev.prev.seg.h} &=& 796\\
\text{prev.prev.prev.prev.prev.pfun} &=& \lambda v:Ind\ .\ \text{dog}(v)\\
\text{prev.prev.prev.prev.pr

<IPython.core.display.Latex object>

## Spatial relations

In [10]:
# NOTE(review): this cell repeats the earlier "Spatial relations" cell
# verbatim. mkptype() returns the cached 'left' ptype, so running both cells
# calls learn_witness_condition() on the same object twice -- consider
# keeping only one copy.
# An index of the DetectedInd records by their individual.
ind_dets = dict((r.ind, r) for r in ind_detections)

Left = mkptype('left', [Ind, Ind], ['a', 'b'])
# left(a, b) holds when a's segment centre has a smaller cx than b's.
Left.learn_witness_condition(lambda ab: ind_dets[ab[0]].seg.cx < ind_dets[ab[1]].seg.cx)
print(show(Left))

print(Left.query((ind_detections[0].ind, ind_detections[1].ind)))
print(Left.query((ind_detections[1].ind, ind_detections[2].ind)))

left(a, b)
True
False


## Text parsing

In [17]:
def create_abc(prop_a, prop_b, rel):
    '''Creates a record type describing two individuals and a relation between them.

    prop_a -- predicate name holding of the first individual ('a_1').
    prop_b -- predicate name holding of the second individual ('a_2').
    rel    -- name of the two-place predicate relating 'a_1' and 'a_2'.

    The constraint fields are labelled 'c_{<name>}'. When both individuals
    have the same property the two labels would be identical, so they are
    numbered ('c_{<name>,1}', 'c_{<name>,2}') to keep both constraints.
    '''
    label_a = 'c_{' + prop_a + '}'
    label_b = 'c_{' + prop_b + '}'
    if label_a == label_b:
        # Identical keys in one dict literal silently keep only the last
        # entry, which would drop the constraint on a_1.
        label_a = 'c_{' + prop_a + ',1}'
        label_b = 'c_{' + prop_b + ',2}'
    return RecType({
        'a_1': Ind,
        'a_2': Ind,
        label_a: (Fun('v', Ind, mkptype(prop_a)), ['a_1']),
        label_b: (Fun('v', Ind, mkptype(prop_b)), ['a_2']),
        'c_{' + rel + '}': (Fun('a', Ind, Fun('b', Ind, mkptype(rel, [Ind, Ind], ['a', 'b']))), ['a_1', 'a_2'])
    })

# Hand-built record type for the sentence printed below.
print("A dog is to the left of a car")
question = create_abc('dog', 'car', 'left')
latex(question)

A dog is to the left of a car
\begin{equation}\left[\begin{array}{rcl}
\text{c}_\text{dog} &:& \langle \lambda v:Ind\ .\ \text{dog}(v), [a_1]\rangle\\
\text{c}_\text{car} &:& \langle \lambda v:Ind\ .\ \text{car}(v), [a_2]\rangle\\
\text{a}_\text{1} &:& Ind\\
\text{c}_\text{left} &:& \langle \lambda a:Ind\ .\ \lambda b:Ind\ .\ \text{left}(a, b), [a_1, a_2]\rangle\\
\text{a}_\text{2} &:& Ind
\end{array}\right]\end{equation}


<IPython.core.display.Latex object>

In [18]:
import nltk

# Feature grammar for sentences of the form "<NP> is <Prep> <NP>".
# The sentence's SEM feature is the tuple (first noun, second noun,
# preposition), which is the argument order of create_abc().
grammar = nltk.grammar.FeatureGrammar.fromstring('''
%start S
S[SEM=(?a, ?b, ?prep)] -> NP[SEM=?a] 'is' Prep[SEM=?prep] NP[SEM=?b]
NP[DEF=?def, SEM=?n] -> Det[DEF=?def] N[SEM=?n]
N[SEM=<dog>] -> 'dog'
N[SEM=<car>] -> 'car'
N[SEM=<person>] -> 'person'
N[SEM=<chair>] -> 'chair'
Det -> 'a' | 'an'
Prep[SEM=<left>] -> 'to' 'the' 'left' 'of'
Prep[SEM=<right>] -> 'to' 'the' 'right' 'of'
Prep[SEM=<above>] -> 'above'
Prep[SEM=<under>] -> 'under'
''')
parser = nltk.FeatureChartParser(grammar)

# Sentences to parse. The commented-out variants are not covered by the
# grammar above (it has no rule for 'there', questions, or 'the' as Det).
texts = [
    'A dog is to the left of a car',
#     'There is a dog to the left of a car',
#     'Is the dog to the left of the car',
#     'Is there a dog to the left of the car',
]

def parse_abc(text):
    '''Parse a sentence with the feature grammar and build its record type.

    text -- a sentence such as 'A dog is to the left of a car'; it is
            lower-cased and split on whitespace before parsing.

    Returns create_abc() applied to the string forms of the elements of the
    root SEM value of the first parse tree.
    Raises ValueError if the parser yields no tree for the text.
    '''
    trees = parser.parse(text.lower().split())
    # Only the first parse is used; report a missing parse explicitly instead
    # of failing with an IndexError on an empty list.
    tree = next(iter(trees), None)
    if tree is None:
        raise ValueError('No parse found for: ' + repr(text))
    sem = nltk.sem.root_semrep(tree)
    return create_abc(*(str(s) for s in sem))

# Parse every sample sentence and print its record type as plain text.
for text in texts:
    r = parse_abc(text)
    print(show(r))

# `r` is the record type of the last sentence parsed; it is rendered here and
# reused by the next cell.
Latex(to_ipython_latex(r))

{c_{dog} : (lambda v:Ind . dog(v), [a_1]), c_{car} : (lambda v:Ind . car(v), [a_2]), a_1 : Ind, c_{left} : (lambda a:Ind . lambda b:Ind . left(a, b), [a_1, a_2]), a_2 : Ind}


<IPython.core.display.Latex object>

In [19]:
# Testing Relabel() fix.
# NOTE(review): the return value is discarded and `r` is rendered afterwards,
# so Relabel() presumably renames the label in place -- confirm. If so, this
# cell is not idempotent: on a second run 'a_1' no longer exists in `r`.
r.Relabel('a_1', 'a_{dog}')
Latex(to_ipython_latex(r))

<IPython.core.display.Latex object>