# Image recognition with TTR


## Bridging between perceptual and conceptual domains

Let's apply the object detection representation proposed in Dobnik & Cooper's *Interfacing language, spatial perception and cognition in TTR* to image recognition.

![Fig 8](fig/lspc-fig8.png)

Here, we use `Image` instead of `PointMap` for the whole scene. For the region, instead of `reg:PointMap` we use a different type under a new label, `seg:Segment`. In Dobnik & Cooper's case the same type can be used to represent both the region and the whole, because a `PointMap` is a set of absolute positions. With `Image`, positions are relative to an origin, which needs to be specified when cropping.

I guess that in the general case, the domain of an `ObjectDetector` function need not be of the same type as the `reg` fields of its output elements.

In [2]:
import sys
sys.path.append('pyttr')
from pyttr.ttrtypes import *
from pyttr.utils import *
import PIL.Image

# NOTE(review): ttrace() comes from one of the pyttr star-imports above;
# presumably it switches on pyttr's tracing — confirm against pyttr.utils.
ttrace()

# Basic types.

# Individuals. No witness condition is registered for Ind in this cell.
Ind = BType('Ind')

# Integers: the witness condition registered here is "is a Python int".
# NOTE(review): bool is a subclass of int, so True/False also pass this
# check — confirm whether that is intended.
Int = BType('Int')
Int.learn_witness_condition(lambda x: isinstance(x, int))
# Sanity check with a plain int.
print(Int.query(365))

# Images: the witness condition registered here is "is a PIL image object".
Image = BType('Image')
Image.learn_witness_condition(lambda x: isinstance(x, PIL.Image.Image))
# The sample picture used by the cells below.
img = PIL.Image.open('res/dogcar.jpg')
print(Image.query(img))

# Segment type: a rectangular area of a given image.
# It is a record type with four Int fields: cx, cy, w and h. The helper
# xy1xy2_to_cwh further down describes these as center, width and height.
# The 'i' field (the image the segment belongs to) is currently commented out,
# both in the type and in the sample record.

Segment = RecType({#'i': Image,
    'cx': Int, 'cy': Int, 'w': Int, 'h': Int})
# Sanity check: query the type with a hand-written record of four ints.
print(Segment.query(Rec({#'i': img,
    'cx': 100, 'cy': 150, 'w': 40, 'h': 20})))

# Textual stand-in for Image.show(), so that images work with Rec.show().
def image_show(self):
    '''Return the plain string form of *self*.'''
    as_text = str(self)
    return as_text
# Monkey-patch: from here on every PIL image uses image_show as its show()
# method, i.e. show() returns a string.
PIL.Image.Image.show = image_show
# Last expression of the cell: the recorded output is the image's string form.
show(img)

True
True
True


'<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=1080x1080 at 0x7F0B981042E8>'

In [3]:
def latex(*objs):
    '''Print the LaTeX code for the given objects and return it for display.

    Each object is converted with to_ipython_latex(); the resulting pieces are
    separated by a blank line, printed, and wrapped in a Latex object.
    '''
    pieces = [to_ipython_latex(obj) for obj in objs]
    texcode = '\n\n'.join(pieces)
    print(texcode)
    return Latex(texcode)

In [4]:
# Render the Segment record type defined above.
latex(Segment)

\begin{equation}\left[\begin{array}{rcl}
\text{w} &:& Int\\
\text{h} &:& Int\\
\text{cy} &:& Int\\
\text{cx} &:& Int
\end{array}\right]\end{equation}


<IPython.core.display.Latex object>

In [5]:
# A property: a function type from individuals (Ind) to types (Ty).
Ppty = FunType(Ind, Ty)
# One detected object: an individual 'x', the segment 'seg' it was found in,
# and a property function 'pfun'.
ImageDetection = RecType({'x': Ind, 'seg': Segment, 'pfun': Ppty})
# A list of such detections.
ImageDetections = ListType(ImageDetection)
# An object detector: a function type from Image to a list of detections.
ObjectDetector = FunType(Image, ImageDetections)

# Render the property type and the detector type.
latex(Ppty, ObjectDetector)

\begin{equation}\left(\begin{array}{rcl}
Ind\rightarrow Ty
\end{array}\right)\end{equation}

\begin{equation}\left(\begin{array}{rcl}
Image\rightarrow \left[\begin{array}{rcl}
\left[\begin{array}{rcl}
\text{x} &:& Ind\\
\text{pfun} &:& \left(\begin{array}{rcl}
Ind\rightarrow Ty
\end{array}\right)\\
\text{seg} &:& \left[\begin{array}{rcl}
\text{w} &:& Int\\
\text{h} &:& Int\\
\text{cy} &:& Int\\
\text{cx} &:& Int
\end{array}\right]
\end{array}\right]
\end{array}\right]
\end{array}\right)\end{equation}


<IPython.core.display.Latex object>

## Object detection model YOLO

Requires OpenCV and [Darkflow](https://github.com/thtrieu/darkflow). `yolo.weights` is from [Yolo](https://pjreddie.com/darknet/yolo/).

In [6]:
from darkflow.net.build import TFNet
import numpy as np

# Build the Darkflow network from the YOLO config file and the weights file.
# NOTE(review): "threshold": 0.1 is presumably the minimum confidence for a
# detection to be reported — confirm against Darkflow's options.
tfnet = TFNet({"model": "yolo/yolo.cfg", "load": "yolo/yolo.weights",
    'config': 'yolo', "threshold": 0.1})
# Cache of predictions, keyed by a digest of the image's pixel data.
yolo_out = dict()
def yolo(img):
    '''Return the YOLO predictions for img, computing them once per image content.

    img is converted with np.array() and handed to tfnet.return_predict(); the
    result is stored in yolo_out and returned unchanged on later calls.

    The cache key is built from the pixel data (shape, dtype and a SHA-1 of the
    bytes) instead of str(img). For a PIL image that string only contains the
    class, mode, size and memory address, so a different image of the same size
    that ends up at a reused address would be served stale predictions.
    '''
    import hashlib  # function-scope import: nothing else in this cell needs it

    pixels = np.array(img)
    key = '/'.join([
        str(pixels.shape),
        str(pixels.dtype),
        hashlib.sha1(pixels.tobytes()).hexdigest(),
    ])
    # NOTE(review): np.array() of a PIL image yields RGB channel order, while
    # Darkflow's own examples pass arrays read with cv2.imread (BGR) — confirm
    # whether the channels need to be reversed before prediction.
    if key not in yolo_out:
        yolo_out[key] = tfnet.return_predict(pixels)
    return yolo_out[key]

Parsing yolo/yolo.cfg
Loading yolo/yolo.weights ...
Successfully identified 203934260 bytes
Finished in 0.040488481521606445s
Model has a coco model name, loading coco labels.

Building net ...
Source | Train? | Layer description                | Output size
-------+--------+----------------------------------+---------------
       |        | input                            | (?, 608, 608, 3)
 Load  |  Yep!  | conv 3x3p1_1  +bnorm  leaky      | (?, 608, 608, 32)
 Load  |  Yep!  | maxp 2x2p0_2                     | (?, 304, 304, 32)
 Load  |  Yep!  | conv 3x3p1_1  +bnorm  leaky      | (?, 304, 304, 64)
 Load  |  Yep!  | maxp 2x2p0_2                     | (?, 152, 152, 64)
 Load  |  Yep!  | conv 3x3p1_1  +bnorm  leaky      | (?, 152, 152, 128)
 Load  |  Yep!  | conv 1x1p0_1  +bnorm  leaky      | (?, 152, 152, 64)
 Load  |  Yep!  | conv 3x3p1_1  +bnorm  leaky      | (?, 152, 152, 128)
 Load  |  Yep!  | maxp 2x2p0_2                     | (?, 76, 76, 128)
 Load  |  Yep!  | conv 3x3p1_1  +b

In [7]:
# Make preds and ptypes identifiable by their predicate names.
# From now on, use mkptype().
ptypes = dict()
def mkptype(sym, types=None, vars=None):
    '''Return the PType for predicate sym, creating it on first use.

    sym   -- predicate name (a string).
    types -- argument types of the predicate; defaults to [Ind].
    vars  -- argument variable names; defaults to ['v'].

    PTypes are cached in ptypes under a key built from sym, the shown form of
    each argument type and the variable names, so asking twice for the same
    combination returns the very same object.
    '''
    # Fresh default lists on every call: the lists are passed on to Pred and
    # PType, so shared default objects could be modified behind our back.
    if types is None:
        types = [Ind]
    if vars is None:
        vars = ['v']
    key = '/'.join([sym, ','.join(show(t) for t in types), ','.join(vars)])
    if key not in ptypes:
        ptypes[key] = PType(Pred(sym, types), vars)
    return ptypes[key]

print(show(mkptype('rabbit') is mkptype('rabbit')))

True


In [8]:
def xy1xy2_to_cwh(x1, y1, x2, y2):
    '''Convert a box given by two corners into center, width and height.

    (x1, y1) and (x2, y2) are the two corners. The result is a dict with the
    center 'cx', 'cy' (truncated to int) and the extents 'w' and 'h'.
    '''
    center_x = int(x1/2 + x2/2)
    center_y = int(y1/2 + y2/2)
    width = x2 - x1
    height = y2 - y1
    return dict(cx=center_x, cy=center_y, w=width, h=height)

In [12]:
def yolo_detector(i):
    '''Run YOLO on image i and return one detection record per prediction.

    Each record has a freshly created individual 'x', a segment 'seg' computed
    from the prediction's 'topleft' and 'bottomright' corners, and a property
    function 'pfun' built from the prediction's label (spaces replaced by
    underscores).
    '''
    detections = []
    for found in yolo(i):  # @todo RGB/BGR?
        individual = Ind.create()
        top_left, bottom_right = found['topleft'], found['bottomright']
        segment = Rec(xy1xy2_to_cwh(
            top_left['x'], top_left['y'], bottom_right['x'], bottom_right['y']))
            # 'i': i is not stored on the segment for now.
        label = found['label'].replace(' ', '_')
        prop = Fun('v', Ind, mkptype(label))
        detections.append(Rec({'x': individual, 'seg': segment, 'pfun': prop}))
    return detections

# Run the detector on the sample image.
image_detections = yolo_detector(img)

# Check that the result, its first element and that element's parts are
# judged to be of the declared types.
print(ImageDetections.query(image_detections))
print(ImageDetection.query(image_detections[0]))
print(Ppty.query(image_detections[0].pfun))
print(Segment.query(image_detections[0].seg))

# Render the last detection.
latex(image_detections[-1])

True
True
True
True
\begin{equation}\left[\begin{array}{rcl}
\text{seg} &=& \left[\begin{array}{rcl}
\text{h} &=& 107\\
\text{w} &=& 71\\
\text{cy} &=& 544\\
\text{cx} &=& 44
\end{array}\right]\\
\text{x} &=& a_{19}\\
\text{pfun} &=& \lambda v:Ind\ .\ \text{clock}(v)
\end{array}\right]\end{equation}


<IPython.core.display.Latex object>

In [18]:
def sit_prop(r):
    '''Build the situation type saying that r's individual has r's property.

    The result is a record type with 'x', the singleton type of r.x, and 'c',
    the property function r.pfun paired with the path ['x'].
    '''
    fields = {}
    fields['x'] = SingletonType(Ind, r.x)
    fields['c'] = (r.pfun, ['x'])
    return RecType(fields)
latex(sit_prop(image_detections[0]))

\begin{equation}\left[\begin{array}{rcl}
\text{c} &:& \langle \lambda v:Ind\ .\ \text{person}(v), [ x]\rangle\\
\text{x} &:& Ind_{a_{10}}
\end{array}\right]\end{equation}


<IPython.core.display.Latex object>

In [19]:
# Two-place predicate type location(v_1, v_2) over an individual and a
# segment, and the curried function that produces it.
Loc = mkptype('location', [Ind, Segment], ['v_1', 'v_2'])
LocFun = Fun('v_1', Ind, Fun('v_2', Segment, Loc))

def sit_loc(r):
    '''Build the situation type saying that r's individual is located at r.seg.

    The result is a record type with 'x', the singleton type of r.x, and 'c',
    LocFun paired with the arguments ['x', r.seg].
    '''
    individual = SingletonType(Ind, r.x)
    located = (LocFun, ['x', r.seg])
    return RecType({'x': individual, 'c': located})
latex(sit_loc(image_detections[0]))

\begin{equation}\left[\begin{array}{rcl}
\text{c} &:& \langle \lambda v_1:Ind\ .\ \lambda v_2:\left[\begin{array}{rcl}
\text{w} &:& Int\\
\text{h} &:& Int\\
\text{cy} &:& Int\\
\text{cx} &:& Int
\end{array}\right]\ .\ \text{location}(v_1, v_2), [ x, \left[\begin{array}{rcl}
\text{h} &=& 809\\
\text{w} &=& 276\\
\text{cy} &=& 654\\
\text{cx} &=& 138
\end{array}\right]]\rangle\\
\text{x} &:& Ind_{a_{10}}
\end{array}\right]\end{equation}


<IPython.core.display.Latex object>

## Spatial relations

In [25]:
# Index the detections by their individual (the 'x' field).
ind_dets = {det.x: det for det in image_detections}

# left(a, b): witnessed by a pair of individuals when the first one's segment
# has a smaller cx than the second one's.
Left = mkptype('left', [Ind, Ind], ['a', 'b'])
Left.learn_witness_condition(
    lambda pair: ind_dets[pair[0]].seg.cx < ind_dets[pair[1]].seg.cx)
print(show(Left))

print(Left.query((image_detections[0].x, image_detections[1].x)))
print(Left.query((image_detections[1].x, image_detections[2].x)))

left(a, b)
True
False


In [32]:
# Relations tried by sit_rel(), in this order.
binary_relations = [Left]

def sit_rel(r, s):
    '''Yield a situation type for every binary relation holding between r and s.

    For each relation in binary_relations whose query on (r.x, s.x) succeeds,
    yields a record type with 'x' and 'y', the singleton types of r.x and s.x,
    and 'c', the relation paired with the paths ['x', 'y'].
    '''
    for rel in binary_relations:
        if not rel.query((r.x, s.x)):
            continue
        first = SingletonType(Ind, r.x)
        second = SingletonType(Ind, s.x)
        yield RecType({'x': first, 'y': second, 'c': (rel, ['x', 'y'])})

latex(list(sit_rel(image_detections[0], image_detections[1])),
      list(sit_rel(image_detections[1], image_detections[0])))

\begin{equation}[ \left[\begin{array}{rcl}
\text{x} &:& Ind_{a_{10}}\\
\text{y} &:& Ind_{a_{11}}\\
\text{c} &:& \langle \text{left}(a, b), [ x, y]\rangle
\end{array}\right]]\end{equation}

\begin{equation}[ ]\end{equation}


<IPython.core.display.Latex object>

## Combining commitments

In [36]:
from functools import reduce
def combine_prev(*ts):
    '''Chain the given types, nesting everything combined so far under 'prev'.

    Going left to right, each later argument that is truthy is merged into a
    record type whose 'prev' field holds the combination built so far; falsy
    later arguments are skipped. A single argument is returned unchanged.
    '''
    def nest(t1, t2):
        if not t2:
            return t1
        return RecType({'prev': t1}).merge(t2)
    return reduce(nest, ts)
# Demo: combine two single-field record types and render the result.
latex(combine_prev(RecType({'a': 'A'}), RecType({'b': 'B'})))

\begin{equation}\left[\begin{array}{rcl}
\text{prev} &:& \left[\begin{array}{rcl}
\text{a} &:& A
\end{array}\right]\\
\text{b} &:& B
\end{array}\right]\end{equation}


<IPython.core.display.Latex object>

In [42]:
from itertools import product

# Work with the last three detections only.
image_detections_few = image_detections[-3:]
# Property situations first, then location situations, then the relation
# situations for every ordered pair of detections.
situations = (
    [sit_prop(det) for det in image_detections_few]
    + [sit_loc(det) for det in image_detections_few]
    + [sit
       for (r, s) in product(image_detections_few, image_detections_few)
       for sit in sit_rel(r, s)]
)
comb = combine_prev(*situations)
latex(comb)

\begin{equation}\left[\begin{array}{rcl}
\text{prev} &:& \left[\begin{array}{rcl}
\text{prev} &:& \left[\begin{array}{rcl}
\text{prev} &:& \left[\begin{array}{rcl}
\text{prev} &:& \left[\begin{array}{rcl}
\text{prev} &:& \left[\begin{array}{rcl}
\text{prev} &:& \left[\begin{array}{rcl}
\text{prev} &:& \left[\begin{array}{rcl}
\text{prev} &:& \left[\begin{array}{rcl}
\text{c} &:& \langle \lambda v:Ind\ .\ \text{sofa}(v), [ x]\rangle\\
\text{x} &:& Ind_{a_{17}}
\end{array}\right]\\
\text{c} &:& \langle \lambda v:Ind\ .\ \text{cell_phone}(v), [ x]\rangle\\
\text{x} &:& Ind_{a_{18}}
\end{array}\right]\\
\text{c} &:& \langle \lambda v:Ind\ .\ \text{clock}(v), [ x]\rangle\\
\text{x} &:& Ind_{a_{19}}
\end{array}\right]\\
\text{c} &:& \langle \lambda v_1:Ind\ .\ \lambda v_2:\left[\begin{array}{rcl}
\text{w} &:& Int\\
\text{h} &:& Int\\
\text{cy} &:& Int\\
\text{cx} &:& Int
\end{array}\right]\ .\ \text{location}(v_1, v_2), [ x, \left[\begin{array}{rcl}
\text{h} &=& 803\\
\text{w} &=& 957\\
\tex

<IPython.core.display.Latex object>

In [43]:
# Flatten the nested record type; in the recorded output the nested 'prev'
# fields show up as dotted labels such as prev.prev.prev.x.
combflat = comb.flatten()
latex(combflat)

\begin{equation}\left[\begin{array}{rcl}
\text{prev.prev.prev.prev.c} &:& \langle \lambda v_1:Ind\ .\ \lambda v_2:\left[\begin{array}{rcl}
\text{h} &:& Int\\
\text{cy} &:& Int\\
\text{w} &:& Int\\
\text{cx} &:& Int
\end{array}\right]\ .\ \text{location}(v_1, v_2), [ prev.prev.prev.prev.x, \left[\begin{array}{rcl}
\text{h} &=& 423\\
\text{cy} &=& 588\\
\text{w} &=& 187\\
\text{cx} &=& 93
\end{array}\right]]\rangle\\
\text{prev.prev.prev.prev.prev.prev.prev.prev.c} &:& \langle \lambda v:Ind\ .\ \text{sofa}(v), [ prev.prev.prev.prev.prev.prev.prev.prev.x]\rangle\\
\text{prev.prev.prev.x} &:& Ind_{a_{19}}\\
\text{x} &:& Ind_{a_{19}}\\
\text{prev.prev.prev.c} &:& \langle \lambda v_1:Ind\ .\ \lambda v_2:\left[\begin{array}{rcl}
\text{h} &:& Int\\
\text{cy} &:& Int\\
\text{w} &:& Int\\
\text{cx} &:& Int
\end{array}\right]\ .\ \text{location}(v_1, v_2), [ prev.prev.prev.x, \left[\begin{array}{rcl}
\text{h} &=& 107\\
\text{cy} &=& 544\\
\text{w} &=& 71\\
\text{cx} &=& 44
\end{array}\right]]\ran

<IPython.core.display.Latex object>

## Text parsing

In [17]:
def create_abc(prop_a, prop_b, rel):
    '''Creates a record type describing two individuals and a relation between them.

    prop_a and prop_b name the one-place predicates applied to the individuals
    a_1 and a_2; rel names the two-place predicate applied to (a_1, a_2). Each
    constraint is stored under a label of the form 'c_{<name>}'.

    NOTE(review): when two of the three names are equal the labels coincide
    and the later constraint replaces the earlier one — confirm whether that
    case needs to be supported.
    '''
    def label(name):
        return 'c_{' + name + '}'

    entries = [
        ('a_1', Ind),
        ('a_2', Ind),
        (label(prop_a), (Fun('v', Ind, mkptype(prop_a)), ['a_1'])),
        (label(prop_b), (Fun('v', Ind, mkptype(prop_b)), ['a_2'])),
        (label(rel),
         (Fun('a', Ind, Fun('b', Ind, mkptype(rel, [Ind, Ind], ['a', 'b']))),
          ['a_1', 'a_2'])),
    ]
    return RecType(dict(entries))

print("A dog is to the left of a car")
question = create_abc('dog', 'car', 'left')
latex(question)

A dog is to the left of a car
\begin{equation}\left[\begin{array}{rcl}
\text{c}_\text{dog} &:& \langle \lambda v:Ind\ .\ \text{dog}(v), [a_1]\rangle\\
\text{c}_\text{car} &:& \langle \lambda v:Ind\ .\ \text{car}(v), [a_2]\rangle\\
\text{a}_\text{1} &:& Ind\\
\text{c}_\text{left} &:& \langle \lambda a:Ind\ .\ \lambda b:Ind\ .\ \text{left}(a, b), [a_1, a_2]\rangle\\
\text{a}_\text{2} &:& Ind
\end{array}\right]\end{equation}


<IPython.core.display.Latex object>

In [18]:
import nltk

# Feature grammar for sentences of the shape "<Det> <N> is <Prep> <Det> <N>".
# The SEM feature of S is the tuple (first noun, second noun, preposition),
# which is the argument order create_abc expects.
# Only the determiners 'a' and 'an' are covered.
grammar = nltk.grammar.FeatureGrammar.fromstring('''
%start S
S[SEM=(?a, ?b, ?prep)] -> NP[SEM=?a] 'is' Prep[SEM=?prep] NP[SEM=?b]
NP[DEF=?def, SEM=?n] -> Det[DEF=?def] N[SEM=?n]
N[SEM=<dog>] -> 'dog'
N[SEM=<car>] -> 'car'
N[SEM=<person>] -> 'person'
N[SEM=<chair>] -> 'chair'
Det -> 'a' | 'an'
Prep[SEM=<left>] -> 'to' 'the' 'left' 'of'
Prep[SEM=<right>] -> 'to' 'the' 'right' 'of'
Prep[SEM=<above>] -> 'above'
Prep[SEM=<under>] -> 'under'
''')
# Chart parser over the grammar above; used by parse_abc.
parser = nltk.FeatureChartParser(grammar)

# Example sentences to parse. The commented-out variants use words
# ('there', 'the') that have no rule in the grammar above.
texts = [
    'A dog is to the left of a car',
#     'There is a dog to the left of a car',
#     'Is the dog to the left of the car',
#     'Is there a dog to the left of the car',
]

def parse_abc(text):
    '''Parse a sentence with the feature grammar and build its record type.

    The text is lower-cased and split on whitespace, the first parse is taken,
    and the string forms of the items in its root SEM value are passed to
    create_abc.

    Raises:
        IndexError: if the parser yields no parse for the text.
    '''
    tokens = text.lower().split()
    # Only the first parse is used, so there is no need to collect them all.
    tree = next(iter(parser.parse(tokens)), None)
    if tree is None:
        # Same exception type as indexing an empty list, but with a message
        # that says which sentence failed.
        raise IndexError('no parse found for: {!r}'.format(text))
    sem = nltk.sem.root_semrep(tree)
    return create_abc(*(str(s) for s in sem))

# Parse every example sentence and print the shown form of its record type.
for text in texts:
    print(text)
    r = parse_abc(text)
    print(show(r))

# Render the record type of the last sentence parsed.
# NOTE: r is only bound when texts is non-empty.
latex(r)

{c_{dog} : (lambda v:Ind . dog(v), [a_1]), c_{car} : (lambda v:Ind . car(v), [a_2]), a_1 : Ind, c_{left} : (lambda a:Ind . lambda b:Ind . left(a, b), [a_1, a_2]), a_2 : Ind}


<IPython.core.display.Latex object>