# SNaCK Quickstart: Let's build an embedding on Yummly-10k!

In [None]:
# Must set up dependencies.
# Installs `snack` with conda from the gcr channel, then `bokeh` and
# `bokeh_image_explore` with pip; the cells below import all three packages.
!conda install -y -c https://conda.anaconda.org/gcr snack
!pip install bokeh
!pip install bokeh_image_explore

Fetching package metadata: ........
Solving package specifications: .............
# All requested packages already installed.
# packages in environment at /Users/michael/local/miniconda/envs/snack-zeroinstall:
#
snack                     0.0.3                  nppy27_0  
Collecting bokeh
  Using cached bokeh-0.10.0.tar.gz
Collecting six>=1.5.2 (from bokeh)
  Downloading six-1.10.0-py2.py3-none-any.whl
Collecting requests>=1.2.3 (from bokeh)
  Downloading requests-2.8.1-py2.py3-none-any.whl (497kB)
[K    100% |████████████████████████████████| 499kB 510kB/s 
[?25hCollecting PyYAML>=3.10 (from bokeh)
  Downloading PyYAML-3.11.tar.gz (248kB)
[K    100% |████████████████████████████████| 249kB 904kB/s 
[?25hCollecting python-dateutil>=2.1 (from bokeh)
  Downloading python_dateutil-2.4.2-py2.py3-none-any.whl (188kB)
[K    100% |████████████████████████████████| 192kB 1.4MB/s 
Collecting pandas>=0.11.0 (from bokeh)
  Downloading pandas-0.17.0.tar.gz (6.5MB)
[K    100% |████████████████

In [None]:
from bokeh.plotting import output_notebook, figure, show, hplot
from bokeh_image_explore import explore_embedding
import numpy as np  # required by np.load / np.array below; was missing from this cell
import simplejson
import snack
import bz2
output_notebook()

# Load the data.
# The dataset JSON supplies 'image_uuids' (an ordered list of image ids) and
# 'uuid_to_url' (a mapping from image id to image URL); both are read below.
with bz2.BZ2File("food-10k/dataset.json.bz2") as f:
    dset = simplejson.load(f)

# Feature array stored on disk, converted to float dtype.
# NOTE(review): presumably row i corresponds to dset['image_uuids'][i] -- confirm.
ftrs = np.load("food-10k/features.npy").astype('float')

# Position of every image in dset['image_uuids']; triplets are expressed with
# these integer positions instead of the raw uuids.
# The loop variable is deliberately not called `id`: on Python 2 a list
# comprehension variable leaks into the notebook namespace and would shadow
# the builtin.
uuid_map = {image_uuid: index for index, image_uuid in enumerate(dset['image_uuids'])}
urls = [dset['uuid_to_url'][image_uuid] for image_uuid in dset['image_uuids']]

# Every non-empty line of the triplet file holds three whitespace-separated uuids.
triplets = []
with bz2.BZ2File("food-10k/all-triplets.txt.bz2") as f:
    # Iterate the file lazily instead of materializing it with readlines().
    for raw_line in f:
        # BZ2File yields byte strings; decoding keeps the uuid_map lookups
        # working on Python 3 as well as Python 2.
        fields = raw_line.decode("utf-8").split()
        if not fields:
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            continue
        # A line with any other number of fields still raises ValueError here.
        (a, b, c) = fields
        triplets.append((uuid_map[a], uuid_map[b], uuid_map[c]))
triplets = np.array(triplets)

Build an embedding.
===================

Try playing with the SNaCK [parameters](http://nbviewer.ipython.org/github/cornelltech/snack/blob/master/Examples.ipynb) to get different embeddings!

This typically takes about 200 seconds. Hit "Run" only once and wait for it to finish!
-----------------------------------------

In [5]:
# Compute the embedding from the feature array and the triplet constraints.
X = snack.snack_embed(
    # Inputs: the deep-learned features and the expert constraints.
    X_np=ftrs,
    triplets=triplets,
    # Cost contribution assigned to the t-SNE term and to the triplet term.
    contrib_cost_tsne=500.0,
    contrib_cost_triplets=0.05,
    theta=0.5,
    # Report progress while the embedding is being learned.
    verbose=True,
)

Using no_dims = 2, perplexity = 30.000000, and theta = 0.500000
Computing input similarities...
Learning embedding...
Iter 0
Iter 50
Iter 100
Iter 150
Iter 200
Iter 250


Display the embedding
======================
Zoom and pan with the mouse wheel. Images appear when you zoom in.
---------------------

In [13]:
# Build the explorer plot for the embedding and its image URLs, then render it.
embedding_plot = explore_embedding(X, urls, title="Yummly 10k")
show(embedding_plot)