In [1]:
import pickle
import pandas as pd
import numpy as np
import pycountry_convert as pyc

#### Load the precomputed ingredient data and the country-distance table

In [2]:
def _load_pickle(path):
    """Unpickle and return the single object stored at `path`.

    The file is opened in a `with` block so the handle is closed as soon as
    the object has been read, instead of being left to the garbage collector.
    """
    # NOTE(security): pickle.load can execute arbitrary code while loading;
    # only use it on files produced by this project.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Indexed by ingredient name; each entry is iterated and the first element of
# every item is used as a substitute ingredient name (see lines_to_json).
replacements = _load_pickle("ing_replacements.obj")
# Indexed with a pair of ingredient indices; ref() reads the diagonal entries.
ing_coocs = _load_pickle("ing_coocs.obj")
# Maps an ingredient name to its index into ing_coocs.
ing_indices = _load_pickle("ing_indices.obj")
# Iterable of known ingredient names that ref() matches against raw lines.
vocab = _load_pickle("vocab.obj")
# Distance table indexed by (iso_o, iso_d); only the 'dist' column is kept.
alpha3_dist = pd.read_csv('../resources/dist_cepii.csv', index_col=['iso_o', 'iso_d'])[['dist']]
# Every origin code present in the distance table.
alpha3_all = set(alpha3_dist.reset_index()['iso_o'].values)
# Indexed by ingredient name; yields producer country names (see locality_score).
ing_to_country2 = _load_pickle('../resources/ing_to_country2.p')

In [7]:
def lines_to_json(lines):
    """Build a JSON-like report for a list of raw ingredient lines.

    Each line is mapped to a model ingredient with `to_model_ings`. For every
    resulting ingredient the report stores its own `locality_score` and the
    list of `(substitute_name, locality_score(substitute_name))` pairs taken
    from `replacements[ingredient]`.

    Parameters
    ----------
    lines : iterable of str
        Raw ingredient lines. The iterable is consumed exactly once, so a
        generator works as well as a list.

    Returns
    -------
    dict
        `{ingredient: {"locality_score": ..., "substitutes": [...]}}`.
        If two lines map to the same ingredient, the later one overwrites the
        earlier entry (the values are identical anyway).
    """
    # Iterate over the mapped ingredients only: the raw lines were never used
    # in the loop body, and zipping against them a second time produced an
    # empty result for one-shot iterables.
    report = dict()
    for ing in to_model_ings(lines):
        substitutes = [(c[0], locality_score(c[0])) for c in replacements[ing]]
        report[ing] = {"locality_score": locality_score(ing), "substitutes": substitutes}

    return report

def ref(ing):
    """Return the vocabulary ingredient that best matches a raw ingredient line.

    A vocabulary entry matches when it occurs as a substring of `ing` (so a
    short entry can also match inside a longer word). Among the matches, only
    those with the largest number of space-separated words are kept; the
    winner is the one with the highest diagonal value in `ing_coocs`. Ties
    are resolved in favour of the entry that comes first in `vocab`.

    Parameters
    ----------
    ing : str
        Raw ingredient line, e.g. "2 cloves of garlic".

    Returns
    -------
    The selected entry of `vocab`.

    Raises
    ------
    IndexError
        If no vocabulary entry occurs in `ing`. The type is unchanged from
        the previous implicit failure; only the message is now descriptive.
    """
    # Build matches using full-name ingredients.
    matches = [entry for entry in vocab if entry in ing]
    if not matches:
        raise IndexError("no vocabulary entry found in ingredient line {!r}".format(ing))

    # Keep only the matches made of the most words.
    max_len = max(len(m.split(" ")) for m in matches)
    longest = [m for m in matches if len(m.split(" ")) == max_len]

    # max() returns the first maximal element, which is the same entry a
    # stable descending sort would have put in front.
    return max(longest, key=lambda m: ing_coocs[ing_indices[m], ing_indices[m]])

def to_model_ings(ings):
    """Map every raw ingredient line to its model ingredient via `ref`.

    Returns a list with one entry per input line, in input order.
    """
    model_ings = []
    for raw_line in ings:
        model_ings.append(ref(raw_line))
    return model_ings

def locality_score(i, c='Switzerland'):
    """Score how locally ingredient `i` can be sourced for country `c`.

    Producers of `i` are read from `ing_to_country2`; producers whose alpha-3
    code is not in `alpha3_all` are ignored.

    Parameters
    ----------
    i : ingredient name used to look up producers in `ing_to_country2`.
    c : str
        Name of the home country (default 'Switzerland').

    Returns
    -------
    tuple
        * `(1, c)` if `c` itself is one of the usable producers;
        * `(-1, None)` if there is no usable producer, including the case
          where `i` is missing from `ing_to_country2`;
        * otherwise `(1 - (d_min / d_max) ** 0.6, nearest_producer)`, where
          `d_min` is the distance from `c` to the closest producer and
          `d_max` is the largest distance in `alpha3_dist`.
    """
    to_alpha3 = pyc.country_name_to_country_alpha3

    # Convert every producer once and keep the code next to its name, so the
    # distance lookups below do not convert the same names again.
    located = []
    for producer in ing_to_country2.get(i, ()):
        code = to_alpha3(producer)
        if code in alpha3_all:
            located.append((producer, code))

    if any(producer == c for producer, _ in located):
        return 1, c
    if not located:
        return -1, None

    # The home country code is the same for every lookup; convert it once.
    home_code = to_alpha3(c)
    distances = [alpha3_dist.loc[(home_code, code), 'dist'] for _, code in located]
    nearest = np.argmin(distances)
    max_dist = alpha3_dist['dist'].max()
    return 1 - np.power(distances[nearest] / max_dist, 0.6), located[nearest][0]
        
# Sample raw ingredient lines used to exercise the whole pipeline.
jo_ings = [
    "olive oil",
    "1 x 1.5 kg whole free-range chicken",
    "4 onions",
    "4 carrots",
    "2 cloves of garlic",
    "½ a bunch of fresh rosemary",
    "1 heaped tablespoon plain flour",
    "300 ml white wine",
    "200 ml organic chicken stock",
    "600 g potatoes",
    "2 handfuls of red and green grapes",
    "a few sprigs of fresh flat-leaf parsley",
]

# Bare last expression so the notebook renders the resulting report.
lines_to_json(jo_ings)

{'oil': {'locality_score': (1, 'Switzerland'),
  'substitutes': [('fat', (-1, None)), ('lard', (-1, None))]},
 'chicken': {'locality_score': (-1, None),
  'substitutes': [('turkey', (-1, None)),
   ('fryer', (1, 'Switzerland')),
   ('turkey meat', (-1, None)),
   ('pork steak', (1, 'Switzerland')),
   ('turkey drumstick', (0.8668531570649267, 'Italy')),
   ('crab', (-1, None)),
   ('loin', (-1, None)),
   ('halibut', (-1, None)),
   ('snapper', (-1, None)),
   ('lean pork shoulder', (1, 'Switzerland')),
   ('sirloin steak', (0.7777470793826322, 'Portugal'))]},
 'onion': {'locality_score': (1, 'Switzerland'), 'substitutes': []},
 'carrot': {'locality_score': (1, 'Switzerland'),
  'substitutes': [('vegetable', (1, 'Switzerland')),
   ('savory', (1, 'Switzerland'))]},
 'garlic': {'locality_score': (1, 'Switzerland'), 'substitutes': []},
 'rosemary': {'locality_score': (-1, None),
  'substitutes': [('savory', (1, 'Switzerland')),
   ('herb', (-1, None)),
   ('chardonnay', (-1, None)),
   (