In [None]:
import pandas as pd
import numpy as np
import difflib
from haversine import haversine, Unit
import folium

In [None]:
#this function just loads necessary cleaned data and sets necessary parameters for later operations
def input_take(USDA_csv,Recipe_csv,zip_csv):
    """Load the three cleaned CSV inputs and normalise the columns used later.

    Parameters
    ----------
    USDA_csv : path to the farm CSV; must contain LAT, LON and INGREDIENT columns.
    Recipe_csv : path to the recipe CSV; must contain title and matched_ingredient columns.
    zip_csv : path to a ';'-delimited CSV with Zip, Latitude and Longitude columns.

    Returns
    -------
    list
        [farm, rec, zip_codes] as DataFrames, in that order.
    """
    farm = pd.read_csv(USDA_csv)
    rec = pd.read_csv(Recipe_csv)
    zip_codes = pd.read_csv(zip_csv, delimiter=';')[['Zip', 'Latitude', 'Longitude']]

    # Normalise the frames that were just loaded (the local names, not
    # module-level variables that may not exist on a fresh kernel).
    rec['title'] = rec['title'].astype(str)
    rec['matched_ingredient'] = rec['matched_ingredient'].astype(str)
    farm['LAT'] = farm['LAT'].astype(float)
    farm['LON'] = farm['LON'].astype(float)
    # .str.lower() leaves missing values as NaN instead of raising on them.
    farm['INGREDIENT'] = farm['INGREDIENT'].str.lower()

    return [farm, rec, zip_codes]

In [None]:
#INPUT CELL:
def user_input(zip_table=None):
    """Prompt for a recipe name and a zip code and resolve the zip to coordinates.

    Parameters
    ----------
    zip_table : DataFrame, optional
        Table with Zip, Latitude and Longitude columns. When omitted, the
        module-level ``zips`` frame is used.

    Returns
    -------
    list
        [recipe_name, latitude, longitude]; the coordinates are floats.

    The zip prompt repeats until the entry is an integer present in the
    table's Zip column.
    """
    if zip_table is None:
        zip_table = zips  # module-level fallback; resolved before prompting

    recipe_name = input("Please enter a recipe (e.g. Pasta Putanesca): ")

    prompt = "Please enter your zip code: "
    while True:
        raw = input(prompt).strip()
        prompt = "That zip code did not work. Please try again: "
        try:
            zip_code = int(raw)
        except ValueError:
            # Non-numeric entry: ask again instead of crashing.
            continue
        hits = zip_table[zip_table['Zip'] == zip_code]
        if not hits.empty:
            # Take the first matching row explicitly; converting a whole
            # Series with float() is deprecated and fails on duplicates.
            row = hits.iloc[0]
            return [recipe_name, float(row['Latitude']), float(row['Longitude'])]

In [None]:
#function to find name matches to the input recipe search, can vary number of returned recipes with minimum similarity
def match_generator(x,y,z=3,u=0.1):
    """Return the rows of y whose title is a close match to the search text x.

    x : recipe name to search for.
    y : recipe DataFrame with a 'title' column.
    z : maximum number of distinct titles to keep.
    u : minimum similarity (difflib cutoff) a title needs to be kept.
    """
    distinct_titles = y['title'].unique().tolist()
    best_titles = difflib.get_close_matches(x, distinct_titles, n=z, cutoff=u)
    keep_mask = y['title'].isin(best_titles)
    return y[keep_mask]

In [None]:
#function to search for all ingredient entries from USDA which match items in the returned recipes 
#and calculate distances 
def farm_finder(a,b,Lat_1,Lon_1):
    """Select the farm rows whose ingredient appears in the matched recipes.

    a : recipe DataFrame with a 'matched_ingredient' column.
    b : farm DataFrame with INGREDIENT, LAT and LON columns.
    Lat_1, Lon_1 : coordinates that distances are measured from.

    Returns a new DataFrame (b is not modified) with an extra 'es' column
    holding the haversine distance in miles from (Lat_1, Lon_1) to each farm.
    When no farm matches, the result is empty but still has the 'es' column.
    """
    wanted = a.matched_ingredient.unique().tolist()
    # Work on an explicit copy: writing a column into a filtered slice
    # triggers SettingWithCopyWarning and may not stick.
    fs = b[b.INGREDIENT.isin(wanted)].copy()
    origin = (Lat_1, Lon_1)
    distances = [haversine(origin, (lat, lon), unit='mi')
                 for lat, lon in zip(fs['LAT'], fs['LON'])]
    # Building the column with an explicit index and dtype also covers the
    # empty case, where a row-wise apply would return a DataFrame.
    fs['es'] = pd.Series(distances, index=fs.index, dtype=float)
    return fs

In [None]:
#functions to return minimum distance for an ingredient in USDA data 
def my_agg(x):
    """Return the smallest 'es' value of x as a one-element Series labelled 'score'."""
    smallest = x['es'].min()
    return pd.Series([smallest], index=['score'])

def min_agg(x):
    """Return the closest farm for every ingredient.

    x : DataFrame with INGREDIENT, LAT, LON and 'es' (distance) columns.

    Returns a DataFrame indexed by INGREDIENT (sorted) with columns
    LAT, LON and score, where score is the smallest 'es' for that ingredient.
    Exactly one row is kept per ingredient: if several farms tie for the
    minimum, the one appearing first in x wins, so a label lookup on the
    result always yields scalars. Rows with a missing distance are ignored.
    """
    nearest = (
        x.dropna(subset=['es'])
         .sort_values('es', kind='mergesort')   # stable: ties keep input order
         .drop_duplicates(subset='INGREDIENT', keep='first')
    )
    return (
        nearest[['INGREDIENT', 'LAT', 'LON', 'es']]
        .rename(columns={'es': 'score'})
        .sort_values('INGREDIENT', kind='mergesort')
        .set_index('INGREDIENT')
    )

# score_ex looks `beta` up at module scope. `farm_search` is only present
# here when it was created interactively; on a fresh kernel it is undefined,
# so fall back to None instead of aborting the cell with a NameError (which
# would also leave the functions defined below this line undefined).
beta = min_agg(farm_search) if 'farm_search' in globals() else None


#function to attach minimum distances for ingredients to recipe data
def score_ex(x, lookup=None):
    """Look up the nearest-farm distance and coordinates for one ingredient.

    x : ingredient label to look up.
    lookup : DataFrame indexed by ingredient with LAT, LON and score columns.
        When omitted, the module-level ``beta`` is used if it exists.

    Returns a Series with entries 'score', 'Lat', 'Lon'. All three are NaN
    when no lookup table is available or the ingredient is not in it.
    """
    fields = ['score', 'Lat', 'Lon']
    not_found = pd.Series({'score': np.nan, 'Lat': np.nan, 'Lon': np.nan}, index=fields)

    if lookup is None:
        lookup = globals().get('beta')
    if lookup is None:
        return not_found

    try:
        row = lookup.loc[x]
    except (KeyError, TypeError):
        # Only a missing/unusable label means "not found"; other errors
        # (e.g. a malformed lookup table) should stay visible.
        return not_found

    if isinstance(row, pd.DataFrame):
        # Duplicate index entries for this ingredient: keep the closest one.
        row = row.sort_values('score').iloc[0]

    return pd.Series(
        {'score': row['score'], 'Lat': row['LAT'], 'Lon': row['LON']},
        index=fields,
    )

#function which applies the score_ex function, returns the entry with the lowest average ingredient distance,
#returns a frame to show found and non-found ingredients (show n of N found information),
#and returns a frame with no NaN values, grouping ingredients on lat/ lon for mapping purposes to show all ingredients
def merger(x):
    """Score every ingredient of the matched recipes and pick the closest recipe.

    x : DataFrame of matched recipe rows with title, matched_ingredient and
        original columns.

    Returns
    -------
    list
        [win, closest, frame] where
        * win is the title with the lowest mean ingredient score
          (unscored ingredients are ignored by the mean),
        * closest holds original/Lat/Lon for every ingredient row of that
          title, with NaN coordinates where no farm was found,
        * frame has one row per (Lat, Lon) with the list of 'original'
          entries sourced there; rows with missing values are excluded.

    Raises
    ------
    ValueError
        If x has no rows, since there is no recipe to choose from.
    """
    if len(x) == 0:
        raise ValueError("merger() received no matched recipes to score")

    # score_ex returns a Series per ingredient, so apply yields a DataFrame
    # with score/Lat/Lon columns aligned on x's index.
    scores = x.matched_ingredient.apply(score_ex)
    match_2 = x.merge(scores, left_index=True, right_index=True)

    mean_by_title = match_2.groupby('title')['score'].mean()
    win = mean_by_title.sort_values(ascending=True).index[0]  # NaN means sort last

    closest = match_2[match_2['title'] == win][['original', 'Lat', 'Lon']]
    frame = (
        closest.dropna()
        .groupby(['Lat', 'Lon'])['original']
        .apply(list)
        .reset_index()
    )
    return [win, closest, frame]

In [None]:
#function which maps closest sources of ingredients for the recipe with the closest average distance of sourced 
#ingredients, along with printing information.
def face_mapper(a,b,c,winner=None,closest_df=None):
    """Draw a folium map linking the home location to each ingredient source.

    a, b : latitude and longitude of the home location.
    c : DataFrame with Lat, Lon and original columns, one row per source.
    winner : title of the selected recipe, used in the printed summary.
        Falls back to a module-level ``winner`` when omitted.
    closest_df : per-ingredient frame used for the "n of N located" line.
        Falls back to a module-level ``closest_df`` when omitted.

    Prints the summary lines that can be produced and returns the result of
    ``display(m)``.
    """
    if winner is None:
        winner = globals().get('winner')
    if closest_df is None:
        closest_df = globals().get('closest_df')

    # Use the coordinates that were passed in rather than hidden globals.
    home = [a, b]
    m = folium.Map(location=home)

    for item in range(len(c)):
        source = c.iloc[item]
        points = [[source['Lat'], source['Lon']], home]
        try:
            folium.PolyLine(locations=points, weight=5).add_to(m)
            folium.Marker(points[0], popup=source['original']).add_to(m)
        except Exception as err:
            # Keep mapping the remaining sources, but report what went wrong.
            print('Farm to Face Map has been Canned: ' + repr(err))

    # Include home in the bounds so the view is valid even when c is empty.
    lats = [a] + c['Lat'].tolist()
    lons = [b] + c['Lon'].tolist()
    m.fit_bounds([[min(lats), min(lons)], [max(lats), max(lons)]])

    if winner is not None:
        print('The closest matching recipe is: ' + winner)
    if closest_df is not None:
        print(str(len(closest_df.dropna())) + ' of ' + str(len(closest_df)) + ' ingredients located')
    return display(m)

In [None]:
def farm_to_face():
    """Run the whole pipeline: load data, prompt the user, score and map.

    Loads the three CSV inputs, asks for a recipe and zip code, finds the
    closest-matching recipes, computes the nearest farm per ingredient and
    renders the map for the recipe with the lowest mean distance.

    Returns whatever face_mapper returns.
    """
    # The helper functions in this notebook look these names up at module
    # scope (zips in user_input, beta in score_ex, the rest in face_mapper).
    # Assigning them as plain locals would leave those lookups undefined or
    # stale, so publish them as globals.
    global zips, beta, Lat_in, Lon_in, winner, closest_df

    farms, recipes, zips = input_take(
        "USDA_file.csv",
        "Recipe_file.csv",
        "us-zip-code-latitude-and-longitude.csv",
    )
    input_recipe, Lat_in, Lon_in = user_input()
    match_df = match_generator(input_recipe, recipes)
    farm_search = farm_finder(match_df, farms, Lat_in, Lon_in)
    beta = min_agg(farm_search)
    winner, closest_df, closest_frame = merger(match_df)
    return face_mapper(Lat_in, Lon_in, closest_frame)

In [None]:
# Run the full pipeline; this prompts for a recipe name and a zip code.
farm_to_face()