In [None]:
from mechanicalsoup import Browser
import numpy as np
import pandas as pd
import re

def sanitize_movie_title(t):
    """Return ``t`` with every space character replaced by ``'+'``.

    Only the literal space (``' '``) is substituted; other whitespace and
    all other characters are passed through untouched.
    """
    # Splitting on an explicit single-space separator keeps empty pieces for
    # consecutive/leading/trailing spaces, so joining restores one '+' each.
    pieces = t.split(' ')
    return '+'.join(pieces)

def get_movie_recommendation(movie, url_tmpl='http://www.movie-map.com/{}.html'):
    """Fetch the recommendation map page for ``movie`` and return it as a table.

    The page URL is built by formatting ``url_tmpl`` with the title after
    ``sanitize_movie_title`` has been applied. Every anchor matched by the
    CSS selector ``#gnodMap a`` becomes one row.

    Parameters
    ----------
    movie : str
        Movie title to look up.
    url_tmpl : str
        URL template with a single ``{}`` placeholder for the sanitized title.

    Returns
    -------
    pandas.DataFrame
        Indexed by the anchor text (``name``) and sorted ascending by
        ``similarity``. Columns: ``link`` (the anchor element), ``s`` (the
        anchor ``id`` with every ``'s'`` removed, kept as a string),
        ``youtube``, ``tastedive``, ``imdb`` (search URLs built from the
        name) and ``similarity`` (first column of the matrix returned by
        ``get_similarities``).

    Raises
    ------
    AssertionError
        If no anchors are found, or if the similarity matrix is not square
        with one row per anchor.
    """
    movie_sanitized = sanitize_movie_title(movie)
    url = url_tmpl.format(movie_sanitized)
    b = Browser()
    # Fix: the original referenced an undefined name `URL` here.
    page = b.get(url)
    recommendations = page.soup.select('#gnodMap a')
    assert len(recommendations), 'no "#gnodMap a" links found at {}'.format(url)
    # Fix: the original passed an undefined name `s` instead of the anchors.
    df = pd.DataFrame(recommendations, columns=['link'])
    df['s'] = df['link'].apply(lambda x: x.attrs['id'].replace('s', ''))
    df['name'] = df['link'].apply(lambda x: x.text)

    for name, link_tmpl in [
        ('youtube', 'https://www.youtube.com/results?search_query={}+trailer'),
        ('tastedive', 'https://tastedive.com/like/{}'),
        ('imdb', 'http://www.imdb.com/find?ref_=nv_sr_fn&q={}&s=all')
    ]:
        # The lambda is consumed immediately by apply(), so closing over the
        # loop variable `link_tmpl` is safe here.
        df[name] = df['name'].apply(lambda x: link_tmpl.format(sanitize_movie_title(x)))

    # Fix: order rows by the *numeric* value of the id. Sorting the id strings
    # directly puts '10' before '2', which misaligns rows with the
    # positionally-assigned similarity values once there are 10+ links.
    # NOTE(review): assumes ids look like 's<integer>' and that row i of the
    # similarity matrix belongs to id i -- confirm against the page markup.
    numeric_order = df['s'].astype(int).sort_values(kind='mergesort').index
    df = df.loc[numeric_order].set_index('name')

    similarities = get_similarities(page.content)
    assert len(df) == similarities.shape[0] == similarities.shape[1], (
        'expected a {n}x{n} similarity matrix, got {shape}'.format(
            n=len(df), shape=similarities.shape))
    # Column 0 holds every entry's value relative to entry 0.
    # NOTE(review): presumably entry 0 is the queried movie -- confirm.
    relevant_similarities = similarities[:, 0]
    df['similarity'] = relevant_similarities
    df = df.sort_values('similarity')
    return df

def get_similarities(page):
    """Extract the square similarity matrix embedded in a page's script.

    The text of ``page`` (converted with ``str()``) is scanned for
    statements of the form ``Aid[<digits>]=new Array(<v0>,<v1>,...)``. Each
    match becomes one matrix row, in the order the matches appear.

    Parameters
    ----------
    page : object
        Page content; anything whose ``str()`` contains the ``Aid[...]``
        statements.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(n, n)`` where ``n`` is the number of
        ``Aid[...]`` statements found (``(0, 0)`` when there are none).

    Raises
    ------
    AssertionError
        If a row does not contain exactly ``n`` comma-separated values.
    ValueError
        If a value cannot be parsed as a float.
    """
    similarities_raw = re.findall(r'Aid\[\d+\]=new Array\((.+?)\)', str(page))
    num_similarities = len(similarities_raw)
    # Fix: `np.float` was only an alias for the builtin `float`; it was
    # deprecated in NumPy 1.20 and removed in 1.24.
    mat = np.zeros((num_similarities, num_similarities), dtype=float)
    for idx, similarity_arr in enumerate(similarities_raw):
        assert similarity_arr.count(',') == num_similarities - 1, (
            'row {} has {} values, expected {}'.format(
                idx, similarity_arr.count(',') + 1, num_similarities))
        mat[idx, :] = [float(x) for x in similarity_arr.split(',')]
    return mat

In [None]:
# Title to look up; the resulting table is kept in `df`.
MOVIE = 'in the loop'
df = get_movie_recommendation(MOVIE)

# Text layout for one recommendation: name and similarity on the first line,
# followed by the three tab-indented search links and a blank-line gap.
ROW_TEMPLATE = '{name:40} ({df.similarity:5.2f})\n\t{df.youtube:<90}\n\t{df.tastedive:<}\n\t{df.imdb}\n\n'

for title, row in df.iterrows():
    report = ROW_TEMPLATE.format(df=row, name=title)
    print(report)