In [None]:
import sqlite3
import pandas as pd
import numpy as np
import time

In [None]:
# Open a connection to the SQLite database file at data/crawl.sqlite
# (the path is relative to the current working directory).
con = sqlite3.connect('data/crawl.sqlite')

In [None]:
# Load every row and column of the `recommendations` table into a DataFrame.
# NOTE(review): presumably this is the frame later passed to complete_tree,
# which needs `video_id`, `recommendation` and `depth` columns - confirm.
recs = pd.read_sql_query("SELECT * FROM recommendations", con)

In [None]:
def complete_tree(df, search_id, const_depth=5):
    """Fill in recommendations for nodes that were truncated during the crawl.

    A node is "truncated" at a given depth when it was recommended by some
    video one level up but has no rows of its own at that depth (i.e. its
    recommendations were not followed there). For every such node, its
    recommendations are looked up anywhere in ``df`` and copied in at the
    depth where the node was truncated.

    Rows added for a depth are *not* fed back into the truncation check of the
    next depth; only the original rows of ``df`` drive that check.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``video_id``, ``recommendation`` and
        ``depth``. It is not modified.
    search_id : scalar
        Value written to the ``search_id`` column of every returned row.
    const_depth : int, default 5
        Threshold of the sampling rule: when the current depth is greater than
        ``const_depth`` and the looked-up rows come from a depth smaller than
        ``const_depth``, the looked-up rows are randomly sub-sampled.
        NOTE(review): presumably the depth from which the crawler itself
        started sampling recommendations - confirm against the crawler.

    Returns
    -------
    pandas.DataFrame
        The ``video_id`` / ``recommendation`` / ``depth`` columns of ``df``
        followed by the filled-in rows, plus a ``search_id`` column. Original
        rows keep their index labels; each filled-in group is indexed from 0,
        so index labels may repeat.
    """
    base = df.filter(items=['video_id', 'recommendation', 'depth'])
    pieces = [base]
    prev_recs = pd.Series([], dtype=object)

    # Walk the levels in ascending order so that "previous level" really is
    # the shallower one, regardless of the row order in ``df``.
    for depth in sorted(df['depth'].dropna().unique()):
        level = base[base['depth'] == depth]
        parent_ids = set(level['video_id'])

        # Recommendations of the previous level that never show up as a
        # video_id on this level are the truncated nodes. Missing ids are
        # dropped and first-appearance order is kept so the output order is
        # deterministic.
        truncated_ids = [vid for vid in prev_recs.dropna().unique()
                         if vid not in parent_ids]
        prev_recs = level['recommendation']

        for video_id in truncated_ids:
            source_rows = df[df['video_id'] == video_id]
            if source_rows.empty:
                # The node was never crawled at any depth: nothing to copy.
                continue
            source_depth = source_rows['depth'].iloc[0]

            # If we (a) are past the sampling threshold, but (b) the source
            # recommendations come from a level before it, sample them here.
            # Each row is kept independently with probability 1/n, so the
            # sample has one row on average but may be empty or larger.
            if depth > const_depth and source_depth < const_depth:
                n_rows = source_rows.shape[0]
                keep = np.random.rand(n_rows) < 1 / n_rows
                source_rows = source_rows[keep]
                if source_rows.empty:
                    continue

            pieces.append(source_rows[['video_id', 'recommendation']]
                          .assign(depth=depth)
                          .reset_index(drop=True))

    # Single concat instead of repeated DataFrame.append (removed in pandas 2.0),
    # and keep the result of assign(), which returns a new frame.
    return pd.concat(pieces).assign(search_id=search_id)