In [None]:
import duckdb
import pandas as pd
import re
from collections import Counter
from matplotlib import pyplot as plt
from sklearn.metrics.pairwise import cosine_similarity

# Raise pandas' display limits so long or wide result frames are shown without
# being elided in the notebook output.
pd.options.display.max_rows = 500
pd.options.display.max_columns = 500

In [None]:
# Directory holding the exported MovieLens recommender database.
# Please replace with your own path!
path_to_export = "/Users/paul_gwu/code/data/MovieLens_Recommender_Export/"

# IMPORT DATABASE re-creates every exported table, so running it a second time
# in the same kernel fails because the tables already exist. Only import when
# the `movies` table is not present yet, which keeps this cell safe to re-run.
# (Per the original note the export is stored as parquet, a fast and efficient
# format -- TODO confirm against the export directory.)
_loaded_tables = {row[0] for row in duckdb.sql('SHOW TABLES').fetchall()}
if 'movies' not in _loaded_tables:
    duckdb.sql(f'''IMPORT DATABASE '{path_to_export}' ''')

# Let's check that the data all loaded!
duckdb.sql('SHOW TABLES')

┌───────────────────┐
│       name        │
│      varchar      │
├───────────────────┤
│ candidate_items_2 │
│ frequent_items_1  │
│ frequent_items_2  │
│ high_ratings      │
│ high_ratings_base │
│ indicators        │
│ links             │
│ movies            │
│ ratings           │
│ rules             │
│ tags              │
├───────────────────┤
│      11 rows      │
└───────────────────┘

In [None]:
def find_movie_from_title(title):
    """Return the rows of ``movies`` whose title contains ``title``, ignoring case.

    The search text is bound as a prepared-statement parameter rather than
    being pasted into the SQL string, so titles containing quotes (for example
    "Schindler's List") work and cannot alter the query.

    Args:
        title: Substring to look for. ``%`` and ``_`` keep their usual LIKE
            wildcard meaning.

    Returns:
        Whatever ``duckdb.sql`` returns for the query (all columns of the
        matching ``movies`` rows).

    Note:
        Requires a DuckDB version whose ``duckdb.sql`` accepts ``params``.
    """
    # Upper-case on the Python side and compare against UPPER(title) so the
    # match is case-insensitive.
    pattern = f"%{title.upper()}%"
    return duckdb.sql(
        "SELECT * FROM movies WHERE UPPER(title) LIKE ?",
        params=[pattern],
    )

def make_recommendation_from_list(list_of_ids):
    """Recommend movies associated with the given movie ids.

    Selects the rows of ``indicators`` whose ``antecedent`` is one of the
    given ids and whose ``consequent`` is not, then aggregates them per
    consequent.

    Args:
        list_of_ids: Iterable of movie ids. Each value is coerced with
            ``int()`` so only numeric ids ever reach the SQL text.

    Returns:
        A pandas DataFrame with columns ``consequent``, ``c_title``,
        ``intersection`` (number of matching indicator rows) and ``sum_pmi``
        (sum of the ``PMI`` column over those rows), ordered by
        ``intersection`` then ``sum_pmi``, both descending. An empty frame
        with the same columns is returned when no ids are given.

    Raises:
        ValueError: If an id cannot be converted to an integer.
        TypeError: If an id is of a type ``int()`` does not accept.
    """
    # int() raises on non-numeric input, so the values interpolated into the
    # SQL below cannot carry injected SQL.
    movie_ids = [int(i) for i in list_of_ids]
    if not movie_ids:
        # `IN ()` is a SQL syntax error; with no input there is nothing to recommend.
        return pd.DataFrame(columns=["consequent", "c_title", "intersection", "sum_pmi"])

    list_string = ",".join(str(i) for i in movie_ids)
    query = f'''
        SELECT
             consequent
            , c_title
            , COUNT(*) AS intersection
            , SUM(PMI) AS sum_pmi
        FROM
            indicators
        WHERE
            antecedent IN ({list_string})
        AND
            consequent NOT IN ({list_string})
        GROUP BY
            consequent
            , c_title
        ORDER BY intersection DESC, sum_pmi DESC
        '''
    return duckdb.sql(query).df()

In [None]:
# Look up the movieId for "Inception"; the helper does a case-insensitive
# substring match against the movies table.
find_movie_from_title('Inception')

┌─────────┬──────────────────┬─────────────────────────────────────────────────┐
│ movieId │      title       │                     genres                      │
│  int64  │     varchar      │                     varchar                     │
├─────────┼──────────────────┼─────────────────────────────────────────────────┤
│   79132 │ Inception (2010) │ Action|Crime|Drama|Mystery|Sci-Fi|Thriller|IMAX │
└─────────┴──────────────────┴─────────────────────────────────────────────────┘

In [None]:
# 79132 is the movieId of "Inception (2010)"; request recommendations seeded
# with that single movie.
make_recommendation_from_list([79132,])

Unnamed: 0,consequent,c_title,intersection,sum_pmi
0,74458,Shutter Island (2010),1,2.85228
1,109487,Interstellar (2014),1,2.732242
2,91529,"Dark Knight Rises, The (2012)",1,2.700265
3,48780,"Prestige, The (2006)",1,2.644919
4,106782,"Wolf of Wall Street, The (2013)",1,2.380376
5,116797,The Imitation Game (2014),1,2.37855
6,134130,The Martian (2015),1,2.35478
7,164179,Arrival (2016),1,2.342431
8,58559,"Dark Knight, The (2008)",1,2.220823
9,99114,Django Unchained (2012),1,2.133733
