In [1]:
import numpy as np
import pandas as pd
books = pd.read_csv("books.csv")[["book_id", "title", "authors"]]
ratings = pd.read_csv("ratings.csv")

In [2]:
# Create dataframe with my ratings and a custom user id
my_user_id = -1
my_ratings = [
    ["The Hitchhiker's Guide to the Galaxy (Hitchhiker's Guide to the Galaxy, #1)", 5],
    ["The Martian", 5],
    ["Surely You're Joking, Mr. Feynman!: Adventures of a Curious Character", 5],
    ['Going Solo', 5],
    ["Flatland: A Romance of Many Dimensions", 5],
    ["Gödel, Escher, Bach: An Eternal Golden Braid", 5],
    ["The Hundred-Year-Old Man Who Climbed Out of the Window and Disappeared", 5],
    ["Gut: The Inside Story of Our Body's Most Underrated Organ", 5],
    ["Brave New World", 4],
    ["The Three-Body Problem (Remembrance of Earth's Past, #1)", 2],
    ["The Dark Forest (Remembrance of Earth's Past, #2)", 2],
    ["The Remains of the Day", 5],
    ["The Pursuit of Happyness", 5],
    ["Animal Farm", 5],
    ["1984", 5],
    ["Norwegian Wood", 5],
    ["Three Men in a Boat (Three Men, #1)", 5],
    ["Lord of the Flies", 3],
    ["Buddenbrooks: The Decline of a Family", 1],
    ["To Kill a Mockingbird", 5],
    ["Harry Potter and the Sorcerer's Stone (Harry Potter, #1)", 3],
    ["Harry Potter and the Chamber of Secrets (Harry Potter, #2)", 3],
    ["Harry Potter and the Prisoner of Azkaban (Harry Potter, #3)", 3],
    ["Harry Potter and the Goblet of Fire (Harry Potter, #4)", 3],
    ["Harry Potter and the Order of the Phoenix (Harry Potter, #5)", 3],
    ["Harry Potter and the Half-Blood Prince (Harry Potter, #6)", 3],
    ["Harry Potter and the Deathly Hallows (Harry Potter, #7)", 3],
    ["Perfume: The Story of a Murderer", 1],
    ["Sapiens: A Brief History of Humankind", 5],
    ["The Circle", 1],
    ["The Reader", 2],
    ["Cloud Atlas", 5],
    ["A Briefer History of Time", 5],
    ["The Grand Design", 4],
    ["The Universe in a Nutshell", 4],
    ["All Quiet on the Western Front", 5],
    ["Inferno (Robert Langdon, #4)", 3],
    ["The Da Vinci Code (Robert Langdon, #2)", 4],
    ["I Am Legend", 5],
    ["Catch Me If You Can: The True Story of a Real Fake", 4],
    ["Memoirs of a Geisha", 5],
    ["A Fine Balance", 5],
    ["Man's Search for Meaning", 5],
    ["Dune (Dune Chronicles #1)", 4],
    ["The Kite Runner", 5],
    ["Kon-Tiki: Across The Pacific In A Raft", 5],
    ["Seven Years in Tibet", 5],
    ["The Diary of a Young Girl", 5],
    ["The Alchemist", 5],
    ["Siddhartha", 5],
    ["The Glass Bead Game", 4],
    ["Demian. Die Geschichte von Emil Sinclairs Jugend", 5],
    ["Steppenwolf", 4],
    ["Quo Vadis", 5],
    ["P.S. I Love You", 4],
    ["The Pillars of the Earth (The Kingsbridge Series, #1)", 4],
    ["Eye of the Needle", 4],
    ["Eragon (The Inheritance Cycle, #1)", 4],
    ["Wild Swans: Three Daughters of China", 5],
    ["I Am Malala: The Story of the Girl Who Stood Up for Education and Was Shot by the Taliban", 5]
] 
my_ratings = pd.DataFrame(my_ratings, columns=['title', 'rating'])
my_ratings['user_id'] = my_user_id
my_ratings = pd.merge(my_ratings, books, on='title', how='left')

# Append my ratings to the ratings table
ratings = ratings.append(my_ratings[['user_id', 'book_id', 'rating']], ignore_index=True)
ratings = ratings.merge(books[["book_id", "title"]], how="left", on="book_id").dropna()

In [3]:
uii_matrix = ratings.pivot_table(
  index=["user_id"], 
  columns=["title"], 
  values="rating").fillna(np.nan)

In [4]:
similarities = uii_matrix.corrwith(uii_matrix.loc[my_user_id], axis=1)

  c = cov(x, y, rowvar)
  c *= np.true_divide(1, fact)


In [5]:
minimum_number_of_books_rated_in_common = 10

# Only use those which have an intersection of more than n books
my_books_read = uii_matrix.loc[my_user_id].notna()
intersections = uii_matrix.apply(lambda x: (x.notna() & my_books_read).sum(), axis=1)
similarities[intersections < minimum_number_of_books_rated_in_common] = np.nan

# Remove self similarity
similarities[my_user_id] = np.nan

In [6]:
minimal_similarity = 0.7
minimal_number_of_ratings = 5

def scoring(column):
    # Consider those users with at least a similarity of minimal_similarity
    neighbours = similarities > minimal_similarity
    # Calculate weighted mean of ratings as scores
    numerator = np.sum(column[neighbours]*similarities[neighbours])
    denominator = np.sum(similarities[neighbours][column[neighbours].notna()])
    predicted_rating = numerator/denominator if denominator != 0 else np.nan

    # If book has been rated less than minimal_number_of_ratings, set its score to nan
    if column[neighbours].notna().sum() <= minimal_number_of_ratings:
        predicted_rating = np.nan
    return predicted_rating

scores = uii_matrix.apply(lambda x: scoring(x))

# Print only recommendations of books I haven't read:
print(scores[~my_books_read].sort_values(ascending=False)[:10])

title
Lonesome Dove                                                                  4.884893
Ahab's Wife, or The Star-Gazer                                                 4.852697
Someone Knows My Name                                                          4.844823
The Diving Bell and the Butterfly                                              4.828780
The Nightingale                                                                4.821673
A Storm of Swords: Blood and Gold (A Song of Ice and Fire, #3: Part 2 of 2)    4.810564
The Power of One (The Power of One, #1)                                        4.778453
The Book of Mormon: Another Testament of Jesus Christ                          4.777642
The Complete Maus (Maus, #1-2)                                                 4.772905
Maus I: A Survivor's Tale: My Father Bleeds History (Maus, #1)                 4.745223
dtype: float64
