# Book Recomendations Using Neo4J

In [51]:
# import Neo4J-Python connector
from py2neo import Graph
# set up graph
graph = Graph("bolt://localhost:11001", auth=("neo4j", "password"))

## Method #1: Recs Based Book Title

In [250]:
# Find all user id'S that like a given book (like = rating of 8+)
def find_similar_users(book_title):
    id_results = graph.run("MATCH (b:BX_Book{title: '" + str(book_title) + "'})<-[r:LIKE]-(u:BX_User) RETURN u.ID ").data()
    return id_results

# example
ex1_users = find_similar_users('To Kill a Mockingbird')
ex1_users
      

[{'u.ID': 42323},
 {'u.ID': 199092},
 {'u.ID': 216861},
 {'u.ID': 229440},
 {'u.ID': 59038},
 {'u.ID': 201697},
 {'u.ID': 217889},
 {'u.ID': 40330},
 {'u.ID': 155000},
 {'u.ID': 66942},
 {'u.ID': 236340},
 {'u.ID': 36938},
 {'u.ID': 63625},
 {'u.ID': 37950},
 {'u.ID': 90046},
 {'u.ID': 95359},
 {'u.ID': 251844},
 {'u.ID': 249174},
 {'u.ID': 158774},
 {'u.ID': 27472},
 {'u.ID': 93631},
 {'u.ID': 75429},
 {'u.ID': 37712},
 {'u.ID': 104657},
 {'u.ID': 160509},
 {'u.ID': 49204},
 {'u.ID': 7409},
 {'u.ID': 17950},
 {'u.ID': 2766},
 {'u.ID': 251422},
 {'u.ID': 24194},
 {'u.ID': 209724},
 {'u.ID': 57398},
 {'u.ID': 68861},
 {'u.ID': 193676},
 {'u.ID': 271859},
 {'u.ID': 276953},
 {'u.ID': 129641},
 {'u.ID': 4225},
 {'u.ID': 236058},
 {'u.ID': 108827},
 {'u.ID': 187145},
 {'u.ID': 48268},
 {'u.ID': 136205},
 {'u.ID': 171118},
 {'u.ID': 55187},
 {'u.ID': 140662},
 {'u.ID': 264985},
 {'u.ID': 22365},
 {'u.ID': 219650},
 {'u.ID': 201526},
 {'u.ID': 128622},
 {'u.ID': 98468},
 {'u.ID': 36003},
 {'

In [251]:
# Given a list of users, find all books that they like and make a dataframe of the book names and ratings
def find_books_from_users(user_list):
    book_results = []
    for user in user_list:
        book_results.append(graph.run("MATCH (n:Author)<-[:WRITTEN_BY]-(b:BX_Book)<-[r:LIKE]-(u:BX_User{ID:" + 
                                      str(user["u.ID"]) + "}) RETURN b.title, n.AName, r.BRating").data())
    return book_results

# example
ex1_books = find_books_from_users(ex1_users)
ex1_books

[[{'b.title': 'No Second  Chance',
   'n.AName': 'Harlan Coben',
   'r.BRating': '10'},
  {'b.title': 'To Kill a Mockingbird',
   'n.AName': 'Harper Lee',
   'r.BRating': '10'},
  {'b.title': 'Prime Cut', 'n.AName': 'Diane Mott Davidson', 'r.BRating': '9'},
  {'b.title': 'The Grilling Season',
   'n.AName': 'Diane Mott Davidson',
   'r.BRating': '9'},
  {'b.title': 'The Golden One',
   'n.AName': 'Elizabeth Peters',
   'r.BRating': '8'},
  {'b.title': 'Stone Kiss (Peter Decker &amp; Rina Lazarus Novels (Hardcover))',
   'n.AName': 'Faye Kellerman',
   'r.BRating': '8'},
  {'b.title': 'Dating Dead Men',
   'n.AName': 'HARLEY JANE KOZAK',
   'r.BRating': '9'},
  {'b.title': 'And Justice There Is None',
   'n.AName': 'DEBORAH CROMBIE',
   'r.BRating': '9'},
  {'b.title': 'Pink Slip Party', 'n.AName': 'Cara Lockwood', 'r.BRating': '9'},
  {'b.title': 'True Devotion (Uncommon Heroes, Book 1)',
   'n.AName': 'Dee Henderson',
   'r.BRating': '9'},
  {'b.title': 'The Main Corpse',
   'n.AName'

In [245]:
# creating a dataframe from the book and rating info using pandas
import pandas as pd
import numpy as np

# book title -> data frame
# uses previous functions to find books liked by users who liked input book
# finds amount of reviews each book has and the sum of them 
def make_df(book_title):
    # creating the list of books liked by users who like the same book
    books = find_books_from_users(find_similar_users(book_title))
    # initializing data list
    data_dict = {}
    # looping through list of list of dictionaries of book tiles/ratings
    for user_data in books:
        for book in user_data:
            # filter out book title input
            if book['b.title'] != book_title:
                title = book['b.title']
                rating = int(book['r.BRating'])
                author = book['n.AName']
                # fix '&' not showing up correctly
                title = title.replace("&amp;", "&")
                # addressing duplicate books
                if title not in data_dict.keys():
                    data_dict[title] = [rating, author] 
                else: 
                    # rating column will be total ratings
                    data_dict[title] = [data_dict[title][0] + rating, author]            
    # setting up 
    book_names = []
    authors = []
    ratings = []
    for k,v in data_dict.items():
        book_names.append(k)
        authors.append(v[0])
        ratings.append(v[1])
    data = []
    data.append(book_names)
    data.append(authors)
    data.append(ratings)
    df = pd.DataFrame(data).T
    df.columns = ["Title", "Ratings", "Author"]
    # sort by total ratings
    sorted_df = df.sort_values(by=['Ratings'], ascending=[False])
    return sorted_df
        
df_results = make_df("The Color of Magic")[0:10]
df_results

Unnamed: 0,Title,Ratings,Author
228,The Light Fantastic (Discworld Novels (Paperba...,35,Terry Pratchett
150,American Gods,27,Neil Gaiman
226,Equal Rites (Discworld Novels (Paperback)),26,Terry Pratchett
231,Pyramids (Discworld Novels (Paperback)),24,Terry Pratchett
251,Good Omens: The Nice and Accurate Prophecies o...,20,Neil Gaiman
265,84 Charing Cross Road,20,Helene Hanff
122,The Hobbit: or There and Back Again,20,J.R.R. Tolkien
214,Dreamcatcher,20,Stephen King
28,Neverwhere,19,Neil Gaiman
49,Where the Red Fern Grows,19,Wilson Rawls


In [252]:
# string, int -> list
# book title, # of results desired -> list of recomended titles 
def review_weight_recomend(book_title, number_results):
    df = make_df(book_title)
    results = []
    for index, rows in df.iterrows():
        results.append([rows[0], rows[2]])
    return results[0:number_results]
          
print(review_weight_recomend("The Color of Magic", 5))

[['The Light Fantastic (Discworld Novels (Paperback))', 'Terry Pratchett'], ['American Gods', 'Neil Gaiman'], ['Equal Rites (Discworld Novels (Paperback))', 'Terry Pratchett'], ['Pyramids (Discworld Novels (Paperback))', 'Terry Pratchett'], ['Good Omens: The Nice and Accurate Prophecies of Agnes Nutter, Witch', 'Neil Gaiman']]


In [302]:
# Prints out the recomendations in a semi-pleasing manner 
def print_book_recs(book_title, number_results):
    result_list = review_weight_recomend(book_title, number_results)
    count = 1
    end_result = []
    for result in result_list:
        str_result = (str(count) + ". " + result[0] + " - " + result[1])
        end_result.append(str_result)
        count +=1
    return print('\n'.join(end_result))
        
print_book_recs("The Color of Magic", 10)

1. The Light Fantastic (Discworld Novels (Paperback)) - Terry Pratchett
2. American Gods - Neil Gaiman
3. Equal Rites (Discworld Novels (Paperback)) - Terry Pratchett
4. Pyramids (Discworld Novels (Paperback)) - Terry Pratchett
5. Good Omens: The Nice and Accurate Prophecies of Agnes Nutter, Witch - Neil Gaiman
6. 84 Charing Cross Road - Helene Hanff
7. The Hobbit: or There and Back Again - J.R.R. Tolkien
8. Dreamcatcher - Stephen King
9. Neverwhere - Neil Gaiman
10. Where the Red Fern Grows - Wilson Rawls


## Method #2: Recs Based on Favorite Author

In [303]:
# Find all book written by author
# author name -> list of books
def find_book_by_auth(auth_name):
    id_results = graph.run("MATCH (a:Author{AName: '" + auth_name + "'})<-[r:WRITTEN_BY]-(b:BX_Book)<-[r2:LIKE] \
                            -(u:BX_User)-[r3:LIKE]->(b2:BX_Book)-[r4:WRITTEN_BY]->(a2:Author) \
                            RETURN a2.AName as Author, b2.title as Title, r3.BRating AS Rating, \
                            count(b2.ISBN) as Count order by count(b2.ISBN) desc").data()
    return id_results


# author name -> printing x recomendations
def simple_author_recs(auth_name, x):
    data = pd.DataFrame(find_book_by_auth(auth_name))
    df = pd.DataFrame(data, columns=['Author','Count', 'Rating', 'Score', 'Title'])
    df['Author'] = data["Author"]
    df["Count"] = data["Count"]
    df["Rating"] = data["Rating"]
    df["Score"] = df["Count"] * df["Rating"]
    df["Title"] = data["Title"]
    df = df.sort_values(by=["Score"], ascending=False)
    count = 1
    result_str = ""
    for i, row in df.iterrows():
        if i < x:
            result_str += (str(count) + ". " + row["Title"] + " - " + row["Author"] + '\n') 
            count +=1
    return print(result_str)

# example
simple_author_recs("John Green", 5)

1. The Mulberry Tree - Jude Deveraux
2. Slammerkin - Emma Donoghue
3. When the Wind Blows - John Saul
4. Midnight Bayou - Nora Roberts
5. Harry Potter and the Goblet of Fire (Book 4) - J. K. Rowling



## Methods 1 + 2 Driver:

In [307]:
def get_recomendation(fav, fav_type, amount=5):
    if fav_type == "author":
        return simple_author_recs(fav, amount)
    if fav_type == "book":
        return print_book_recs(fav, amount)
    else: 
        return "Please put in a valid type"

In [308]:
# example for author
get_recomendation("John Green", "author")

1. The Mulberry Tree - Jude Deveraux
2. Slammerkin - Emma Donoghue
3. When the Wind Blows - John Saul
4. Midnight Bayou - Nora Roberts
5. Harry Potter and the Goblet of Fire (Book 4) - J. K. Rowling



In [309]:
# example for book
get_recomendation("The Color of Magic", "book", 10)

1. The Light Fantastic (Discworld Novels (Paperback)) - Terry Pratchett
2. American Gods - Neil Gaiman
3. Equal Rites (Discworld Novels (Paperback)) - Terry Pratchett
4. Pyramids (Discworld Novels (Paperback)) - Terry Pratchett
5. Good Omens: The Nice and Accurate Prophecies of Agnes Nutter, Witch - Neil Gaiman
6. 84 Charing Cross Road - Helene Hanff
7. The Hobbit: or There and Back Again - J.R.R. Tolkien
8. Dreamcatcher - Stephen King
9. Neverwhere - Neil Gaiman
10. Where the Red Fern Grows - Wilson Rawls


## Method #3: Recs Based on 5 Favorite Titles - Non Matrix Factorization 