In [1]:
import networkx as nx
import matplotlib.pyplot as plt
import pickle
import pandas as pd
import random
from networkx.algorithms import community
import numpy as np

In [2]:
# Read the preprocessed book table from disk.
dataset_path = "Processed_BookData.csv"
df = pd.read_csv(dataset_path)

# Work on a fixed 1% subset of rows. The stdlib RNG is seeded so the same
# row positions are drawn on every run.
random.seed(74)
row_positions = range(len(df))
sample_size = int(0.01 * len(df))
selected_rows = random.sample(row_positions, sample_size)
df_sample = df.iloc[selected_rows]

In [8]:
# Number of distinct values in the sample's 'book_authors' column.
len(set(df_sample['book_authors'].values))

346

In [12]:
# Read the author-pair table for the 1% sample.
communities_set_path = "Communities_1%_BookData.csv"
communities_df = pd.read_csv(communities_set_path)

# Every author that appears on either side of a pair.
first_side = set(communities_df['Author1'].values)
second_side = set(communities_df['Author2'].values)
authors = first_side | second_side
len(authors)

346

In [22]:
# Print every title in the sample with a running zero-based index.
titles = df_sample['book_title'].values
for count, book in enumerate(titles):
    print(count, book, sep=".\t")
count = len(titles)  # leave the counter at the number of titles printed

0.	Burlian
1.	Third Grave Dead Ahead
2.	The Sisters Who Would Be Queen: Mary, Katherine, and Lady Jane Grey: A Tudor Tragedy
3.	Unemployable!
4.	Glimpses of Unfamiliar Japan
5.	The Contest
6.	Pauliska, ou la perversité moderne
7.	The Study Series Bundle
8.	Love, Dishonor, Marry, Die, Cherish, Perish
9.	The Patchwork House
10.	Sunshine
11.	The Future Eaters: An Ecological History of the Australasian Lands and People
12.	Avoiding Commitment
13.	Succubus Blues
14.	Guy Noir and the Straight Skinny
15.	Rhapsodic
16.	Daughter of the Earth and Sky
17.	The Grey King
18.	The Complete Photo Guide to Hand Lettering and Calligraphy: The Essential Reference for Novice and Expert Letterers and Calligraphers
19.	Water Walker
20.	Until Jax
21.	Bleeding Violet
22.	Fame, Glory, and Other Things on My To Do List
23.	Great Mysteries of the Past: Experts Unravel Fact and Fallacy Behind the Headlines of History
24.	Taken
25.	The Blackhouse
26.	Acceptance
27.	The Housewife Assassin's Handbook
28.	The Death o

In [125]:
def get_n_authors(author, n, communities=None):
    """Return the names of the authors most strongly paired with `author`.

    `author` is looked up on both sides of the author-pair table, and the
    partner authors are ranked by the value in the table's third column,
    highest first.

    Parameters
    ----------
    author : str
        Name to match against the 'Author1' and 'Author2' columns.
    n : int
        Maximum number of partner authors to return. A value <= 0 yields an
        empty result.
    communities : pandas.DataFrame, optional
        Pair table to search; defaults to the notebook-level
        `communities_df`. It must contain 'Author1' and 'Author2' columns and
        a third column holding the pair's score.
        NOTE(review): the score column is read by position (index 2) because
        its name is not visible here -- presumably an edge weight; confirm.

    Returns
    -------
    dict_keys
        Partner author names ordered by descending score. Empty when
        `author` has no pairs.
    """
    if communities is None:
        communities = communities_df

    # A negative n would otherwise slice from the end and silently drop the
    # weakest partners instead of returning nothing.
    if n <= 0:
        return {}.keys()

    score = communities.iloc[:, 2]
    other_authors = {}

    # Pairs where `author` is listed first: the partner is in 'Author2'.
    as_first = communities['Author1'] == author
    for partner, value in zip(communities.loc[as_first, 'Author2'], score[as_first]):
        other_authors[partner] = value

    # Pairs where `author` is listed second: the partner is in 'Author1'.
    # A partner already recorded from the first pass keeps that earlier score.
    as_second = communities['Author2'] == author
    for partner, value in zip(communities.loc[as_second, 'Author1'], score[as_second]):
        if partner not in other_authors:
            other_authors[partner] = value

    # sorted() is stable, so partners with equal scores keep insertion order.
    ranked = sorted(other_authors.items(), key=lambda item: item[1], reverse=True)
    return dict(ranked[:n]).keys()

In [178]:
# Fetch up to 50 authors paired with 'Henrietta Clarke' and list them,
# one name per line, in ranked order.
recs = get_n_authors('Henrietta Clarke', 50)

for rec in recs:
    print(rec)

Jackie Williams
Robin Reardon
T.J. Klune
Cardeno C.
Mel Bossa
Kari Gregg
Olivia Cunning
Courtney Summers
Jacqueline Green
Cecily von Ziegesar
Hannah Richell
Michelle Falkoff
Kristen Ashley
Judith McNaught
Aurora Rose Reynolds
Jay McLean
Olivia Evans
Lora Leigh
Lorelei James
Nicola Yoon
Jill Shalvis
Samantha Young
Billie Letts
thefreakoffreaks
Blue Jeans
Cindy Gerard
Janette Rallison
Elizabeth Finn
Non Pratt
Erin L. Schneider
K.A. Linde
Sonya Sones
Sophie Kinsella
Kelly Oram
Adam Pelzman
Amanda Brown
Lisa Gillis
Maureen Johnson
Robert Bryndza
Rainbow Rowell
A. Drew
Gloria Cook
Laurelin Paige
Jillian Dodd
Jill Stengl
Ally Carter
Fuyumi Soryo
Lynn Kurland
Lisa Kleypas
Ray Anselmo


In [172]:
def get_m_books(authors, m, books_df=None):
    """Return up to `m` (title, author) pairs for books by the given authors.

    Books are ranked by `book_rating * book_rating_count`, highest first.

    Parameters
    ----------
    authors : list-like of str
        Author names to match against the 'book_authors' column.
    m : int
        Maximum number of books to return. A value <= 0 yields an empty list.
    books_df : pandas.DataFrame, optional
        Book table to search; defaults to the notebook-level `df_sample`.
        It must contain the columns 'book_title', 'book_authors',
        'book_rating' and 'book_rating_count'.

    Returns
    -------
    list of (str, str)
        `(book_title, book_authors)` tuples in ranked order. This is always a
        list, regardless of how many books matched.
    """
    if books_df is None:
        books_df = df_sample

    matching = books_df[books_df['book_authors'].isin(authors)]

    # Keyed by title: when the same title occurs more than once, the last
    # matching row supplies the author and score.
    books = {}
    for title, author, rating, rating_count in zip(
        matching['book_title'],
        matching['book_authors'],
        matching['book_rating'],
        matching['book_rating_count'],
    ):
        books[title] = (author, rating * rating_count)

    # sorted() is stable, so equally scored titles keep their table order.
    ranked = sorted(books.items(), key=lambda item: item[1][1], reverse=True)

    # max() guards against a negative m slicing entries off the tail.
    return [(title, author) for title, (author, _score) in ranked[:max(m, 0)]]

In [177]:
# Top 30 books (by rating * rating count) written by the authors in `recs`.
get_m_books(recs,30)

[('Fangirl', 'Rainbow Rowell'),
 ('Where the Heart Is', 'Billie Letts'),
 ('Carry On', 'Rainbow Rowell'),
 ('The Sun Is Also a Star', 'Nicola Yoon'),
 ('13 Little Blue Envelopes', 'Maureen Johnson'),
 ("Don't Judge a Girl by Her Cover", 'Ally Carter'),
 ('Finding Audrey', 'Sophie Kinsella'),
 ('Secrets of a Summer Night', 'Lisa Kleypas'),
 ('Lady Luck', 'Kristen Ashley'),
 ('Avoiding Commitment', 'K.A. Linde'),
 ('Hero', 'Samantha Young'),
 ('Hot Ticket', 'Olivia Cunning'),
 ('Double Time', 'Olivia Cunning'),
 ('Perfect', 'Judith McNaught'),
 ('The Fixed Trilogy (Fixed, #1-3)', 'Laurelin Paige'),
 ('More Than Her', 'Jay McLean'),
 ('Mars, Volume 01', 'Fuyumi Soryo'),
 ('Until Jax', 'Aurora Rose Reynolds'),
 ("What My Mother Doesn't Know", 'Sonya Sones'),
 ('Some Girls Are', 'Courtney Summers'),
 ('Legally Blonde', 'Amanda Brown'),
 ('V is for Virgin', 'Kelly Oram'),
 ('Slow Heat', 'Jill Shalvis'),
 ('Playlist for the Dead', 'Michelle Falkoff'),
 ('Renegade', 'Lora Leigh'),
 ('Bound', '

The user chooses a book title from the list above and enters it. The notebook then shows that book's details along with recommendations for similar books.

1. Enter the book title
2. Enter the number of authors to fetch from the community => n
3. Enter the number of book recommendations to get => m

**Note: the input box appears at the top of the screen.**

In [25]:
# Ask for the title to look up; the bare name below echoes it as the cell output.
book_title = input("Enter title of the book to search: ")
book_title

'Taken'

In [180]:
# Ask how many related authors to consider; int() raises ValueError on non-numeric input.
n = int(input("Enter the number of authors to fetch from the community: "))
n

50

In [181]:
# Ask how many book recommendations to show; int() raises ValueError on non-numeric input.
m = int(input("Enter the number of book recommendations to get: "))
m

25

In [186]:
# Look up the requested title in the sample (exact, case-sensitive match),
# print its details, then print recommendations from related authors.
search = df_sample[df_sample['book_title']==book_title]
if len(search) != 0:
    # Several rows may share a title; only the first match is shown.
    # Fields are read by column position in df_sample.
    first_match = search.values[0]
    author = first_match[0]
    num_pages = first_match[1]
    book_rating = first_match[2]
    book_rating_count = first_match[3]
    # A missing genre cell is read as NaN (a float), which has no .split().
    raw_genres = first_match[6]
    genres = raw_genres.split(",") if isinstance(raw_genres, str) else []

    print()
    print("Book Title:", book_title)
    print("Author:", author)
    print("Number of Pages:", num_pages)
    print("Rating:", book_rating)
    print("Number of Ratings:", book_rating_count)
    print("Genres:",genres)
    print()
    print("\n----- Other Books you may like ------\n")
    recs = get_m_books(get_n_authors(author,n),m)
    count = 0
    for count, rec in enumerate(recs, start=1):
        print(count,end=". ")
        print(rec[0],"by",rec[1])
    print()
else:
    print("----- Sorry Book not found -----")
    
    


Book Title: Taken
Author: Erin Bowman
Number of Pages: 360
Rating: 3.76
Number of Ratings: 11122
Genres: ['Dystopia', 'Fantasy', 'Science Fiction', 'Young Adult']


----- Other Books you may like ------

1. City of Bones by Cassandra Clare
2. The Selection by Kiera Cass
3. Dead Until Dark by Charlaine Harris
4. Red Queen by Victoria Aveyard
5. A Monster Calls by Patrick Ness
6. Un monstruo viene a verme by Patrick Ness
7. The Fiery Cross by Diana Gabaldon
8. The Unbecoming of Mara Dyer by Michelle Hodkin
9. Glass Sword by Victoria Aveyard
10. The Queen of the Tearling by Erika Johansen
11. The Book of Three by Lloyd Alexander
12. The Grey King by Susan Cooper
13. Acceptance by Jeff VanderMeer
14. Blestemul Vraciului by Joseph Delaney
15. Unraveling by Elizabeth Norris
16. The Goblin Emperor by Katherine Addison
17. The Singing by Alison Croggon
18. Before I Wake by Rachel Vincent
19. The October Country by Ray Bradbury
20. Homecoming by Kass Morgan
21. Sookie Stackhouse 8 Volume Set b

In [187]:
def searchBook(book_title=None, n=None, m=None):
    """Print one book's details followed by recommended books.

    The book is looked up in the notebook-level `df_sample` by exact title.
    Recommendations come from `get_m_books(get_n_authors(author, n), m)`.

    Parameters
    ----------
    book_title : str, optional
        Title to search for. Prompted for interactively when omitted.
    n : int, optional
        Number of related authors to fetch. Prompted for when omitted.
    m : int, optional
        Number of book recommendations to print. Prompted for when omitted.

    Returns
    -------
    None
        All results are printed.

    Raises
    ------
    ValueError
        If a prompted value for `n` or `m` is not a valid integer.
    """
    if book_title is None:
        book_title = input("Enter title of the book to search: ")
    if n is None:
        n = int(input("Enter the number of authors to fetch from the community: "))
    if m is None:
        m = int(input("Enter the number of book recommendations to get: "))

    search = df_sample[df_sample['book_title'] == book_title]
    if len(search) == 0:
        print("----- Sorry Book not found -----")
        return

    # Several rows may share a title; only the first match is shown.
    # Fields are read by column position in df_sample.
    first_match = search.values[0]
    author = first_match[0]
    num_pages = first_match[1]
    book_rating = first_match[2]
    book_rating_count = first_match[3]
    # A missing genre cell is read as NaN (a float), which has no .split().
    raw_genres = first_match[6]
    genres = raw_genres.split(",") if isinstance(raw_genres, str) else []

    print()
    print("Book Title:", book_title)
    print("Author:", author)
    print("Number of Pages:", num_pages)
    print("Rating:", book_rating)
    print("Number of Ratings:", book_rating_count)
    print("Genres:", genres)
    print()
    print("\n----- Other Books you may like ------\n")
    recs = get_m_books(get_n_authors(author, n), m)
    for count, rec in enumerate(recs, start=1):
        print(count, end=". ")
        print(rec[0], "by", rec[1])
    print()

In [188]:
# Run the interactive search; prompts for the title, n and m.
searchBook()


Book Title: 13 Little Blue Envelopes
Author: Maureen Johnson
Number of Pages: 322
Rating: 3.64
Number of Ratings: 73983
Genres: ['Contemporary', 'Fiction', 'Romance', 'Young Adult']


----- Other Books you may like ------

1. Fangirl by Rainbow Rowell
2. Where the Heart Is by Billie Letts
3. Carry On by Rainbow Rowell
4. The Sun Is Also a Star by Nicola Yoon
5. Don't Judge a Girl by Her Cover by Ally Carter
6. Finding Audrey by Sophie Kinsella
7. Secrets of a Summer Night by Lisa Kleypas
8. Lady Luck by Kristen Ashley
9. If There Be Thorns by V.C. Andrews
10. How I Live Now by Meg Rosoff
11. Avoiding Commitment by K.A. Linde
12. Hero by Samantha Young
13. Hot Ticket by Olivia Cunning
14. Double Time by Olivia Cunning
15. Perfect by Judith McNaught
16. The Fixed Trilogy (Fixed, #1-3) by Laurelin Paige
17. More Than Her by Jay McLean
18. Mars, Volume 01 by Fuyumi Soryo
19. Until Jax by Aurora Rose Reynolds
20. What My Mother Doesn't Know by Sonya Sones

