In [None]:
import requests
from bs4 import BeautifulSoup

In [None]:
import re
# Regexes applied to the publication line of a book page.
# Raw strings keep `\s` / `\d` as regex escapes instead of (invalid) string escapes.

# A capital letter from the set JFMASOND, lazily up to the first whitespace,
# then lazily up to the first run of four digits.
date_pattern = r'[JFMASOND].*?\s.*?\d{4}'
# Everything that follows the literal text "by " on the same line.
publisher_pattern = r'(?<=by ).*'
# A run of the letters s/t/n/d/r/h that immediately follows a digit.
suffix_pattern = r'(?<=\d)[stndrh]+'

In [None]:
def get_response(url, timeout=30):
    """Fetch ``url`` with an HTTP GET and return the response, or ``None`` on failure.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` may block indefinitely.

    Returns:
        The ``requests`` response when the request succeeded with a non-error
        status; ``None`` when any request error occurred (the error is printed).
    """
    try:
        response = requests.get(url, timeout=timeout)
        # Turn 4xx/5xx status codes into HTTPError so they are reported below.
        response.raise_for_status()
        return response
    except requests.exceptions.HTTPError as errh:
        print('HTTP Error: ', errh)
    except requests.exceptions.ConnectionError as errc:
        print('Connection Error: ', errc)
    except requests.exceptions.Timeout as errt:
        print('Timed out: ', errt)
    except requests.exceptions.RequestException as err:
        print('Some other error: ', err)
    # Every error path falls through to here; callers must handle None.
    return None

In [None]:
def get_links(url='https://www.goodreads.com/genres/most_read/fantasy'):
    """Return absolute book-page URLs found on a Goodreads listing page.

    Args:
        url: Listing page to scan for ``div.coverWrapper > a`` anchors.

    Returns:
        A list of URLs built by prefixing each anchor's ``href`` with the
        Goodreads base URL. The list is empty when the page could not be fetched.
    """
    response = get_response(url)
    if response is None:
        # get_response has already printed the error; nothing to parse.
        return []

    soup = BeautifulSoup(response.text, 'html.parser')
    links = soup.select('div.coverWrapper > a')

    base_url = 'https://goodreads.com'
    # Anchors without an href are skipped instead of raising KeyError.
    return [base_url + link['href'] for link in links if link.get('href')]

In [None]:
# Collect the book-page URLs once; main_func iterates over this module-level list.
list_links = get_links()
# Preview the first ten URLs as the cell output.
list_links[:10]

In [None]:
# Column accumulators: get_info appends one value to each of these per book.
(
    list_titles,
    list_authors,
    list_stars,
    list_ratings_count,
    list_pages,
    list_publish_date,
    list_publishers,
    list_languages,
) = ([] for _ in range(8))

In [None]:
def get_soup(url, max_attempts=10):
    """Fetch ``url`` and return its parsed HTML, refetching unwanted page variants.

    A page whose ``<html>`` tag carries ``lang="en"`` is fetched again; the
    first page without that attribute is returned.
    NOTE(review): presumably the ``lang="en"`` variant is a layout that the
    selectors in get_info do not match - confirm.

    Args:
        url: Page to download.
        max_attempts: Upper bound on the number of fetches. The loop replaces
            unbounded recursion, which could end in RecursionError and issued
            one HTTP request per recursion level.

    Returns:
        A BeautifulSoup document: the first page without ``lang="en"``, else
        the last page fetched, else an empty document when every fetch failed.
    """
    soup = None
    for _ in range(max_attempts):
        response = get_response(url)
        if response is None:
            # Fetch failed (already reported by get_response); try again.
            continue
        soup = BeautifulSoup(response.text, 'html.parser')
        if soup.find('html', {'lang': 'en'}) is None:
            return soup
    if soup is None:
        # No attempt produced a response: hand back an empty document so that
        # callers can still call find/select_one on the result.
        soup = BeautifulSoup('', 'html.parser')
    return soup

In [None]:
def get_info(soup):
    """Extract one book's details from ``soup`` and append them to the result lists.

    Args:
        soup: Parsed book page. Only ``find`` and ``select_one`` are called on it.

    Returns:
        None. Exactly one value is appended to each of the module-level lists
        ``list_titles``, ``list_authors``, ``list_stars``, ``list_ratings_count``,
        ``list_pages``, ``list_publish_date``, ``list_publishers`` and
        ``list_languages``.

    Fields that cannot be found fall back to ``'NA'``, except stars
    (``'1.00'``) and language (``'English'``).
    """
    object_title = soup.find('h1')
    title = object_title.text.strip() if object_title is not None else 'NA'

    object_author = soup.find('span', {'itemprop': 'name'})
    # Guard on the author element itself (the title element was checked here before).
    author = object_author.text if object_author is not None else 'NA'

    object_stars = soup.find('span', {'itemprop': 'ratingValue'})
    # NOTE(review): the fallback is '1.00' rather than 'NA' - presumably so the
    # later float conversion in comparator_function still works; confirm.
    stars = object_stars.text.strip() if object_stars is not None else '1.00'

    object_ratings = soup.find('meta', {'itemprop': 'ratingCount'})
    ratings = object_ratings.get('content', 'NA') if object_ratings is not None else 'NA'

    object_pages = soup.find('meta', {'property': 'books:page_count'})
    pages = object_pages.get('content', 'NA') if object_pages is not None else 'NA'

    # Date and publisher are both parsed out of the same block of text.
    date = 'NA'
    publisher = 'NA'
    object_publish_info = soup.select_one('div.row + div.row')
    if object_publish_info is not None:
        publish_info = object_publish_info.text.strip()
        date_matches = re.findall(date_pattern, publish_info)
        if date_matches:
            date = date_matches[0]
        publisher_matches = re.findall(publisher_pattern, publish_info)
        if publisher_matches:
            publisher = publisher_matches[0]

    object_language = soup.find('div', {'class': 'infoBoxRowItem', 'itemprop': 'inLanguage'})
    language = object_language.text if object_language is not None else 'English'

    list_titles.append(title)
    list_authors.append(author)
    list_stars.append(stars)
    list_ratings_count.append(ratings)
    list_pages.append(pages)
    # Whole strings are stored; indexing the 'NA' fallback with [0] used to store 'N'.
    list_publish_date.append(date)
    list_publishers.append(publisher)
    list_languages.append(language)
    

In [None]:
from IPython.display import clear_output

def main_func():
    """Scrape every URL in ``list_links`` and record its details via get_info.

    After each page the cell output is cleared and a progress line
    ``completed... <done>/<total>`` is printed, where ``<total>`` is the number
    of collected links rather than a fixed 100.
    """
    total = len(list_links)
    for i, url_link in enumerate(list_links, start=1):
        soup = get_soup(url_link)
        get_info(soup)
        # wait=True defers clearing until the next output arrives.
        clear_output(wait=True)
        print(f'completed... {i}/{total}', flush=True)

In [None]:
# Run the scrape: one get_soup + get_info call per URL in list_links.
# get_info appends to the module-level list_* accumulators.
main_func()

In [None]:
# Sanity check: get_info appends one title per call, so this is the number of books collected.
len(list_titles)

In [None]:
# Fix the dates' format: turn the ordinal suffix after the day number into a
# comma, e.g. "August 1st 2006" -> "August 1, 2006".
# re.sub only touches the suffix that directly follows a digit; str.replace on
# the suffix text also rewrote matching letters elsewhere ("August" -> "Augu,").
for i, date in enumerate(list_publish_date):
    list_publish_date[i] = re.sub(suffix_pattern, ',', date, count=1)

In [None]:
# comparator function returns the element-wise ratio of its second argument to its first
def comparator_function(list_ratings_count, list_stars):
    """Return ``list_stars[j] / list_ratings_count[j]`` for every position ``j``.

    NOTE: despite the parameter names, the quotient is second argument divided
    by first argument. The call in this notebook passes
    ``(list_stars, list_ratings_count)``, which therefore yields
    ratings count / stars.

    Args:
        list_ratings_count: Divisors; values convertible with ``float``.
        list_stars: Dividends; values convertible with ``float``.

    Returns:
        A list of floats, as long as the shorter input. A position whose values
        cannot be converted (for example the ``'NA'`` placeholder) or whose
        divisor is zero yields ``nan`` instead of raising.
    """
    comparator = []
    for divisor, dividend in zip(list_ratings_count, list_stars):
        try:
            comparator.append(float(dividend) / float(divisor))
        except (TypeError, ValueError, ZeroDivisionError):
            # Placeholder or unusable value: keep the row with an undefined ratio.
            comparator.append(float('nan'))
    return comparator
    
# NOTE(review): the arguments are passed as (stars, ratings count), the reverse
# of the parameter names. comparator_function divides its second argument by
# its first, so each value here is ratings count / stars.
comparator = comparator_function(list_stars, list_ratings_count)

In [None]:
# Column name -> column values, in the order the columns should appear.
# 'comparator' is a helper column used only for sorting.
main_dict = dict(
    zip(
        (
            'Title', 'Author', 'Stars', 'Ratings Count', 'Pages',
            'Publish Date', 'Publisher', 'Language', 'comparator',
        ),
        (
            list_titles, list_authors, list_stars, list_ratings_count, list_pages,
            list_publish_date, list_publishers, list_languages, comparator,
        ),
    )
)

In [None]:
import pandas as pd
# Build the table, order it by the helper ratio (largest first, stable sort so
# ties keep their scrape order), then discard the helper column.
books_df = (
    pd.DataFrame(main_dict)
    .sort_values(by='comparator', ascending=False, kind='mergesort', ignore_index=True)
    .drop(columns='comparator')
)
books_df

In [None]:
# 1-based serial numbers, one per row. Sized from the frame so the assignment
# also works when the scrape produced a number of rows other than 100.
number_list = list(range(1, len(books_df) + 1))
books_df['S No'] = number_list
books_df = books_df.set_index('S No')

In [None]:
# Save the table to a CSV file in the current working directory.
books_df.to_csv('list-of-fantasy-fiction-books.csv')