# Preprocessing of the DATABASE

In [62]:
import pandas as pd
import numpy as np
import random

- - -

## Read CSV

In [63]:
books = pd.read_csv("./Data/Books.csv")
print(len(books))

52478


- - - 

## Remove Columns

In [64]:
book = books.iloc[0]
print(book)

bookId                                       2767052-the-hunger-games
title                                                The Hunger Games
series                                            The Hunger Games #1
author                                                Suzanne Collins
rating                                                           4.33
description         WINNING MEANS FAME AND FORTUNE.LOSING MEANS CE...
language                                                      English
isbn                                                    9780439023481
genres              ['Young Adult', 'Fiction', 'Dystopia', 'Fantas...
characters          ['Katniss Everdeen', 'Peeta Mellark', 'Cato (H...
bookFormat                                                  Hardcover
edition                                                 First Edition
pages                                                             374
publisher                                            Scholastic Press
publishDate         

In [65]:
# Columns discarded before any preprocessing takes place.
unused_columns = [
    'bookId', 'isbn', 'edition', 'ratingsByStars', 'coverImg', 'bbeVotes', 'bbeScore',
    'price', 'characters', 'publisher', 'setting', 'firstPublishDate', 'bookFormat',
]
books = books.drop(columns=unused_columns)

In [66]:
books

Unnamed: 0,title,series,author,rating,description,language,genres,pages,publishDate,awards,numRatings,likedPercent
0,The Hunger Games,The Hunger Games #1,Suzanne Collins,4.33,WINNING MEANS FAME AND FORTUNE.LOSING MEANS CE...,English,"['Young Adult', 'Fiction', 'Dystopia', 'Fantas...",374,09/14/08,['Locus Award Nominee for Best Young Adult Boo...,6376780,96.0
1,Harry Potter and the Order of the Phoenix,Harry Potter #5,"J.K. Rowling, Mary GrandPré (Illustrator)",4.50,There is a door at the end of a silent corrido...,English,"['Fantasy', 'Young Adult', 'Fiction', 'Magic',...",870,09/28/04,['Bram Stoker Award for Works for Young Reader...,2507623,98.0
2,To Kill a Mockingbird,To Kill a Mockingbird,Harper Lee,4.28,The unforgettable novel of a childhood in a sl...,English,"['Classics', 'Fiction', 'Historical Fiction', ...",324,05/23/06,"['Pulitzer Prize for Fiction (1961)', 'Audie A...",4501075,95.0
3,Pride and Prejudice,,"Jane Austen, Anna Quindlen (Introduction)",4.26,Alternate cover edition of ISBN 9780679783268S...,English,"['Classics', 'Fiction', 'Romance', 'Historical...",279,10/10/00,[],2998241,94.0
4,Twilight,The Twilight Saga #1,Stephenie Meyer,3.60,About three things I was absolutely positive.\...,English,"['Young Adult', 'Fantasy', 'Romance', 'Vampire...",501,09/06/06,"['Georgia Peach Book Award (2007)', 'Buxtehude...",4964519,78.0
...,...,...,...,...,...,...,...,...,...,...,...,...
52473,Fractured,Fateful #2,Cheri Schmidt (Goodreads Author),4.00,The Fateful Trilogy continues with Fractured. ...,English,"['Vampires', 'Paranormal', 'Young Adult', 'Rom...",0,May 28th 2011,[],871,94.0
52474,Anasazi,Sense of Truth #2,Emma Michaels,4.19,"'Anasazi', sequel to 'The Thirteenth Chime' by...",English,"['Mystery', 'Young Adult']",190,August 5th 2011,[],37,95.0
52475,Marked,Soul Guardians #1,Kim Richardson (Goodreads Author),3.70,--READERS FAVORITE AWARDS WINNER 2011--Sixteen...,English,"['Fantasy', 'Young Adult', 'Paranormal', 'Ange...",280,March 18th 2011,"[""Readers' Favorite Book Award (2011)""]",6674,84.0
52476,Wayward Son,,"Tom Pollack (Goodreads Author), John Loftus (G...",3.85,A POWERFUL TREMOR UNEARTHS AN ANCIENT SECRETBu...,English,"['Fiction', 'Mystery', 'Historical Fiction', '...",507,September 1st 2011,[],238,90.0


- - - 

## Preprocess categories

In [67]:
""" We have to remove all accents and not valid strings so that the clips language works. """

from unidecode import unidecode

def clean_string(string: str) -> str:
    """Return `string` after passing it through `unidecode`.

    Used on the text columns to get rid of accents and other characters
    that are not plain ASCII.
    """
    return unidecode(string)

Titles  - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

In [68]:
# Transliterate every title; clean_string is applied element by element.
books['title'] = books['title'].apply(clean_string)

Authors - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 

In [69]:
authors = books['author'].unique()
print(authors)

['Suzanne Collins' 'J.K. Rowling, Mary GrandPré (Illustrator)'
 'Harper Lee' ... 'Cheri Schmidt (Goodreads Author)'
 'Tom Pollack (Goodreads Author), John Loftus (Goodreads Author), Jim Alves'
 'Misty Moncur (Goodreads Author)']


In [70]:
def modify_authors(author):
    """Reduce a raw author field to the cleaned name of its first author.

    The raw field may list several comma-separated contributors, each one
    optionally followed by a parenthesised role, e.g.
    ``'J.K. Rowling, Mary GrandPré (Illustrator)'``. Only the first
    contributor is kept: its whitespace is normalised, it is passed through
    ``clean_string`` and any parenthesised suffix is dropped.

    Args:
        author: Raw author string.

    Returns:
        The first author's name, without role annotation and without
        leading or trailing whitespace.
    """
    first_author = clean_string(' '.join(author.split(',')[0].split()))
    # Everything from the first '(' onwards is a role such as "(Goodreads Author)".
    name = first_author.partition('(')[0]
    # Cutting at '(' keeps the space that preceded it. Strip it so that
    # 'Jane Doe (Goodreads Author)' and 'Jane Doe' produce the same name.
    return name.strip()

# assign() returns a new frame, so `books` itself is left untouched here.
filtered_books_df = books.assign(author=books['author'].apply(modify_authors))

In [71]:
books = filtered_books_df.copy()

Languages - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 

In [72]:
languages = books['language'].unique()
print(languages)

['English' 'French' 'German' 'Persian' 'Arabic' nan 'Spanish'
 'Multiple languages' 'Portuguese' 'Indonesian' 'Turkish' 'Polish'
 'Bulgarian' 'Tamil' 'Japanese' 'Romanian' 'Italian'
 'French, Middle (ca.1400-1600)' 'Norwegian' 'Urdu' 'Dutch' 'Finnish'
 'Marathi' 'Chinese' 'Swedish' 'Icelandic' 'Malayalam' 'Croatian'
 'Estonian' 'Greek, Modern (1453-)' 'Russian' 'Kurdish' 'Danish' 'Hindi'
 'Filipino; Pilipino' 'Serbian' 'Bengali' 'Malay' 'Catalan; Valencian'
 'Czech' 'Vietnamese' 'Armenian' 'Georgian' 'Kannada' 'Korean' 'Nepali'
 'Slovak' 'Telugu' 'Hungarian' 'English, Middle (1100-1500)' 'Azerbaijani'
 'Farsi' 'Lithuanian' 'Ukrainian' 'Bokmål, Norwegian; Norwegian Bokmål'
 'Iranian (Other)' 'Faroese' 'Basque' 'Macedonian' 'Maltese' 'Gujarati'
 'Amharic' 'Aromanian; Arumanian; Macedo-Romanian' 'Assamese'
 'Panjabi; Punjabi' 'Albanian' 'Latvian' 'Bosnian' 'Afrikaans' 'Thai'
 'Dutch, Middle (ca.1050-1350)' 'Mongolian' 'Tagalog' 'Galician' 'Aleut'
 'Slovenian' 'Undetermined' 'Greek, Ancien

In [73]:
desired_languages = ['English']
# Keep only the rows written in a desired language; the column is then no longer needed.
is_desired_language = books['language'].isin(desired_languages)
books = books.loc[is_desired_language].drop(columns='language')

Genres - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 

In [74]:
import ast

books['genres'] = books['genres'].apply(ast.literal_eval)

In [75]:
# Split the "Classic" genres and the age-related genres into independent columns.

def _classic_label(tags):
    """Return the classic category found in `tags`, or "no" when there is none."""
    if 'Classics' in tags:
        return 'classic'
    if 'Modern Classics' in tags:
        return 'modern-classic'
    if 'Children Classics' in tags:
        return 'children-classic'
    return "no"


def _age_groups(tags):
    """Return the age-group labels whose trigger genres appear in `tags`."""
    # (label, genres that trigger it), in the order the labels must be listed.
    rules = (
        ('kids', ('Kids', 'Childrens')),
        ('teen', ('Teen',)),
        ('young-adult', ('Young Adult',)),
        ('adult', ('Adult',)),
    )
    return [label for label, triggers in rules if any(t in tags for t in triggers)]


classics = [_classic_label(tags) for tags in books['genres']]
ages = [_age_groups(tags) for tags in books['genres']]

# Both new columns are placed right after "genres": classics first, then ages.
genres_position = books.columns.get_loc("genres")
books.insert(genres_position + 1, 'classics', classics)
books.insert(genres_position + 2, 'ages', ages)

In [76]:
def get_genres(db):
    """Collect every distinct genre found in the ``genres`` column of ``db``.

    Each cell of the column is expected to be an iterable of genre names.
    Returns a set with all the names seen.
    """
    return {tag for tags in db['genres'] for tag in tags}

genres = get_genres(books)
print("Genres:", len(genres), genres)

Genres: 966 {'Chick Lit', 'Hard Boiled', 'Western Historical Romance', 'Mary Shelley', 'Contemporary', 'The United States Of America', 'Cyberpunk', 'Ethiopia', 'Journal', 'Philosophy', 'M M M', 'Dragons', 'Food History', 'Georgian', 'Womens', 'Singularity', 'Spanish Literature', 'Brewing', 'German Literature', 'Fae', 'Greece', 'Womens Fiction', 'Civil War', 'Memoir', 'Love Inspired', 'Humanities', 'Travel', 'Writing', 'Computer Science', 'World War II', 'Comix', 'Amateur Sleuth', 'Star Trek Deep Space Nine', 'Irish Literature', 'Algeria', 'Space Opera', 'Art and Photography', 'Arthurian', 'History Of Medicine', 'Ornithology', 'Loveswept', 'Sustainability', 'Viking Romance', 'Oral History', 'Lds Non Fiction', 'Holland', 'Global Warming', 'International Relations', 'Romance', 'Medieval', 'Entrepreneurship', 'Adoption', 'Boarding School', 'Website Design', 'Ethnography', 'Banks', 'Personal Finance', 'Communication', 'Tragedy', 'Planetary Romance', 'Pop Culture', 'Young Adult Historical Fi

In [77]:
desired_genres = ['Romance', 'Fantasy', 'Science Fiction', 'Horror']
desired_subgenres = ['Comedy', 'Erotic Romance', 'Paranormal Romance', 'Contemporary Romance', 'Historical Romance',
                     'High Fantasy', 'Urban Fantasy', 'Sword and Sorcery', 'Epic Fantasy', 'Historical Fantasy', 'Magic',
                     'Cyberpunk', 'Hard Science Fiction', 'Post Apocalyptic', 'Space', 'Time Travel', 'Historical Fiction', 'Realistic Fiction',
                     'Paranormal', 'Demons', 'Monsters', 'Zombies', 'Vampires', 'Witchcraft']

In [78]:
def _wanted_tags(tags, wanted):
    """Return the entries of `tags` that are in `wanted`; nothing unless 'Fiction' is a tag."""
    if 'Fiction' not in tags:
        return []
    return [tag for tag in tags if tag in wanted]


# Both lists are computed from the original tags before the column is overwritten.
genres = [_wanted_tags(tags, desired_genres) for tags in books['genres']]
subgenres = [_wanted_tags(tags, desired_subgenres) for tags in books['genres']]

books['genres'] = genres
books.insert(books.columns.get_loc("genres") + 1, 'subgenres', subgenres)

# Only books with at least one desired genre AND one desired subgenre survive.
has_genre = books['genres'].map(len) > 0
has_subgenre = books['subgenres'].map(len) > 0
books = books[has_genre & has_subgenre]

Awards - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 

In [79]:
books['awards'] = books['awards'].apply(ast.literal_eval)

In [80]:
def update_awards(awards_list) -> int:
    """Return how many entries `awards_list` contains (the book's award count)."""
    return len(awards_list)

books['awards'] = books['awards'].apply(update_awards)

Series -> saga - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 

In [81]:
# Rename 'series' to 'saga'; books without a series get the sentinel 'No'.
books = books.rename(columns={'series': 'saga'})
books['saga'] = books['saga'].replace(np.nan, 'No').apply(clean_string)

In [82]:
def _split_saga(name_saga):
    """Split a raw saga string such as ``'Harry Potter #5'`` into name and volume.

    Args:
        name_saga: Value of the ``saga`` column.

    Returns:
        A ``(saga_name, volume)`` tuple where ``volume`` is

        * ``"No"`` when the book has no saga (``name_saga == "No"``),
        * an ``int`` when the text after ``#`` is a plain number,
        * ``"remove"`` when there is no ``#`` or the text after it is not a
          plain number (e.g. ``'#1-3'``); such rows are dropped afterwards.
    """
    if name_saga == "No":
        return name_saga, "No"

    name, separator, volume = name_saga.partition("#")
    if not separator:
        # Without a '#', str.find() returns -1 and slicing with it would
        # silently chop the end of the name, so this case is handled explicitly.
        return name_saga, "remove"

    # rstrip() rather than dropping exactly one character: also right for 'Name#2'.
    name = name.rstrip()
    # isdecimal() (unlike isnumeric()) only accepts text that int() can convert.
    if volume.isdecimal():
        return name, int(volume)
    return name, "remove"


num_saga = []
saga = []
for name_saga in books["saga"]:
    saga_name, volume = _split_saga(name_saga)
    saga.append(saga_name)
    num_saga.append(volume)

saga_books = books.copy()
saga_books['saga'] = saga
saga_books.insert(saga_books.columns.get_loc("saga") + 1, "numSaga", num_saga)
# Drop the books whose volume number could not be determined.
saga_books = saga_books[saga_books['numSaga'].apply(lambda x: x != "remove")]

In [83]:
saga_books[saga_books['saga'] == "The Lord of the Rings"]

Unnamed: 0,title,saga,numSaga,author,rating,description,genres,subgenres,classics,ages,pages,publishDate,awards,numRatings,likedPercent
548,The Return of the King,The Lord of the Rings,3,J.R.R. Tolkien,4.53,In the third volume of The Lord of the Rings t...,[Fantasy],"[High Fantasy, Epic Fantasy, Magic]",classic,[],385,07/12/74,0,678718,98.0
608,The Two Towers,The Lord of the Rings,2,J.R.R. Tolkien,4.44,The Fellowship was scattered. Some were bracin...,[Fantasy],"[High Fantasy, Epic Fantasy, Magic]",classic,[],322,09/05/03,0,719647,97.0


In [84]:
books = saga_books.copy()

Publish Date - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 

In [85]:
books['publishDate'] = books['publishDate'].replace(np.nan, "00/00/00")

In [86]:
from dateutil import parser

def extract_year(date_str):
    """Return the year of a free-form date string, or ``None`` if it cannot be parsed.

    Args:
        date_str: Date text in any format understood by ``dateutil``
            (e.g. ``'09/14/08'`` or ``'May 28th 2011'``).

    Returns:
        The year as an ``int``, or ``None`` when ``dateutil`` rejects the
        value (for instance a placeholder such as ``'00/00/00'``).
    """
    # NOTE(review): dateutil fills fields missing from the text using the
    # current date, so a string without a year would yield the current year.
    # Confirm that no such rows exist.
    try:
        return parser.parse(date_str).year
    except (ValueError, OverflowError, TypeError):
        # ValueError covers dateutil's ParserError; OverflowError and TypeError
        # (non-string input such as NaN) are the other documented failures.
        return None

books['publishDate'] = books['publishDate'].apply(extract_year)

Result - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 

In [87]:
books = books.dropna()
books.isna().sum()

title           0
saga            0
numSaga         0
author          0
rating          0
description     0
genres          0
subgenres       0
classics        0
ages            0
pages           0
publishDate     0
awards          0
numRatings      0
likedPercent    0
dtype: int64

In [88]:
books

Unnamed: 0,title,saga,numSaga,author,rating,description,genres,subgenres,classics,ages,pages,publishDate,awards,numRatings,likedPercent
0,The Hunger Games,The Hunger Games,1,Suzanne Collins,4.33,WINNING MEANS FAME AND FORTUNE.LOSING MEANS CE...,"[Fantasy, Science Fiction, Romance]",[Post Apocalyptic],no,"[teen, young-adult]",374,2008.0,41,6376780,96.0
1,Harry Potter and the Order of the Phoenix,Harry Potter,5,J.K. Rowling,4.50,There is a door at the end of a silent corrido...,[Fantasy],[Magic],classic,"[kids, young-adult]",870,2004.0,9,2507623,98.0
3,Pride and Prejudice,No,No,Jane Austen,4.26,Alternate cover edition of ISBN 9780679783268S...,[Romance],"[Historical Fiction, Historical Romance]",classic,[adult],279,2000.0,0,2998241,94.0
4,Twilight,The Twilight Saga,1,Stephenie Meyer,3.60,About three things I was absolutely positive.\...,"[Fantasy, Romance]","[Vampires, Paranormal, Paranormal Romance, Urb...",no,"[teen, young-adult]",501,2006.0,25,4964519,78.0
9,Gone with the Wind,No,No,Margaret Mitchell,4.30,"Scarlett O'Hara, the beautiful, spoiled daught...",[Romance],"[Historical Fiction, Historical Romance]",classic,[],1037,1999.0,2,1074620,94.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
52461,Sweet Possession,Sweet Addiction,2,J. Daniels,4.33,There is an alternate cover edition for this A...,[Romance],[Contemporary Romance],no,[adult],248,2014.0,0,10663,97.0
52466,Theodosia and the Last Pharaoh,Theodosia Throckmorton,4,R.L. LaFevers,4.26,"In this fourth book in the series, Theodosia s...",[Fantasy],[Historical Fiction],no,"[kids, young-adult]",400,2011.0,0,1958,97.0
52471,Elemental,Soul Guardians,2,Kim Richardson,4.07,When seventeen-year-old Kara Nightingale is su...,"[Fantasy, Romance, Science Fiction]","[Paranormal, Demons, Paranormal Romance]",no,[young-adult],151,2011.0,0,1947,94.0
52475,Marked,Soul Guardians,1,Kim Richardson,3.70,--READERS FAVORITE AWARDS WINNER 2011--Sixteen...,"[Fantasy, Romance]","[Paranormal, Demons, Paranormal Romance, Urban...",no,[young-adult],280,2011.0,1,6674,84.0


- - -

## Generate Data

In [89]:
"""
(defclass Book
    (multislot genres)
    (slot author)
    (slot orientated-age)
    (slot period)
    (slot film)
    (slot narrator)
    (slot saga)
    (slot n-volumes)      ESTA NO SE
    (slot saga-finished)  ESTA TAMPOCO
    (slot book-length)
    (slot punctuation)
    (slot classic)
    (slot antiquity-of-book)
    (slot recently-published)
    (slot bestseller)
    (slot comedy)
    (slot drama)
    (slot happiness)
    (slot predictable)
    (slot violence)
    (slot conventional)
)
"""

'\n(defclass Book\n    (multislot genres)\n    (slot author)\n    (slot orientated-age)\n    (slot period)\n    (slot film)\n    (slot narrator)\n    (slot saga)\n    (slot n-volumes)      ESTA NO SE\n    (slot saga-finished)  ESTA TAMPOCO\n    (slot book-length)\n    (slot punctuation)\n    (slot classic)\n    (slot antiquity-of-book)\n    (slot recently-published)\n    (slot bestseller)\n    (slot comedy)\n    (slot drama)\n    (slot happiness)\n    (slot predictable)\n    (slot violence)\n    (slot conventional)\n)\n'

In [90]:
# Attributes that are not present in the dataset are filled with synthetic,
# randomly generated values. One list per new column, one entry per book.
saga_finished = []
period = []
film = []
narrator = []
hapiness = []
drama = []
comedy = []
predictable = []
violence = []
conventional = [] 

# Fixed seed: the generated values depend on the seed AND on the order of
# the random calls below, so do not reorder them.
random.seed(42)

# saga name -> 'Yes'/'No' already drawn, so every book of a saga gets the same flag.
sagas_done = {}

for i, row in books.iterrows():
    saga = row['saga']
    if saga == "No":
        # Stand-alone book: there is no saga to finish.
        saga_finished.append("No")
    else:
        if saga not in sagas_done:
            # First book seen of this saga: draw the flag and remember it.
            choice = random.choice(['Yes', 'No'])
            saga_finished.append(choice)
            sagas_done[saga] = choice
        else:
            saga_finished.append(sagas_done[saga])
    period.append(random.choice(['past', 'present', 'future']))
    film.append(random.choice(['Yes', 'No']))
    # Weighted draw: third person is the most likely, second person the least.
    narrator.append(random.choices(['first-person', 'second-person', 'third-person', 'omniscent'], weights=[0.25, 0.05, 0.5, 0.2], k=1)[0])
    # Scores are integers from 0 to 10, both ends included.
    hapiness.append(random.randint(0, 10))
    drama.append(random.randint(0, 10))
    comedy.append(random.randint(0, 10))
    predictable.append(random.randint(0, 10))
    violence.append(random.randint(0, 10))
    conventional.append(random.randint(0, 10))


# Work on a copy so that `books` keeps only the real (non-generated) data.
new_books = books.copy()
# sagaFinished goes right after numSaga; the other columns are appended at the end.
new_books.insert(new_books.columns.get_loc("numSaga") + 1, "sagaFinished", saga_finished)
new_books['period'] = period
new_books['film'] = film
new_books['narrator'] = narrator
# NOTE(review): 'hapiness' (and 'omniscent' above) are misspelled, but they are
# column names / values written to the output, so renaming them would change it.
new_books['hapiness'] = hapiness
new_books['drama'] = drama
new_books['comedy'] = comedy
new_books['predictable'] = predictable
new_books['violence'] = violence
new_books['conventional'] = conventional

In [91]:
new_books

Unnamed: 0,title,saga,numSaga,sagaFinished,author,rating,description,genres,subgenres,classics,...,likedPercent,period,film,narrator,hapiness,drama,comedy,predictable,violence,conventional
0,The Hunger Games,The Hunger Games,1,Yes,Suzanne Collins,4.33,WINNING MEANS FAME AND FORTUNE.LOSING MEANS CE...,"[Fantasy, Science Fiction, Romance]",[Post Apocalyptic],no,...,96.0,past,No,first-person,2,1,10,8,1,9
1,Harry Potter and the Order of the Phoenix,Harry Potter,5,No,J.K. Rowling,4.50,There is a door at the end of a silent corrido...,[Fantasy],[Magic],classic,...,98.0,past,Yes,first-person,3,8,9,0,8,3
3,Pride and Prejudice,No,No,No,Jane Austen,4.26,Alternate cover edition of ISBN 9780679783268S...,[Romance],"[Historical Fiction, Historical Romance]",classic,...,94.0,future,No,first-person,9,4,0,2,6,5
4,Twilight,The Twilight Saga,1,No,Stephenie Meyer,3.60,About three things I was absolutely positive.\...,"[Fantasy, Romance]","[Vampires, Paranormal, Paranormal Romance, Urb...",no,...,78.0,past,Yes,omniscent,5,1,1,6,1,5
9,Gone with the Wind,No,No,No,Margaret Mitchell,4.30,"Scarlett O'Hara, the beautiful, spoiled daught...",[Romance],"[Historical Fiction, Historical Romance]",classic,...,94.0,present,No,omniscent,7,8,1,6,1,8
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
52461,Sweet Possession,Sweet Addiction,2,Yes,J. Daniels,4.33,There is an alternate cover edition for this A...,[Romance],[Contemporary Romance],no,...,97.0,future,Yes,third-person,0,9,8,7,7,6
52466,Theodosia and the Last Pharaoh,Theodosia Throckmorton,4,No,R.L. LaFevers,4.26,"In this fourth book in the series, Theodosia s...",[Fantasy],[Historical Fiction],no,...,97.0,present,No,third-person,7,1,9,4,7,1
52471,Elemental,Soul Guardians,2,Yes,Kim Richardson,4.07,When seventeen-year-old Kara Nightingale is su...,"[Fantasy, Romance, Science Fiction]","[Paranormal, Demons, Paranormal Romance]",no,...,94.0,present,Yes,third-person,3,7,0,0,9,1
52475,Marked,Soul Guardians,1,Yes,Kim Richardson,3.70,--READERS FAVORITE AWARDS WINNER 2011--Sixteen...,"[Fantasy, Romance]","[Paranormal, Demons, Paranormal Romance, Urban...",no,...,84.0,past,Yes,third-person,5,3,7,7,5,4


- - -

## Save results

In [92]:
books.to_csv('./Data/Books_preprocessed.csv', index=False)
new_books.to_csv('./Data/Books_preprocessed_generated_data.csv', index=False)

- - -

# Create Authors Database

In [93]:
import statistics

def _unique_in_order(groups):
    """Flatten an iterable of lists, keeping each value once in first-seen order."""
    return list(dict.fromkeys(value for group in groups for value in group))


# One entry per author, all lists aligned by position.
authors = []
books_authors = []
genres_authors = []
subgenres_authors = []
classic_authors = []
narrator_authors = []

# groupby(sort=False) yields the authors in order of first appearance and
# visits every row once, instead of re-filtering the whole frame (and
# scanning the list of known authors) for each row.
for author, author_books in new_books.groupby('author', sort=False):
    authors.append(author)
    books_authors.append(author_books['title'].tolist())
    genres_authors.append(_unique_in_order(author_books['genres']))
    subgenres_authors.append(_unique_in_order(author_books['subgenres']))
    # The author-level value is the most frequent one among the author's books.
    classic_authors.append(statistics.mode(author_books['classics'].tolist()))
    narrator_authors.append(statistics.mode(author_books['narrator'].tolist()))

In [94]:
# Column name -> list of per-author values (all lists have one entry per author).
data = dict(
    author=authors,
    books=books_authors,
    genres=genres_authors,
    subgenres=subgenres_authors,
    classic=classic_authors,
    narrator=narrator_authors,
)

authors_db = pd.DataFrame(data)
authors_db

Unnamed: 0,author,books,genres,subgenres,classic,narrator
0,Suzanne Collins,"[The Hunger Games, Catching Fire, Mockingjay, ...","[Fantasy, Science Fiction, Romance]",[Post Apocalyptic],no,first-person
1,J.K. Rowling,"[Harry Potter and the Order of the Phoenix, Ha...",[Fantasy],"[Magic, Historical Fiction]",classic,omniscent
2,Jane Austen,"[Pride and Prejudice, The Complete Novels, Man...",[Romance],"[Historical Fiction, Historical Romance]",classic,third-person
3,Stephenie Meyer,"[Twilight, Midnight Sun [2008 Draft], The Host...","[Fantasy, Romance, Science Fiction]","[Vampires, Paranormal, Paranormal Romance, Urb...",no,third-person
4,Margaret Mitchell,[Gone with the Wind],[Romance],"[Historical Fiction, Historical Romance]",classic,omniscent
...,...,...,...,...,...,...
4858,Paul Melko,"[The Walls of the Universe, Broken Universe]","[Science Fiction, Fantasy]",[Time Travel],no,omniscent
4859,Gideon Defoe,[The Pirates! In an Adventure with Scientists],[Fantasy],"[Historical Fiction, Comedy]",no,omniscent
4860,Brian Lynch,"[Angel: After the Fall, Volume 1]","[Fantasy, Horror]","[Vampires, Urban Fantasy, Paranormal]",no,third-person
4861,Kim Richardson,"[Elemental, Marked]","[Fantasy, Romance, Science Fiction]","[Paranormal, Demons, Paranormal Romance, Urban...",no,third-person


In [95]:
authors_db.to_csv('./Data/Authors.csv', index=False)

- - -

# Create sagas and genres database

In [96]:
# One entry per saga, all lists aligned by position.
sagas = []
book_names = []
number_books = []
saga_finished = []

# Stand-alone books carry the sentinel "No" and do not belong to any saga.
books_in_sagas = new_books[new_books['saga'] != "No"]

# groupby(sort=False) yields the sagas in order of first appearance and visits
# every row once, instead of re-filtering the whole frame for each saga.
for saga, saga_rows in books_in_sagas.groupby('saga', sort=False):
    titles = saga_rows['title'].tolist()
    sagas.append(saga)
    book_names.append(titles)
    number_books.append(len(titles))
    # The flag is taken from the first book of the saga found in the frame.
    saga_finished.append(saga_rows['sagaFinished'].iloc[0])

In [97]:
# Column name -> list of per-saga values (all lists have one entry per saga).
data = dict(
    saga=sagas,
    books=book_names,
    number=number_books,
    finished=saga_finished,
)

sagas_df = pd.DataFrame(data)

In [98]:
sagas_df

Unnamed: 0,saga,books,number,finished
0,The Hunger Games,"[The Hunger Games, Catching Fire, Mockingjay]",3,Yes
1,Harry Potter,"[Harry Potter and the Order of the Phoenix, Ha...",8,No
2,The Twilight Saga,"[Twilight, Eclipse, Breaking Dawn, New Moon, M...",5,No
3,The Hitchhiker's Guide to the Galaxy,"[The Hitchhiker's Guide to the Galaxy, The Res...",5,Yes
4,Divergent,"[Divergent, Insurgent, Allegiant]",3,Yes
...,...,...,...,...
4283,The Pirates!,[The Pirates! In an Adventure with Scientists],1,Yes
4284,"Nathaniel Fludd, Beastologist",[The Basilisk's Lair],1,Yes
4285,Angel: After the Fall,"[Angel: After the Fall, Volume 1]",1,No
4286,Soul Guardians,"[Elemental, Marked]",2,Yes


In [99]:
sagas_df.to_csv("./Data/Sagas.csv", index=False)