## Load CSV and Libraries

In [None]:
# Imports
import ast
import json
from datetime import datetime

import pandas as pd

In [None]:
# get the dataset to clean
# NOTE(review): hard-coded absolute path — the CSV must exist at exactly
# this location for the notebook to run; confirm before re-running elsewhere
df = pd.read_csv("/content/top100_goodreads.csv")
# preview the first rows of the raw data
df.head()

Unnamed: 0,Goodreads_ID,Index,ISBN,Title,Author,Imprint,Publisher Group,Volume,Value,RRP,ASP,Binding,Publ Date,Product Class,book series,num pages,genres,shelves,num goodreads ratings,average goodreads ratings,goodreads rating distribution,num goodreads reviews
0,968.The_Da_Vinci_Code,1,9780552149518,"Da Vinci Code,The","Brown, Dan",Corgi Books,Transworld Grp,4522025,"£22,857,837.53",£7.99,£5.05,Paperback,1 Mar 2004,"F2.1 Crime, Thriller & Adventure",Robert Langdon #2,489.0,"{'genres': ['Fiction', 'Mystery', 'Thriller', ...","{'to-read': 416339, 'currently-reading': 19278...",2121534,3.88,"{'5 Stars': 731457, '4 Stars': 729405, '3 Star...",50199
1,72193.Harry_Potter_and_the_Philosopher_s_Stone,3,9780747532743,Harry Potter and the Philosopher's Stone,"Rowling, J. K.",Bloomsbury Publishing PLC,Bloomsbury Grp,3844316,"£19,853,187.43",£6.99,£5.22,Paperback,26 Jun 1997,Y2.1 Children's Fiction,Harry Potter #1,223.0,"{'genres': ['Fantasy', 'Fiction', 'Young Adult...","{'to-read': 1266232, 'currently-reading': 1870...",8334489,4.48,"{'5 Stars': 5420633, '4 Stars': 1906445, '3 St...",131608
2,15881.Harry_Potter_and_the_Chamber_of_Secrets,7,9780747538486,Harry Potter and the Chamber of Secrets,"Rowling, J. K.",Bloomsbury Publishing PLC,Bloomsbury Grp,3184492,"£16,224,021.98",£6.99,£5.07,Paperback,1 Apr 1999,Y2.1 Children's Fiction,Harry Potter #2,341.0,"{'genres': ['Fantasy', 'Fiction', 'Young Adult...","{'fantasy': 57513, 'favorites': 35022, 'fictio...",3214825,4.43,"{'5 Stars': 1906462, '4 Stars': 878809, '3 Sta...",63057
3,960.Angels_Demons,6,9780552150736,Angels and Demons,"Brown, Dan",Corgi Books,Transworld Grp,3096850,"£15,537,324.84",£7.99,£5.05,Paperback,1 Jul 2003,"F2.1 Crime, Thriller & Adventure",Robert Langdon #1,736.0,"{'genres': ['Fiction', 'Mystery', 'Thriller', ...","{'to-read': 656905, 'currently-reading': 16603...",2921948,3.92,"{'5 Stars': 1044923, '4 Stars': 989925, '3 Sta...",31070
4,2.Harry_Potter_and_the_Order_of_the_Phoenix,2,9780747551003,Harry Potter and the Order of the Phoenix,"Rowling, J. K.",Bloomsbury Publishing PLC,Bloomsbury Grp,3043226,"£33,925,431.19",£16.99,£11.15,Hardback,21 Jun 2003,Y2.1 Children's Fiction,Harry Potter #5,870.0,"{'genres': ['Fantasy', 'Young Adult', 'Fiction...","{'to-read': 400226, 'currently-reading': 62033...",2882627,4.5,"{'5 Stars': 1842775, '4 Stars': 726288, '3 Sta...",50446


## Issue 1: Convert Pounds to American Dollars

In [None]:
# convert pounds to american dollars
def pounds_to_dollars(amt, rate=1.34):
  """
  Converts a '£xxx,xxx.xx' string to its '$xxx,xxx.xx'
  counterpart, applying the numerical conversion
  from pounds to dollars

  Input:
    str amt: amount in £ format, e.g. '£3,722,312.19'
    float rate: dollars per pound; defaults to 1.34,
      the 2022 rate this notebook was written with
  Output:
    str: the converted amount in $ format, with
      thousands separators and two decimals
  """
  # remove extraneous characters (currency symbol, thousands separators, padding)
  cleaned = amt.replace("£", "").replace(",", "").strip()

  # numerical conversion £1:$rate
  dollars = float(cleaned) * rate

  # convert to dollar string format
  return "${:,.2f}".format(dollars)

In [None]:
# Example
pounds_to_dollars("£3,722,312.19")

'$4,987,898.33'

In [None]:
# Convert every monetary column from £ strings to $ strings
for money_col in ["Value", "RRP", "ASP"]:
  df[money_col] = df[money_col].apply(pounds_to_dollars)

In [None]:
df.head()

Unnamed: 0,Goodreads_ID,Index,ISBN,Title,Author,Imprint,Publisher Group,Volume,Value,RRP,ASP,Binding,Publ Date,Product Class,book series,num pages,genres,shelves,num goodreads ratings,average goodreads ratings,goodreads rating distribution,num goodreads reviews
0,968.The_Da_Vinci_Code,1,9780552149518,"Da Vinci Code,The","Brown, Dan",Corgi Books,Transworld Grp,4522025,"$30,629,502.29",$10.71,$6.77,Paperback,1 Mar 2004,"F2.1 Crime, Thriller & Adventure",Robert Langdon #2,489.0,"{'genres': ['Fiction', 'Mystery', 'Thriller', ...","{'to-read': 416339, 'currently-reading': 19278...",2121534,3.88,"{'5 Stars': 731457, '4 Stars': 729405, '3 Star...",50199
1,72193.Harry_Potter_and_the_Philosopher_s_Stone,3,9780747532743,Harry Potter and the Philosopher's Stone,"Rowling, J. K.",Bloomsbury Publishing PLC,Bloomsbury Grp,3844316,"$26,603,271.16",$9.37,$6.99,Paperback,26 Jun 1997,Y2.1 Children's Fiction,Harry Potter #1,223.0,"{'genres': ['Fantasy', 'Fiction', 'Young Adult...","{'to-read': 1266232, 'currently-reading': 1870...",8334489,4.48,"{'5 Stars': 5420633, '4 Stars': 1906445, '3 St...",131608
2,15881.Harry_Potter_and_the_Chamber_of_Secrets,7,9780747538486,Harry Potter and the Chamber of Secrets,"Rowling, J. K.",Bloomsbury Publishing PLC,Bloomsbury Grp,3184492,"$21,740,189.45",$9.37,$6.79,Paperback,1 Apr 1999,Y2.1 Children's Fiction,Harry Potter #2,341.0,"{'genres': ['Fantasy', 'Fiction', 'Young Adult...","{'fantasy': 57513, 'favorites': 35022, 'fictio...",3214825,4.43,"{'5 Stars': 1906462, '4 Stars': 878809, '3 Sta...",63057
3,960.Angels_Demons,6,9780552150736,Angels and Demons,"Brown, Dan",Corgi Books,Transworld Grp,3096850,"$20,820,015.29",$10.71,$6.77,Paperback,1 Jul 2003,"F2.1 Crime, Thriller & Adventure",Robert Langdon #1,736.0,"{'genres': ['Fiction', 'Mystery', 'Thriller', ...","{'to-read': 656905, 'currently-reading': 16603...",2921948,3.92,"{'5 Stars': 1044923, '4 Stars': 989925, '3 Sta...",31070
4,2.Harry_Potter_and_the_Order_of_the_Phoenix,2,9780747551003,Harry Potter and the Order of the Phoenix,"Rowling, J. K.",Bloomsbury Publishing PLC,Bloomsbury Grp,3043226,"$45,460,077.79",$22.77,$14.94,Hardback,21 Jun 2003,Y2.1 Children's Fiction,Harry Potter #5,870.0,"{'genres': ['Fantasy', 'Young Adult', 'Fiction...","{'to-read': 400226, 'currently-reading': 62033...",2882627,4.5,"{'5 Stars': 1842775, '4 Stars': 726288, '3 Sta...",50446


## Issue 2: Remove Unnecessary Columns & Set Index to Goodreads ID

In [None]:
# the "Index" column is not needed in the cleaned data
df = df.drop(columns=["Index"])

In [None]:
# identify each row by its Goodreads ID from here on
df = df.set_index(keys="Goodreads_ID")

## Issue 3: Make Dates into the MM/DD/YYYY format

In [None]:
def change_date(date_str):
  """
  Rewrites a date given as day, abbreviated month name and
  four-digit year (each separated by a newline) into the
  numeric month/day/year form

  Input:
    str date_str: date with the parts 1, Apr, 1992 on
      separate lines (strptime directives %d, %b, %Y)
  Output:
    str: the same date as zero-padded month/day/year,
      e.g. 04/01/1992
  """
  parsed = datetime.strptime(date_str, "%d\n%b\n%Y")
  return parsed.strftime('%m/%d/%Y')

In [None]:
# example
change_date("1\nApr\n1992")

'04/01/1992'

In [None]:
# rewrite every publication date with change_date
df["Publ Date"] = df["Publ Date"].map(change_date)

In [None]:
df.head()

Unnamed: 0_level_0,ISBN,Title,Author,Imprint,Publisher Group,Volume,Value,RRP,ASP,Binding,Publ Date,Product Class,book series,num pages,genres,shelves,num goodreads ratings,average goodreads ratings,goodreads rating distribution,num goodreads reviews
Goodreads_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
968.The_Da_Vinci_Code,9780552149518,"Da Vinci Code,The","Brown, Dan",Corgi Books,Transworld Grp,4522025,"$30,629,502.29",$10.71,$6.77,Paperback,03/01/2004,"F2.1 Crime, Thriller & Adventure",Robert Langdon #2,489.0,"{'genres': ['Fiction', 'Mystery', 'Thriller', ...","{'to-read': 416339, 'currently-reading': 19278...",2121534,3.88,"{'5 Stars': 731457, '4 Stars': 729405, '3 Star...",50199
72193.Harry_Potter_and_the_Philosopher_s_Stone,9780747532743,Harry Potter and the Philosopher's Stone,"Rowling, J. K.",Bloomsbury Publishing PLC,Bloomsbury Grp,3844316,"$26,603,271.16",$9.37,$6.99,Paperback,06/26/1997,Y2.1 Children's Fiction,Harry Potter #1,223.0,"{'genres': ['Fantasy', 'Fiction', 'Young Adult...","{'to-read': 1266232, 'currently-reading': 1870...",8334489,4.48,"{'5 Stars': 5420633, '4 Stars': 1906445, '3 St...",131608
15881.Harry_Potter_and_the_Chamber_of_Secrets,9780747538486,Harry Potter and the Chamber of Secrets,"Rowling, J. K.",Bloomsbury Publishing PLC,Bloomsbury Grp,3184492,"$21,740,189.45",$9.37,$6.79,Paperback,04/01/1999,Y2.1 Children's Fiction,Harry Potter #2,341.0,"{'genres': ['Fantasy', 'Fiction', 'Young Adult...","{'fantasy': 57513, 'favorites': 35022, 'fictio...",3214825,4.43,"{'5 Stars': 1906462, '4 Stars': 878809, '3 Sta...",63057
960.Angels_Demons,9780552150736,Angels and Demons,"Brown, Dan",Corgi Books,Transworld Grp,3096850,"$20,820,015.29",$10.71,$6.77,Paperback,07/01/2003,"F2.1 Crime, Thriller & Adventure",Robert Langdon #1,736.0,"{'genres': ['Fiction', 'Mystery', 'Thriller', ...","{'to-read': 656905, 'currently-reading': 16603...",2921948,3.92,"{'5 Stars': 1044923, '4 Stars': 989925, '3 Sta...",31070
2.Harry_Potter_and_the_Order_of_the_Phoenix,9780747551003,Harry Potter and the Order of the Phoenix,"Rowling, J. K.",Bloomsbury Publishing PLC,Bloomsbury Grp,3043226,"$45,460,077.79",$22.77,$14.94,Hardback,06/21/2003,Y2.1 Children's Fiction,Harry Potter #5,870.0,"{'genres': ['Fantasy', 'Young Adult', 'Fiction...","{'to-read': 400226, 'currently-reading': 62033...",2882627,4.5,"{'5 Stars': 1842775, '4 Stars': 726288, '3 Sta...",50446


## Issue 4: Make Genres into a new CSV, remove from the DataFrame

In [None]:
# independent one-column frame holding only the raw genres strings
genres = df[["genres"]].copy()

In [None]:
genres.head()

Unnamed: 0_level_0,genres
Goodreads_ID,Unnamed: 1_level_1
968.The_Da_Vinci_Code,"{'genres': ['Fiction', 'Mystery', 'Thriller', ..."
72193.Harry_Potter_and_the_Philosopher_s_Stone,"{'genres': ['Fantasy', 'Fiction', 'Young Adult..."
15881.Harry_Potter_and_the_Chamber_of_Secrets,"{'genres': ['Fantasy', 'Fiction', 'Young Adult..."
960.Angels_Demons,"{'genres': ['Fiction', 'Mystery', 'Thriller', ..."
2.Harry_Potter_and_the_Order_of_the_Phoenix,"{'genres': ['Fantasy', 'Young Adult', 'Fiction..."


In [None]:
# Build one boolean column per genre (True when the book has that genre).
# ast.literal_eval parses the stored dict repr directly, so genre names
# containing apostrophes or double quotes cannot corrupt the parse.
all_genres = []       # every distinct genre, in first-seen order
genres_by_book = {}   # Goodreads_ID -> set of genres for that book

# for each book row
for book_id in genres.index:
  # collect the genres list
  parsed = ast.literal_eval(genres.loc[book_id, "genres"])
  book_genres = set()
  # for each genre a book has
  for entry in parsed["genres"]:
    # a "Genre > Subgenre" entry counts as each of its parts
    for name in entry.split(">"):
      name = name.strip()
      if not name:
        # ignore empty pieces left by a dangling ">"
        continue
      # if we haven't encountered the genre before, remember it
      if name not in all_genres:
        all_genres.append(name)
      book_genres.add(name)
  genres_by_book[book_id] = book_genres

# Add all genre columns in one step; inserting them one at a time
# fragments the frame and makes pandas emit a PerformanceWarning.
genre_flags = pd.DataFrame(
  {
    name: [name in genres_by_book[book_id] for book_id in genres.index]
    for name in all_genres
  },
  index=genres.index,
  dtype=bool,
)
genres = pd.concat([genres, genre_flags], axis=1)

  


In [None]:
# keep only the per-genre flag columns
genres = genres.drop(columns=["genres"])

In [None]:
genres.head()

Unnamed: 0_level_0,Fiction,Mystery,Thriller,Mystery Thriller,Suspense,Historical,Historical Fiction,Adventure,Novels,Crime,Adult,Fantasy,Young Adult,Magic,Childrens,Middle Grade,Classics,Audiobook,Science Fiction Fantasy,Romance,Paranormal,Vampires,Paranormal Romance,Supernatural,Urban Fantasy,Teen,Science Fiction,Action,Shapeshifters,Werewolves,Contemporary,Adult Fiction,Drama,Neurodiversity,Autistic Spectrum Disorder,Psychology,Literature,Academic,School,Womens Fiction,...,Italian Literature,Scotland,Gothic,Foodie,Food Writing,How To,Japan,Asia,Horticulture,Gardening,Couture,Fashion,Realistic Fiction,LGBT,Gay,Canada,Dark,Ireland,Irish Literature,Trivia,Anthologies,Collections,Race,African American,Read For School,High School,Law,Coming Of Age,Personal Development,Sports,Fitness,Bangladesh,Contemporary Romance,Books About Books,Journalism,Engineering,Pop Culture,Australia,Roman,Anthropology
Goodreads_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
968.The_Da_Vinci_Code,True,True,True,True,True,True,True,True,True,True,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
72193.Harry_Potter_and_the_Philosopher_s_Stone,True,False,False,False,False,False,False,True,False,False,False,True,True,True,True,True,True,True,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
15881.Harry_Potter_and_the_Chamber_of_Secrets,True,False,False,False,False,False,False,True,False,False,False,True,True,True,True,True,True,True,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
960.Angels_Demons,True,True,True,True,True,True,True,True,True,True,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
2.Harry_Potter_and_the_Order_of_the_Phoenix,True,False,False,False,False,False,False,True,False,False,False,True,True,True,True,True,True,True,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False


In [None]:
# the raw genres column is no longer needed in the main dataframe
df = df.drop(columns=["genres"])

In [None]:
# save the genre table as its own CSV
# NOTE(review): this writes to the filesystem root ("/"), while the input
# was read from /content — confirm the output location is intended
genres.to_csv("/top100_genres.csv")

## Issue 5: Make Ratings Distributions into a new CSV, remove from the DataFrame

In [None]:
# independent one-column frame holding only the raw rating distributions
ratings = df[["goodreads rating distribution"]].copy()

In [None]:
ratings.head()

Unnamed: 0_level_0,goodreads rating distribution
Goodreads_ID,Unnamed: 1_level_1
968.The_Da_Vinci_Code,"{'5 Stars': 731457, '4 Stars': 729405, '3 Star..."
72193.Harry_Potter_and_the_Philosopher_s_Stone,"{'5 Stars': 5420633, '4 Stars': 1906445, '3 St..."
15881.Harry_Potter_and_the_Chamber_of_Secrets,"{'5 Stars': 1906462, '4 Stars': 878809, '3 Sta..."
960.Angels_Demons,"{'5 Stars': 1044923, '4 Stars': 989925, '3 Sta..."
2.Harry_Potter_and_the_Order_of_the_Phoenix,"{'5 Stars': 1842775, '4 Stars': 726288, '3 Sta..."


In [None]:
# Add an empty column for each star level, from 5 down to 1
for star_col in ["5 Stars", "4 Stars", "3 Stars", "2 Stars", "1 Star"]:
  ratings[star_col] = None

In [None]:
# Fill the star columns book by book
star_labels = ["5 Stars", "4 Stars", "3 Stars", "2 Stars", "1 Star"]

for book_id in ratings.index:
  # the cell holds a dict written with single quotes; swap them so json can read it
  raw_distribution = ratings.loc[book_id, "goodreads rating distribution"]
  distribution = json.loads(raw_distribution.replace("'", "\""))
  # copy the count for each of the Stars 5-1 into its column
  for label in star_labels:
    ratings.loc[book_id, label] = int(distribution[label])

In [None]:
# keep only the per-star count columns
ratings = ratings.drop(columns=["goodreads rating distribution"])

In [None]:
ratings.head()

Unnamed: 0_level_0,5 Stars,4 Stars,3 Stars,2 Stars,1 Star
Goodreads_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
968.The_Da_Vinci_Code,731457,729405,428021,149660,82991
72193.Harry_Potter_and_the_Philosopher_s_Stone,5420633,1906445,702884,163022,141505
15881.Harry_Potter_and_the_Chamber_of_Secrets,1906462,878809,350929,60870,17755
960.Angels_Demons,1044923,989925,599883,184617,102600
2.Harry_Potter_and_the_Order_of_the_Phoenix,1842775,726288,250988,44875,17701


In [None]:
# save the ratings table as its own CSV
# NOTE(review): this writes to the filesystem root ("/"), while the input
# was read from /content — confirm the output location is intended
ratings.to_csv("/top100_ratings.csv")

In [None]:
# the raw rating distribution column is no longer needed in the main dataframe
df = df.drop(columns=["goodreads rating distribution"])

## Issue 6: Make Shelves into a new CSV, remove from the DataFrame

In [None]:
# independent one-column frame holding only the raw shelves strings
shelves = df[["shelves"]].copy()

In [None]:
shelves.head()

Unnamed: 0_level_0,shelves
Goodreads_ID,Unnamed: 1_level_1
968.The_Da_Vinci_Code,"{'to-read': 416339, 'currently-reading': 19278..."
72193.Harry_Potter_and_the_Philosopher_s_Stone,"{'to-read': 1266232, 'currently-reading': 1870..."
15881.Harry_Potter_and_the_Chamber_of_Secrets,"{'fantasy': 57513, 'favorites': 35022, 'fictio..."
960.Angels_Demons,"{'to-read': 656905, 'currently-reading': 16603..."
2.Harry_Potter_and_the_Order_of_the_Phoenix,"{'to-read': 400226, 'currently-reading': 62033..."


In [None]:
# Build one integer column per shelf holding each book's count for that
# shelf (0 when the book is not on it).
# ast.literal_eval parses the stored dict repr directly, so shelf names
# containing apostrophes or double quotes cannot corrupt the parse.
all_shelves = []      # every distinct shelf name, in first-seen order
seen_shelves = set()  # same names, for fast membership checks
counts_by_book = {}   # Goodreads_ID -> {shelf name: count}

# for each book
for book_id in shelves.index:
  parsed = ast.literal_eval(shelves.loc[book_id, "shelves"])
  book_counts = {}
  # for each shelf the book is in
  for shelf, count in parsed.items():
    # clean the shelf name
    shelf = shelf.strip()
    # if the shelf has not already been added, remember it
    if shelf not in seen_shelves:
      seen_shelves.add(shelf)
      all_shelves.append(shelf)
    # record the given shelf value for this book
    book_counts[shelf] = int(count)
  counts_by_book[book_id] = book_counts

# Add all shelf columns in one step; inserting them one at a time
# fragments the frame and makes pandas emit a PerformanceWarning.
shelf_counts = pd.DataFrame(
  {
    shelf: [counts_by_book[book_id].get(shelf, 0) for book_id in shelves.index]
    for shelf in all_shelves
  },
  index=shelves.index,
)
shelves = pd.concat([shelves, shelf_counts], axis=1)

  # Remove the CWD from sys.path while we load stuff.


In [None]:
# keep only the per-shelf count columns in the new dataframe
shelves = shelves.drop(columns=["shelves"])

In [None]:
shelves.head()

Unnamed: 0_level_0,to-read,currently-reading,fiction,mystery,thriller,favorites,own,owned,books-i-own,dan-brown,mystery-thriller,suspense,historical-fiction,adventure,series,novels,crime,default,adult,owned-books,contemporary,thrillers,adult-fiction,novel,religion,my-books,favourites,my-library,general-fiction,library,mystery-suspense,classics,historical,fantasy,contemporary-fiction,mysteries,history,book-club,2020,action,...,on-magnet,first-editions,owned-audiobooks,next-up,to-purchase,want-to-read-purchased,no,a20,book-club-suggestions,tbr-owned-physical,list,i-have,owned-book,get-rid-of,bought-for-others,1980-2010-media,eng,hilarious,romance-contemporary,office-romance,laugh-out-loud,lol,aborigines,narrativa,dünya-edebiyatı,deutsch,spirit,kütüphanem,kitaplığım,libreria,romanzo,inspiring,mark-haddon,tbr-owned,read-in-2007,bookcase,dawn-french,non-fiction-biography,dawn,mum
Goodreads_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
968.The_Da_Vinci_Code,416339,19278,18014,9726,7441,7409,3673,3566,2954,2726,1861,1804,1770,1455,1313,1299,1255,1207,1097,975,970,881,832,821,749,712,672,612,590,531,510,487,482,474,456,452,445,427,407,392,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
72193.Harry_Potter_and_the_Philosopher_s_Stone,1266232,187020,20546,875,0,58424,10986,11206,11020,0,0,0,0,3204,6726,1618,0,1349,0,2998,0,0,0,875,0,1985,6420,1566,0,1182,0,3201,0,68800,0,0,0,0,5225,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
15881.Harry_Potter_and_the_Chamber_of_Secrets,546,0,16632,839,0,35022,9572,9810,9757,0,0,0,0,2487,5591,1289,0,1272,0,2678,0,0,0,702,0,1700,4068,1360,0,996,0,1825,0,57513,0,0,0,0,4200,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
960.Angels_Demons,656905,16603,12561,7273,6419,5985,2794,2636,2120,2652,1599,1502,1055,1162,1343,858,1130,962,796,731,690,781,563,499,544,536,503,448,413,451,419,161,315,446,307,306,243,143,311,363,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2.Harry_Potter_and_the_Order_of_the_Phoenix,400226,62033,14912,591,0,39184,8927,8965,8992,0,0,0,0,2211,5004,1110,0,1215,0,2434,0,0,0,616,0,1542,4422,1264,0,876,0,1510,0,53150,0,0,0,0,3121,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [None]:
# save new dataframe
# NOTE(review): this writes to the filesystem root ("/"), while the input
# was read from /content — confirm the output location is intended
shelves.to_csv("/top100_shelves.csv")

In [None]:
# remove the raw shelves column from the original dataframe
df = df.drop(columns=["shelves"])

## Save the Final DataFrame

In [None]:
# write the cleaned main dataframe
# NOTE(review): this writes to the filesystem root ("/"), while the input
# was read from /content — confirm the output location is intended
df.to_csv("/top100_cleaned.csv")