In [None]:
import pandas as pd
import altair as alt

In [None]:
# Load the complete Goodreads library export; later cells select from this frame.
export_path = "../data/goodreads_library_export.csv"
full_df = pd.read_csv(export_path)

In [None]:
# Display the column labels of the raw export.
full_df.columns

In [None]:
# Headers of the export columns that are kept for the analysis.
relevant_cols = [
    "Book Id",
    "Title",
    "Author",
    "ISBN",
    "ISBN13",
    "My Rating",
    "Average Rating",
    "Publisher",
    "Number of Pages",
    "Year Published",
    "Date Read",
    "Date Added",
    "Bookshelves",
    "Exclusive Shelf",
    "Read Count",
]

# Map every kept header to its snake_case form, e.g. "Book Id" -> "book_id".
cols = {header: header.lower().replace(" ", "_") for header in relevant_cols}

In [None]:
# Keep only the mapped columns, then switch them to their snake_case names.
selected = full_df[list(cols)]
df = selected.rename(columns=cols)

In [None]:
# Remove every double-quote and equals-sign character from the two ISBN columns.
isbn_cols = ["isbn", "isbn13"]
df[isbn_cols] = df[isbn_cols].replace(regex='["=]', value="")

In [None]:
# Parse the two date columns into pandas datetimes.
# pd.to_datetime is used instead of astype("datetime64") because casting to a
# unit-less "datetime64" dtype raises TypeError on pandas >= 2.0.
df = df.assign(
    date_read=pd.to_datetime(df["date_read"]),
    date_added=pd.to_datetime(df["date_added"]),
)

In [None]:
# Horizontal bars: number of records on each exclusive shelf.
shelf_counts = alt.Chart(df).mark_bar().encode(
    x=alt.X("count()"),
    y=alt.Y("exclusive_shelf"),
)
shelf_counts

In [None]:
# Earliest year (inclusive) of `date_read` to include in the charts below.
year = 2019

# Both panels use the same rows: records on the "read" shelf, filtered inside
# the chart to those read in `year` or later.
read_books = df.loc[df["exclusive_shelf"] == "read"]
since_year = f"year(datum.date_read) >= {year}"

# Grid of weekday (x) against day of month (y), coloured by record count.
main = (
    alt.Chart(read_books)
    .transform_filter(since_year)
    .mark_rect(tooltip=True)
    .encode(
        x=alt.X("day(date_read):N", axis=None),
        y=alt.Y("date(date_read):N"),
        color=alt.Color("count():O", legend=None),
    )
)

# Text row showing the record count for each weekday.
week_summary = (
    alt.Chart(read_books)
    .transform_filter(since_year)
    .mark_text()
    .encode(
        x=alt.X("day(date_read):N"),
        text=alt.Text("count():Q"),
    )
)

# Stack the grid above the per-weekday totals.
main & week_summary

In [None]:
# Show every row whose title occurs more than once, grouped together by title.
repeated_title = df.duplicated(subset="title", keep=False)
df[repeated_title].sort_values(by="title")

In [None]:
# Take the first "read-..." entry (up to the next comma) from the bookshelves
# string, then drop the "read-" text so only the remainder is stored.
read_tag = df["bookshelves"].str.extract(r"(read-[^,]*)")
df["genre_shelf"] = read_tag.replace(regex="read-", value="")
df

In [None]:
# Bar chart: number of records on the "read" shelf per genre shelf.
read_books = df.loc[df["exclusive_shelf"] == "read"]
alt.Chart(read_books).mark_bar(tooltip=True).encode(
    x=alt.X("genre_shelf:N"),
    y=alt.Y("count():Q"),
    color=alt.Color("genre_shelf:N"),
)
# Optional: chain .properties(width=1000, height=500) for a larger figure.

In [None]:
# Bar chart: mean of `average_rating` per genre shelf, "read" shelf only.
read_books = df.loc[df["exclusive_shelf"] == "read"]
alt.Chart(read_books).mark_bar(tooltip=True).encode(
    x=alt.X("genre_shelf:N"),
    y=alt.Y("mean(average_rating):Q"),
    color=alt.Color("genre_shelf:N"),
)
# Optional: chain .properties(width=1000, height=500) for a larger figure.

In [None]:
# Interactive scatter of page count against average rating, coloured by genre
# shelf, restricted to the "read" shelf. The y axis is not forced to start at 0.
read_books = df.loc[df["exclusive_shelf"] == "read"]
scatter = alt.Chart(read_books).mark_circle(tooltip=True).encode(
    x=alt.X("number_of_pages:Q"),
    y=alt.Y("average_rating:Q", scale=alt.Scale(zero=False)),
    color=alt.Color("genre_shelf:N"),
)
scatter.properties(width=1000, height=500).interactive()