In [None]:
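# Optional one-time setup: uncomment the lines below to install the
# packages this notebook uses into the kernel's environment.
# %pip install altair
# %pip install ipywidgets

# %pip install "vegafusion[embed]>=1.5.0"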

In [None]:
import pandas as pd
import numpy as np
import ipywidgets as widgets
from ipywidgets import interact, interactive
import altair as alt
# Keep Altair's default data transformer but pass max_rows=None so that charts
# built from large DataFrames are not rejected for exceeding the row limit.
# NOTE(review): presumably the VegaFusion transformer below is the intended
# alternative for large data; it is left disabled here — confirm before switching.
alt.data_transformers.enable('default', max_rows=None)
#alt.data_transformers.enable("vegafusion")

In [None]:
# Read the topic/documents table (first CSV column becomes the index) and
# show it as the cell's output.
df_topic_docs = pd.read_csv(
    "topic_model_viz/topic_docs_table.csv",
    index_col=0,
)
df_topic_docs

## Most Popular Books

In [None]:
# Most popular books, measured by number of ratings.
# Only the first 200 rows of the table are plotted; bars are ordered by
# descending rating count.
popular_books = pd.read_csv(
    '../pre_processing_notebooks/data/most_popular_books.tsv',
    sep='\t',
)

alt.Chart(popular_books.head(200)).mark_bar(color='#3f8131').encode(
    x=alt.X(
        'title',
        sort='-y',
        title='Book Title',
        axis=alt.Axis(titleFontSize=14, labelFontSize=16),
    ),
    y=alt.Y(
        'num_ratings',
        title='Ratings',
        axis=alt.Axis(titleFontSize=14, labelFontSize=16),
    ),
    tooltip=['author', 'title', 'num_ratings'],
)

In [None]:
# Most popular authors: one row per author with the summed rating count,
# the number of rated books and the mean of the per-book mode rating,
# ordered by total rating count (largest first).
popular_authors = (
    popular_books
    .groupby('author')
    .agg(
        Total_Reviews=pd.NamedAgg(column='num_ratings', aggfunc='sum'),
        Books_Rated=pd.NamedAgg(column='num_ratings', aggfunc='count'),
        Avg_Mode_Rating=pd.NamedAgg(column='mode_rating', aggfunc='mean'),
    )
    .sort_values('Total_Reviews', ascending=False)
    .reset_index()
)

# Scatter: number of books (x) against number of reviews (y), coloured by
# the author's average mode rating.
# NOTE(review): sort='-y' on a quantitative x axis probably has no visible
# effect; kept as in the original — confirm whether it can be dropped.
alt.Chart(popular_authors).mark_circle(size=100).encode(
    x=alt.X(
        'Books_Rated:Q',
        sort='-y',
        title='# Books',
        axis=alt.Axis(titleFontSize=18, labelFontSize=16),
    ),
    y=alt.Y(
        'Total_Reviews:Q',
        title='# Reviews',
        axis=alt.Axis(titleFontSize=18, labelFontSize=16),
    ),
    color='Avg_Mode_Rating:Q',
    tooltip=['author', 'Total_Reviews', 'Books_Rated', 'Avg_Mode_Rating'],
).properties(width=1200, height=800).interactive()

## Explore Topics by Genre

In [None]:
# Per-book topic-model coordinates; the first TSV column is the index.
df_doc_viz = pd.read_csv("topic_model_viz/book_topic.tsv", sep="\t", index_col=0)

# Use string ids as the index and expose them as a regular column so the
# chart tooltip can show them.
df_doc_viz.index = df_doc_viz.index.astype(str)
df_doc_viz['doc_id'] = df_doc_viz.index

# Alphabetically sorted array of the distinct, non-missing genres.
genres = np.sort(df_doc_viz['genre'].dropna().unique())

# Point selection named 'Select', restricted to the genre field, driven by
# clicks on the colour legend and initialised with the first genre.
selectGenre = alt.selection_point(
    name='Select',
    fields=['genre'],
    value=genres[0],
    bind='legend',
)

# Books whose genre matches the selection are drawn almost opaque, all
# others almost transparent.
alt.Chart(df_doc_viz).mark_circle().encode(
    x='doc_x',
    y='doc_y',
    tooltip=['doc_id', 'title', 'author'],
    color=dict(
        field="genre",
        scale=dict(scheme="category20b"),
        legend=dict(labelFontSize=16, titleFontSize=18),
    ),
    opacity=alt.condition(selectGenre, alt.value(0.95), alt.value(0.01)),
).add_params(
    selectGenre
).properties(
    width=1200,
    height=800,
).interactive()


## Visualize Reviews Info

In [None]:
# Load every valid review (first TSV column is the index) and re-label the
# rows with string ids.
all_reviews = pd.read_csv(
    "topic_model_viz/all_valid_reviews_viz.tsv",
    index_col=0,
    sep="\t",
)
all_reviews = all_reviews.set_axis(all_reviews.index.astype(str), axis=0)

# Filter the reviews table by the values chosen in the dropdowns
def filter_reviews_dataframe(source, publisher, nur, topic):
    """Display and return the rows of ``all_reviews`` matching the given filters.

    Each argument is compared for equality against one column
    (``source``, ``publisher``, ``nur_names``, ``topic_id``); the value
    ``'All'`` disables the filter for that column.

    Returns:
        The filtered DataFrame (``all_reviews`` itself when every
        argument is ``'All'``).
    """
    criteria = (
        ('source', source),
        ('publisher', publisher),
        ('nur_names', nur),
        ('topic_id', topic),
    )

    display_df = all_reviews
    for column, wanted in criteria:
        if wanted != 'All':
            display_df = display_df[display_df[column] == wanted]

    display(display_df)
    return display_df


# One dropdown per filterable column. Every dropdown starts with an 'All'
# entry followed by the distinct values found in that column (topic ids
# are additionally sorted).
source_dropdown = widgets.Dropdown(
    description='Source:',
    options=['All', *all_reviews['source'].unique()],
)
publisher_dropdown = widgets.Dropdown(
    description='Publisher:',
    options=['All', *all_reviews['publisher'].unique()],
)
nur_dropdown = widgets.Dropdown(
    description='NUR:',
    options=['All', *all_reviews['nur_names'].unique()],
)
topic_dropdown = widgets.Dropdown(
    description='Topic:',
    options=['All', *sorted(all_reviews['topic_id'].unique())],
)

## (The cascading callbacks assume the filters are applied top down)
# Callback that refreshes the publisher choices
def update_publisher_dropdown(*args):
    """Rebuild the publisher options from the currently selected source.

    With a specific source selected only publishers appearing in that
    source's reviews are offered; with ``'All'`` every publisher is offered.
    Positional arguments (the change event passed by the widget) are ignored.
    """
    chosen_source = source_dropdown.value
    if chosen_source == 'All':
        candidates = all_reviews
    else:
        candidates = all_reviews[all_reviews['source'] == chosen_source]
    publisher_dropdown.options = ['All'] + list(candidates['publisher'].unique())

def update_nur_dropdown(*args):
    """Rebuild the NUR options from the current source and publisher selection.

    Each of the two filters is applied independently when its dropdown holds
    something other than ``'All'``. In particular a publisher picked while the
    source is still ``'All'`` now narrows the NUR choices as well (previously
    the publisher was only honoured together with a specific source).
    Positional arguments (the change event passed by the widget) are ignored.
    """
    candidates = all_reviews
    if source_dropdown.value != 'All':
        candidates = candidates[candidates['source'] == source_dropdown.value]
    if publisher_dropdown.value != 'All':
        candidates = candidates[candidates['publisher'] == publisher_dropdown.value]
    nur_dropdown.options = ['All'] + list(candidates['nur_names'].unique())

# Register callback functions with dropdowns
source_dropdown.observe(update_publisher_dropdown, 'value')
# The NUR options depend on the source too. Resetting the publisher options
# does not change the publisher *value* when it is already 'All', so the
# publisher observer alone would leave the NUR list stale after a source
# change; observe the source directly as well.
source_dropdown.observe(update_nur_dropdown, 'value')
publisher_dropdown.observe(update_nur_dropdown, 'value')

# Create interactive display: re-runs filter_reviews_dataframe with the
# current dropdown values
filtered_reviews_widget = interactive(
    filter_reviews_dataframe,
    source=source_dropdown,
    publisher=publisher_dropdown,
    nur=nur_dropdown,
    topic=topic_dropdown,
)

display(filtered_reviews_widget)

In [None]:
# Snapshot of the table currently produced by the filter widget. Re-run this
# cell after changing a dropdown to pick up the new selection.
current_df = filtered_reviews_widget.result
if current_df is None:
    # The widget has not executed its function yet (e.g. the notebook was run
    # without a live front end); fall back to the unfiltered reviews, which
    # is what an all-'All' selection would give.
    current_df = all_reviews

def calculate_mode(series):
    """Return the most frequent value of ``series``, averaging ties.

    Args:
        series: pandas Series of values (ratings in this notebook).

    Returns:
        The single mode when there is exactly one, the arithmetic mean of all
        modes when several values tie, or ``np.nan`` when the series has no
        mode at all (empty or containing only missing values).
    """
    mode_values = series.mode()
    # Series.mode() ignores missing values, so an empty or all-NaN group
    # yields no mode. Return NaN explicitly instead of averaging an empty
    # list, which would emit a "Mean of empty slice" RuntimeWarning.
    if mode_values.empty:
        return np.nan
    if len(mode_values) == 1:
        return mode_values.iloc[0]
    return np.mean(mode_values.tolist())

# Mode rating per book: reviews sharing an index label are grouped together.
mode_rating = current_df.groupby(current_df.index)['rating'].apply(calculate_mode)

# Keep only the books that still have reviews after filtering and attach
# their mode rating. `.assign` builds a new frame, so no column is written
# onto a slice of df_doc_viz (avoids SettingWithCopyWarning) and
# df_doc_viz itself stays untouched.
filtered_indices = df_doc_viz.index.intersection(current_df.index)
visualize_docs = df_doc_viz.loc[filtered_indices].assign(mode_rating=mode_rating)

print(visualize_docs.shape)

# Book map for the current selection: colour encodes genre, point size the
# mode rating.
scatter_plot = alt.Chart(visualize_docs).mark_circle().encode(
    x='doc_x',
    y='doc_y',
    tooltip=['doc_id', 'title', 'mode_rating'],
    color='genre:N',
    size='mode_rating:Q'
).properties(
    width=1400,
    height=800
).interactive()

# Display through the notebook's rich output like the other cells, rather
# than `.show()`, which on older Altair releases needs an external viewer.
scatter_plot

## Explore Reviews by Date

In [None]:
# Interval brush restricted to the x encoding (the review date axis of the
# histogram below).
brush_selection = alt.selection_interval(encodings=['x'])

# Histogram of review counts over time; dragging on it defines the brush.
reviews_chart = alt.Chart(all_reviews).mark_bar().encode(
    x=alt.X('review_date:T', title='Review Date'),
    y=alt.Y('count():Q', title='# of Reviews'),
).add_params(
    brush_selection
).properties(width=1450, height=250)

# Scatter plot of the reviews' doc_x/doc_y coordinates coloured by genre;
# points inside the brushed date range are nearly opaque, the rest nearly
# invisible.
topic_books = alt.Chart(all_reviews).mark_circle().encode(
    x='doc_x',
    y='doc_y',
    tooltip=['title', 'author'],
    color=dict(
        field="genre",
        scale=dict(scheme="category20b"),
        legend=dict(labelFontSize=16, titleFontSize=18),
    ),
    opacity=alt.condition(brush_selection, alt.value(0.95), alt.value(0.005)),
).properties(width=1450, height=800).interactive()

# Stack the histogram above the scatter plot with a small gap.
composed_chart = alt.vconcat(reviews_chart, topic_books, spacing=5)
composed_chart