## Notebook for making Altair plots of modern-romance summary data and of trends in romance themes over time

In [67]:
import numpy as np
import altair as alt
import pandas as pd
import numpy as np
import statsmodels.api as sm
import scipy.stats  as stats
from collections import defaultdict

In [68]:
from altair import Chart, Color, Scale

In [69]:
# Select the JupyterLab renderer so charts display inline in this notebook.
alt.renderers.enable("jupyterlab")

RendererRegistry.enable('jupyterlab')

### Themes

In [70]:
# Load the theme-frequency table from a local pickle.
# NOTE(review): pickle files can execute arbitrary code when loaded; only read files you trust.
plot_data = pd.read_pickle("plots/themes_time_bigger.pkl")


In [71]:
# Keep only the rows dated 1 January 2013 or later.
plot_data = plot_data.loc[plot_data['dts'].ge(pd.to_datetime('01-01-2013'))]
# Convenience handle on the retained dates.
x = plot_data['dts']

In [72]:
# Preview the first five rows of the filtered table.
plot_data.head(n=5)

Unnamed: 0,dts,Royalty,Military,Supernatural,Holiday,Bdsm,Adventure
50,2013-01-01,0.022768,0.010932,0.064442,0.007891,0.015535,0.033454
40,2013-02-01,0.023493,0.010278,0.06332,0.008993,0.021474,0.040011
85,2013-03-01,0.027703,0.009375,0.056588,0.006503,0.022635,0.040878
6,2013-04-01,0.02201,0.011754,0.060902,0.007416,0.021852,0.039602
96,2013-05-01,0.022291,0.010118,0.055885,0.007114,0.017627,0.03644


In [73]:
# Move the date column into the index so that every remaining column
# can be stacked in the following step.
plot_data = plot_data.set_index(keys='dts')

In [74]:
# Stack the columns into an inner index level (wide -> long).
plot_data = plot_data.stack()

In [75]:
# Turn the index levels back into ordinary columns.
plot_data = plot_data.reset_index()

In [76]:
# Give the long-format frame the column names the chart below refers to.
# After stack() + reset_index() the columns are, in order: 'dts' (the former
# index), an auto-named level column holding the former column labels, and 0
# holding the stacked values. The previous mapping used the keys "date" and
# "theme", which match none of those, so only 0 -> "frequency" took effect and
# the 'date' / 'theme' fields used by the chart were never created.
plot_data = plot_data.rename(index=str)  # keep the original string row labels
# Positional assignment works whatever the level column was auto-named, is safe
# to re-run, and raises ValueError if the frame does not have exactly 3 columns.
plot_data.columns = ["date", "theme", "frequency"]

In [22]:
# NOTE(review): this cell carries an out-of-order execution count (In [22]) and
# `x` is not read by any later cell shown here; it looks like a scratch leftover.
# The original line ended with a trailing comma, which bound `x` to a 1-tuple
# rather than to the alt.X channel itself.
x = alt.X('num_ratings', title="Number of ratings")

In [77]:
# Single-item selection driven by mouse-over; it resolves to the nearest point
# and is keyed on the 'theme' field.
highlight = alt.selection(
    type='single',
    on='mouseover',
    nearest=True,
    fields=['theme'],
)

# Encodings shared by both layers: date on x, frequency on y, colour by theme.
base = alt.Chart(plot_data).encode(
    x=alt.X('date:T', title="Date"),
    y=alt.Y('frequency:Q', title="Frequency"),
    color=alt.Color('theme:N', title="Theme"),
)

# Line layer: size 1 where the selection does not match, size 3 where it does.
lines = base.mark_line().encode(
    size=alt.condition(~highlight, alt.value(1), alt.value(3)),
)

# Fully transparent circles; this layer carries the selection and fixes the
# chart width at 600.
points = (
    base.mark_circle()
    .encode(opacity=alt.value(0))
    .add_selection(highlight)
    .properties(width=600)
)

# Layer the two charts; as the last expression this is what the cell displays.
points + lines


In [78]:
# Apply the top-level 'pad' autosize setting to the layered chart that the
# previous cell actually displays (points + lines). `base` on its own only
# holds the shared encodings and has no mark, so configuring and later saving
# it would not export the figure shown above (and, to the best of my knowledge,
# fails Altair's schema validation for lack of a mark).
# The result stays bound to `base` so the following `base.save(...)` cell works
# unchanged; this cell is also safe to re-run.
base = (points + lines).configure(autosize='pad')

In [51]:
# Write the chart currently bound to `base` to a JSON file in the working directory.
base.save("themes_altair.json")

### Plot: the books

In [52]:
# Load the per-book table from a local pickle.
# NOTE(review): pickle files can execute arbitrary code when loaded; only read files you trust.
df = pd.read_pickle("plots/data_figure1.pkl")

In [54]:
# Preview the first five rows of the book table.
df.head(n=5)

Unnamed: 0,authors,awards,date,description,isbn,link,num_ratings,num_reviews,rating,review_dates,review_ids,review_texts,reviewer_names,reviewer_ratings,reviewer_scores,reviews_together,title,year
40,[Jennifer Crusie],[All About Romance (AAR) Annual Reader Poll fo...,December 1st 2006,"For Nina Askew, turning forty means freedom--f...",037377138X,http://www.goodreads.com/book/show/33732.Anyon...,17661.0,928.0,3.8,"[Mar 11, 2010, Sep 20, 2009, Feb 27, 2009, Dec...","[review_93605589, review_71904939, review_4772...",[[Opening Line: “The last thing Nina Askew nee...,"[Buggy, Shawna, Kathrynn, Ⓐlleskelle - teamSør...","[really liked it, it was amazing, it was amazi...","[4, 5, 5, 5, 3, 5, 5, 4, 3, 4, 4, 5, 5, 3, 4, ...",Opening Line: “The last thing Nina Askew neede...,Anyone But You,2006.0
45,[Rachel Gibson],[OKRWA National Readers Choice Award for Singl...,January 1st 1998,Georgeanne Howard leaves her fiancé at the alt...,0380790076,http://www.goodreads.com/book/show/60220.Simpl...,18156.0,635.0,3.86,"[Oct 20, 2010, Sep 30, 2009, Apr 04, 2017, Jun...","[review_127198135, review_73023830, review_195...",[[3.5 STARSI must admit I was a bit disappoint...,"[valee, Auntee, Nenia *The Flagrant Liberal* C...","[liked it, it was amazing, it was ok, it was o...","[3, 5, 2, 2, 4, 4, 3, 4, 4, 5, 4, 5, 4, 3, 5, ...",3.5 STARSI must admit I was a bit disappointed...,Simply Irresistible\n \n (Chinoo...,1998.0
59,[Kallypso Masters],[],December 13th 2012,Librarian's Note: This is an alternate cover f...,1481152718,http://www.goodreads.com/book/show/17325954-no...,11078.0,579.0,4.31,"[Apr 20, 2013, Nov 28, 2011, Nov 14, 2012, Jun...","[review_595540086, review_240655375, review_45...",[[1 star. DNF at 52 %*Emotional porn at its wo...,"[Baba ♥♥♥ Tyler, Marcus, Archer, Dean, Adrian...","[did not like it, it was amazing, it was ok, r...","[1, 5, 2, 4, 5, 1, 5, 2, 4, 5, 5, 5, 5, 5, 5, ...","[ in the hospital (hide spoiler)] Ka-Thunk ,Ka...",Nobody's Hero\n \n (Rescue Me Sa...,2012.0
61,[Kallypso Masters],[],January 24th 2013,NOBODY'S PERFECT is the continuing story of Sa...,1480096954,http://www.goodreads.com/book/show/17299927-no...,8462.0,506.0,4.34,"[Jun 09, 2012, Feb 25, 2012, May 28, 2012, Mar...","[review_345240483, review_283985473, review_33...","[[Nobody's perfect, mi sueño, but you're about...","[~ Becs ~, CaroleDee, Nia, Becca, Amy, Michael...","[really liked it, it was amazing, it was amazi...","[4, 5, 5, 2, 2, 5, 2, 5, 4, 5, 1, 5, 3, 5, 4, ...","Nobody's perfect, mi sueño, but you're about a...",Nobody's Perfect\n \n (Rescue Me...,2013.0
66,[Kallypso Masters],[],December 22nd 2013,When Marc d’Alessio first rescued the curvaceo...,,http://www.goodreads.com/book/show/17207097-so...,3940.0,331.0,4.28,"[Jan 04, 2013, Nov 12, 2013, Jan 02, 2014, Dec...","[review_494970690, review_763151413, review_80...","[[I like this series, but this one was a littl...","[Vivian, Sylvia, Morgan, Sue, Rebekah, Dianne,...","[liked it, did not like it, it was ok, really ...","[3, 1, 2, 4, 2, 4, 2, 4, 2, 5, 5, 5, 4, 5, 4, ...",It's LIVE! It's live! It's finally live! OMG! ...,Somebody's Angel\n \n (Rescue Me...,2013.0


In [55]:
def color_picker(x):
    """Label a collection of awards as empty or non-empty.

    Parameters
    ----------
    x : sized collection
        Anything supporting ``len()``, e.g. a book's list of awards.

    Returns
    -------
    str
        ``'No awards'`` when ``x`` has length zero, otherwise
        ``'One or more awards'``.
    """
    return 'No awards' if len(x) == 0 else 'One or more awards'

In [56]:
# Derived columns for the scatter plot.
df['size'] = df['num_reviews'] / 50           # review count scaled down by 50
df['color'] = df['awards'].map(color_picker)  # 'No awards' / 'One or more awards'
# Collapse each list of author names into one space-separated string.
# The isinstance guard makes the cell safe to re-run: without it a second run
# would join the already-joined string character by character
# ("Jane Doe" -> "J a n e   D o e").
df['authors'] = df['authors'].map(
    lambda names: names if isinstance(names, str) else " ".join(names)
)
df['num_awards'] = df['awards'].map(len)      # number of awards per book

In [57]:
# Use the default data transformer with its row limit switched off (max_rows=None).
alt.data_transformers.enable("default", max_rows=None)

DataTransformerRegistry.enable('default')

In [60]:
# Interactive scatter of the books: number of ratings on x, average rating on y,
# colour by number of awards, marker size by number of reviews, and the title
# and authors shown in the tooltip.
# NOTE(review): the name `Chart` rebinds the `Chart` class imported from altair
# near the top of the notebook; later cells rely on this name, so it is kept.
Chart = (
    alt.Chart(
        df[['authors', 'num_ratings', 'num_reviews', 'title', 'num_awards', 'rating']]
    )
    .mark_point()
    .encode(
        x=alt.X('num_ratings', title="Number of ratings"),
        # zero=False: the rating axis is not forced to start at 0.
        y=alt.Y('rating', title='Average rating', scale=alt.Scale(zero=False)),
        # Colour ramp from pink to purple over a fixed 0-25 domain
        # (presumably the maximum award count in the data; TODO confirm).
        color=alt.Color(
            'num_awards',
            legend=alt.Legend(title="Number of awards"),
            scale=Scale(domain=[0, 25], range=['pink', 'purple']),
        ),
        size=alt.Size('num_reviews', legend=alt.Legend(title="Number of reviews")),
        tooltip=['title', 'authors'],
    )
    .interactive()
)


In [63]:
# Top-level layout: 'pad' autosizing, with extra padding on the right-hand side.
Chart = Chart.configure(
    autosize='pad',
    padding={"left": 5, "top": 5, "right": 50, "bottom": 5},
)

In [66]:
# Export the configured scatter plot to an HTML file in the working directory.
Chart.save("chart.html")