In [1]:
import pandas as pd 
import altair as alt

# Turn off Altair's max-rows check before any chart is built.
alt.data_transformers.disable_max_rows()

# Load the Marvel comics CSV. The literal string 'None' is read as a missing
# value, and publish_date is parsed as a date column while reading.
data = pd.read_csv(
    "../input/marvel-comic-books/Marvel_Comics.csv",
    na_values='None',
    parse_dates=['publish_date'],
)

# `df` is the frame the later cells read from; `data` keeps the loader's result.
# NOTE(review): read_csv already returns a DataFrame, so this wrap looks
# redundant; it is kept so that `data` and `df` remain two distinct objects.
df = pd.DataFrame(data)

In [2]:
# Single-item selection used to highlight one bar. The pencilers and comics
# cells further down reuse this same object, so it must be defined first.
# NOTE(review): `alt.selection(type='single')` / `.add_selection` are the
# Altair 4 spellings; confirm the installed Altair version before migrating.
selection = alt.selection(type='single')

# The 20 most frequent values of the `writer` column, as a two-column frame
# ('Writer', 'Total Issues') with a fresh 0..n-1 index.
writers = (
    df['writer']
    .value_counts()
    .head(20)
    .rename_axis('Writer')
    .reset_index(name='Total Issues')
)

# Horizontal bars ordered by descending count. The selected bar is drawn fully
# opaque and the others are faded to 0.2.
alt.Chart(writers).mark_bar().encode(
    x=alt.X('Total Issues', title='Total Issues Written'),
    y=alt.Y('Writer', sort='-x'),
    opacity=alt.condition(selection, alt.value(1), alt.value(.2)),
    tooltip=['Writer', 'Total Issues'],
).add_selection(
    selection
).properties(
    title='Top 20 Marvel Writers (1939-2022)',
    height=500,
    width=1000,
)

In [3]:
# The 20 most frequent values of the `penciler` column, as a two-column frame
# ('Penciler', 'Total Issues') with a fresh 0..n-1 index.
pencilers = (
    df['penciler']
    .value_counts()
    .head(20)
    .rename_axis('Penciler')
    .reset_index(name='Total Issues')
)

# Horizontal bars ordered by descending count. `selection` is the object created
# in the writers cell; the selected bar stays opaque and the rest fade to 0.2.
alt.Chart(pencilers).mark_bar().encode(
    x=alt.X('Total Issues', title='Total Issues Penciled'),
    y=alt.Y('Penciler', sort='-x'),
    opacity=alt.condition(selection, alt.value(1), alt.value(.2)),
    tooltip=['Penciler', 'Total Issues'],
).add_selection(
    selection
).properties(
    title='Top 20 Marvel Pencilers (1939-2022)',
    height=500,
    width=1000,
)

In [4]:
# The 20 most frequent values of the `comic_name` column, i.e. the titles with
# the most rows in the dataset, as a ('Comic', 'Total Issues') frame.
comics = (
    df['comic_name']
    .value_counts()
    .head(20)
    .rename_axis('Comic')
    .reset_index(name='Total Issues')
)

# Horizontal bars ordered by descending count. `selection` is the object created
# in the writers cell; the selected bar stays opaque and the rest fade to 0.2.
alt.Chart(comics).mark_bar().encode(
    x=alt.X('Total Issues'),
    y=alt.Y('Comic', sort='-x'),
    opacity=alt.condition(selection, alt.value(1), alt.value(.2)),
    tooltip=['Comic', 'Total Issues'],
).add_selection(
    selection
).properties(
    title='Top 20 Longest Running Marvel Comics (By Issue Count)',
    height=500,
    width=1000,
)

In [5]:
# Derive a four-digit `year` string from publish_date on a NEW frame.
# `assign` returns a new DataFrame, so the shared `df` that earlier cells read
# is left untouched (a plain `new_df = df` would only alias it and the column
# write would leak into `df`).
new_df = df.assign(
    year=pd.to_datetime(df['publish_date']).dt.strftime('%Y')
)

# Count rows per year as a two-column frame ('Year', 'Total Issues').
# value_counts skips missing values by default, so rows without a year are
# not counted.
dates = (
    new_df['year']
    .value_counts()
    .rename_axis('Year')
    .reset_index(name='Total Issues')
)

# Convert the year strings to timestamps (January 1st of each year) so the
# chart can treat the axis as temporal.
dates['Year'] = pd.to_datetime(dates['Year'], format='%Y')

# Keep only years strictly before 2023.
dates = dates[dates['Year'] < pd.Timestamp('2023-01-01')]

In [6]:
# Hover selection keyed on the Year field: it picks the data point nearest to
# the pointer, and with empty='none' nothing counts as selected until then.
nearest = alt.selection(
    type='single',
    on='mouseover',
    nearest=True,
    fields=['Year'],
    empty='none',
)

# Base layer: issues per year as a line.
line = alt.Chart(dates).mark_line().encode(
    x='Year:T',
    y='Total Issues:Q',
)

# Point markers on the line, visible only for the hovered year.
points = line.mark_point().encode(
    opacity=alt.condition(nearest, alt.value(1), alt.value(0))
)

# Count label next to the hovered point; a single space is drawn elsewhere.
text = line.mark_text(align='left', dx=5, dy=-5).encode(
    text=alt.condition(nearest, 'Total Issues:Q', alt.value(' '))
)

# Fully transparent points spanning the x axis; this layer carries the
# selection so the hover position can be resolved to a year.
selectors = alt.Chart(dates).mark_point().encode(
    alt.X('Year:T'),
    opacity=alt.value(0),
).add_selection(nearest)

# Vertical gray rule drawn only at the hovered year.
rules = alt.Chart(dates).mark_rule(color='gray').encode(
    alt.X('Year:T')
).transform_filter(nearest)

# Stack the layers in drawing order: line, selectors, points, rules, text.
alt.layer(
    line, selectors, points, rules, text
).properties(
    title='Number of Issues Released Per Year (1939-2022)',
    height=500,
    width=1000,
)