## Animated Bar Graph
### Last edited: Thu 3/27

### Description: Generates an animated bar graph of composer popularity over time.

In [2]:
# Import Statements
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from common import *

%matplotlib widget

In [None]:
# Load the pickled concerts and works dataframes from DF_FILE_LOC_MPL
concerts_pkl = DF_FILE_LOC_MPL + "concerts.pkl"
works_pkl = DF_FILE_LOC_MPL + "works.pkl"
concerts = pd.read_pickle(concerts_pkl)
works = pd.read_pickle(works_pkl)

In [4]:
# Clean works df
# Remove all entries that are intermissions (rows whose id is the literal
# string "0*"). The .copy() makes the filtered result an independent frame so
# that adding columns to it afterwards does not trigger pandas'
# SettingWithCopyWarning.
works = works.query('id != "0*"').copy()

# Normalize title if it is a dictionary
def normalize_title(title):
    """Return a plain title, flattening dictionary-shaped titles.

    A dictionary title is flattened to its "em" value followed by its "_"
    value, separated by a single space. Parts that are missing, None, or
    empty are skipped, so no stray leading/trailing space is produced.

    Args:
        title: Either a dictionary (optionally holding "em" and "_" keys)
            or any other value.

    Returns:
        The joined string for a dictionary; otherwise ``title`` unchanged.
    """
    if not isinstance(title, dict):
        return title
    parts = (title.get("em"), title.get("_"))
    # Joining only the truthy parts avoids results such as " Overture" or " ".
    return " ".join(part for part in parts if part)
# Store the normalized form of every title in a new 'n_title' column
works['n_title'] = works['title'].map(normalize_title)

## Add Date column to concerts and works

# Take the 'Date' value of the first entry in each row's 'concerts' list
# (None when that key is absent) and parse it as a UTC-aware datetime.
concerts['date'] = pd.to_datetime(
    concerts['concerts'].apply(lambda entries: entries[0].get('Date')),
    utc=True,
)
# Attach the parsed date to every work through its programID; the left join
# keeps works whose programID has no match in concerts.
works = works.merge(concerts[['programID', 'date']], how='left', on='programID')

In [None]:
# Animated Bar Graph of Composer Frequency over Time

# Modifiable Variables
start_year = 1842  # start date of animation, first date of concerts
end_year = 2025
year_range = 5  # years covered per frame; modify to adjust for specificity of animation
top_N = 10  # number of composers displayed; modify to taste

# Setup: one zero-initialised frequency slot per distinct composer
uniq_composers = works["composer"].unique()
comp_freq = pd.Series(0.0, index=uniq_composers)

# Plot setup
fig, ax = plt.subplots()
bars = ax.barh([], [])  # empty placeholder bars until the first frame is drawn
ax.set_xlim([0, 5000])
ax.set_xlabel('Popularity (Times Programmed)')
ax.set_ylabel('Composer')

# Group works by their year
# Keep one entry per row of works (duplicates included) so that counting a
# year's entries gives how many times each composer was programmed that year.
# Collapsing with .unique() would cap every composer at one count per year,
# which contradicts the "Times Programmed" axis label.
works_by_year = works.groupby(works.date.dt.year)['composer'].agg(list).to_dict()

# Function to bump the comp_freq entry of a single composer
def update_freq(composer):
    """Add one to ``composer``'s entry in the global ``comp_freq``.

    Composers that are not already present in ``comp_freq`` are ignored.
    Only an existing entry is modified; ``comp_freq`` itself is not rebound.
    """
    if composer not in comp_freq:
        return
    comp_freq[composer] = comp_freq[composer] + 1

# Function to update each frame of the animation
def update(frame):
    """Redraw the bar chart for the window of years selected by ``frame``.

    Frame ``k`` covers the years from ``start_year + k * year_range`` up to
    (but not including) that value plus ``year_range``, capped at
    ``end_year``. The bars show the ``top_N`` composers by cumulative count
    from ``start_year`` through the end of that window.

    The cumulative counts are rebuilt from ``works_by_year`` on every call
    instead of being added onto the totals left by the previous call.
    FuncAnimation draws the first frame once to initialise the figure and
    again when playback starts, and replays all frames when it repeats, so a
    running total would count those windows more than once.

    Args:
        frame: Zero-based frame index supplied by FuncAnimation.

    Returns:
        The bar container drawn for this frame. When ``frame`` starts at or
        past ``end_year`` the figure is closed and the most recently drawn
        bars are returned unchanged.
    """
    global comp_freq, bars

    curr_year = start_year + frame * year_range
    if curr_year >= end_year:
        plt.close(fig)
        # Return artists rather than None: with blit=True FuncAnimation
        # requires a sequence of artists from every call.
        return bars

    # Cap the window at end_year (not a hard-coded year) so changing the
    # end_year setting is enough to change the animated range.
    window_end = min(curr_year + year_range, end_year)

    # Collect the composers of every year from start_year up to the end of
    # this window; years with no works are simply absent from works_by_year.
    yearly = [
        pd.Series(works_by_year[year])
        for year in range(start_year, window_end)
        if year in works_by_year
    ]

    # Start from zero for every known composer so composers not yet seen
    # still have an entry, then add the counts gathered above.
    totals = pd.Series(0, index=comp_freq.index)
    if yearly:
        counts = pd.concat(yearly, ignore_index=True).value_counts()
        totals = totals.add(counts, fill_value=0)
    comp_freq = totals.astype(int)

    top = comp_freq.nlargest(top_N)

    # ax.clear() wipes limits, labels and title, so they are set again here.
    ax.clear()
    ax.set_xlim([0, 5000])
    ax.set_xlabel("Popularity (Times Programmed)")
    ax.set_ylabel("Composer")
    ax.set_title(f"Composer Popularity Over Time ({curr_year}-{window_end})")
    ax.invert_yaxis()
    bars = ax.barh(top.index, top.values, color="skyblue")
    return bars

# Round up so that a trailing partial window (fewer than year_range years
# before end_year) still gets its own frame instead of being dropped.
num_frames = (end_year - start_year + year_range - 1) // year_range
# blit=False: update() clears and redraws the whole axes, and the title and
# tick labels change every frame; blitting would only repaint the artists
# that update() returns and leave that text stale.
# repeat=False: play the timeline once and stop after the final window
# rather than looping back to the first one over shared global state.
ani = animation.FuncAnimation(
    fig=fig,
    func=update,
    frames=range(num_frames),
    interval=2000,
    blit=False,
    repeat=False,
)
plt.show()