In [13]:
%load_ext autoreload
%autoreload 2

import utils
import config
import plotly.express as px

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [14]:
# Load the three project datasets through the loaders defined in `config`.
# The code further down treats them as pandas DataFrames and reads:
#   keywords - columns 'name' and 'grants' ('grants' is exploded to one row per grant)
#   grants   - columns 'id', 'funding_amount' and 'start_year'
#   cats     - columns 'name' and 'keywords' (iterated row by row)
# NOTE(review): what load() reads from (disk, cache, network) is not visible here.
keywords = config.Keywords.load()
grants = config.Grants.load()
cats = config.Categories.load()

In [38]:
import pandas as pd
import numpy as np
from datetime import datetime
import plotly.graph_objects as go

# Build per-category, per-year grant statistics.
#
# Inputs (defined in an earlier cell): `cats`, `keywords`, `grants`.
# Output: `category_year_stats` with one row per (category, start_year) and the
# columns 'category', 'start_year', 'num_grants', 'funding_amount'.

# Create keyword-to-category mapping.
# NOTE: a keyword listed under several categories keeps only the last category
# seen, because later assignments overwrite earlier ones.
keyword_to_category = {
    keyword: category_name
    for category_name, keywords_list in zip(cats['name'], cats['keywords'])
    for keyword in keywords_list
}

# Explode grants from keywords: one row per (keyword, grant) pair.
keywords_exploded = keywords.explode('grants').rename(
    columns={'grants': 'grant_id', 'name': 'keyword_name'}
)

# Map keywords to categories; keywords without a category become NaN.
keywords_exploded['category'] = keywords_exploded['keyword_name'].map(keyword_to_category)

# A grant can reach the same category through several of its keywords. Keep a
# single row per (grant, category) pair so that each grant is counted once per
# category and its funding is not summed multiple times.
grant_category_link = (
    keywords_exploded[['grant_id', 'category']]
    .dropna()
    .drop_duplicates()
)

# Merge with grants data (inner join: links to unknown grant ids are dropped).
grant_category_link = grant_category_link.merge(
    grants[['id', 'funding_amount', 'start_year']],
    left_on='grant_id', right_on='id', how='inner',
)

# Filter for past 5 years. The cutoff year itself is included and no upper
# bound is applied, so grants starting in the current year (or later) stay in.
current_year = datetime.now().year
past_5_years = current_year - 5
grant_category_link = grant_category_link[grant_category_link['start_year'] >= past_5_years]

# Aggregate by category and year - count grants and sum funding.
category_year_stats = (
    grant_category_link
    .groupby(['category', 'start_year'])
    .agg(
        num_grants=('grant_id', 'count'),
        funding_amount=('funding_amount', 'sum'),
    )
    .reset_index()
)

# Rank categories by growth in yearly grant count and keep the top 3.
#
# Input: `category_year_stats` (one row per category and start_year, with
# 'num_grants' and 'funding_amount').
# Outputs:
#   growth_df           - one row per category that has data for every year
#   fastest_growing     - up to 3 category names, highest grants_growth_rate first
#   selected_categories - rows of `category_year_stats` for those categories,
#                         sorted by start_year

# Find all unique years
all_available_years = sorted(category_year_stats['start_year'].unique())
num_years = len(all_available_years)

# Explicit column list so `growth_df` has the same schema even when it is empty.
growth_columns = [
    'category',
    'grants_growth_rate',
    'funding_growth_rate',
    'first_year_grants',
    'last_year_grants',
    'total_grants',
    'avg_grants',
    'num_years',
]

# Find categories with data for ALL years and calculate growth.
# sort=False keeps categories in order of first appearance, which decides how
# ties in the growth rate are broken by nlargest() below.
growth_analysis = []
for category, cat_data in category_year_stats.groupby('category', sort=False):
    # Only consider categories with data for all years
    if len(cat_data) != num_years:
        continue

    cat_data = cat_data.sort_values('start_year')
    first_year, last_year = cat_data.iloc[0], cat_data.iloc[-1]
    first_year_grants = first_year['num_grants']
    last_year_grants = last_year['num_grants']
    first_year_funding = first_year['funding_amount']
    last_year_funding = last_year['funding_amount']

    # Growth is relative to the first year, so a zero baseline is skipped.
    if not first_year_grants > 0:
        continue

    grants_growth_rate = (last_year_grants - first_year_grants) / first_year_grants
    # Funding growth falls back to 0 when there is no first-year funding to
    # compare against (avoids a division by zero).
    if first_year_funding > 0:
        funding_growth_rate = (last_year_funding - first_year_funding) / first_year_funding
    else:
        funding_growth_rate = 0

    growth_analysis.append({
        'category': category,
        'grants_growth_rate': grants_growth_rate,
        'funding_growth_rate': funding_growth_rate,
        'first_year_grants': first_year_grants,
        'last_year_grants': last_year_grants,
        'total_grants': cat_data['num_grants'].sum(),
        'avg_grants': cat_data['num_grants'].mean(),
        'num_years': len(cat_data),
    })

growth_df = pd.DataFrame(growth_analysis, columns=growth_columns)

# Select top 3 fastest growing categories (with all years of data).
# With no qualifying category the frame is empty and nlargest() cannot rank
# its untyped columns, so fall back to an empty selection instead of raising.
if growth_df.empty:
    fastest_growing = []
else:
    fastest_growing = growth_df.nlargest(3, 'grants_growth_rate')['category'].tolist()

selected_categories = category_year_stats[
    category_year_stats['category'].isin(fastest_growing)
].sort_values('start_year')

# Animated chart of the fastest growing categories.
#
# Inputs: `fastest_growing` (list of category names) and `selected_categories`
# (their per-year rows with 'start_year', 'num_grants', 'funding_amount').
# Each animation frame shows the data up to and including one year: a line per
# category plus bubbles whose size is proportional to that year's funding.

if not fastest_growing or selected_categories.empty:
    # Without this check the code below fails with an opaque IndexError.
    raise ValueError(
        'No category has grant data for every year in the selected window; nothing to plot.'
    )

# Define distinct colors for each category (blue, orange, green). Colors are
# assigned by position, so fewer than three categories works as well, and the
# palette is reused cyclically should there ever be more.
palette = ['rgb(31, 119, 180)', 'rgb(255, 127, 14)', 'rgb(44, 160, 44)']
category_colors = {
    category: palette[i % len(palette)]
    for i, category in enumerate(fastest_growing)
}

all_years = sorted(selected_categories['start_year'].unique())

# Bubble sizes are scaled against the largest funding value across all
# selected rows, so sizes stay comparable between frames.
max_funding = selected_categories['funding_amount'].max()
max_bubble_size = 60
fallback_bubble_size = 10  # used when there is no positive funding to scale by

fig = go.Figure()

# Create frames for animation
frames = []
for year in all_years:
    year_data = selected_categories[selected_categories['start_year'] <= year]

    traces = []

    # Add line and bubbles for each category
    for category in fastest_growing:
        cat_data = year_data[year_data['category'] == category]
        if cat_data.empty:
            continue

        color = category_colors[category]

        if max_funding > 0:
            bubble_sizes = cat_data['funding_amount'] / max_funding * max_bubble_size
        else:
            # Dividing by a zero maximum would produce NaN marker sizes.
            bubble_sizes = [fallback_bubble_size] * len(cat_data)

        # Line trace (hidden from legend and hover; the bubbles carry both)
        line_trace = go.Scatter(
            x=cat_data['start_year'],
            y=cat_data['num_grants'],
            mode='lines',
            line=dict(width=2, color=color),
            showlegend=False,
            hoverinfo='skip',
            name=category,
            legendgroup=category
        )

        # Bubble trace
        bubble_trace = go.Scatter(
            x=cat_data['start_year'],
            y=cat_data['num_grants'],
            mode='markers',
            marker=dict(
                size=bubble_sizes,
                color=color,
                line=dict(width=1, color='white'),
                opacity=0.8
            ),
            text=[f"Category: {category}<br>Year: {int(y)}<br>Grants: {int(g)}<br>Funding: ${f:,.0f}"
                  for y, g, f in zip(cat_data['start_year'], cat_data['num_grants'], cat_data['funding_amount'])],
            hovertemplate='%{text}<extra></extra>',
            name=category,
            legendgroup=category,
            showlegend=True
        )

        traces.extend([line_trace, bubble_trace])

    frames.append(go.Frame(data=traces, name=str(year)))

# Set initial data (first frame)
for trace in frames[0].data:
    fig.add_trace(trace)

# Add frames to figure
fig.frames = frames

# Update layout
fig.update_layout(
    title=dict(
        text='Fastest Growing Categories: Number of Grants Over Time',
        y=0.95,
        x=0.5,
        xanchor='center',
        yanchor='top'
    ),
    # Fixed axis ranges keep the view stable while frames add more years.
    xaxis=dict(title='Year', dtick=1, range=[all_years[0] - 0.5, all_years[-1] + 0.5]),
    yaxis=dict(title='Number of Grants', range=[0, selected_categories['num_grants'].max() * 1.1]),
    height=700,
    legend=dict(x=1.05, y=1, xanchor='left', yanchor='top'),
    updatemenus=[dict(
        type='buttons',
        showactive=False,
        buttons=[
            dict(label='Play',
                 method='animate',
                 args=[None, dict(frame=dict(duration=800, redraw=True), fromcurrent=True)]),
            dict(label='Pause',
                 method='animate',
                 args=[[None], dict(frame=dict(duration=0, redraw=False), mode='immediate')])
        ],
        x=0.1,
        y=-0.15,
        xanchor='left',
        yanchor='top'
    )],
    sliders=[dict(
        active=0,
        # One slider step per frame, addressed by frame name.
        steps=[dict(args=[[f.name], dict(frame=dict(duration=0, redraw=True), mode='immediate')],
                    method='animate',
                    label=str(year)) for f, year in zip(frames, all_years)],
        x=0.1,
        y=-0.25,
        len=0.8,
        xanchor='left',
        yanchor='top'
    )]
)

fig.show()
