# Growth Analysis: Pokemon Cards & Star Wars Figures
## Identifying Highest Growth Collectibles (Graded/Authentic Only)

This notebook analyzes price growth over time for:
1. Pokemon cards (Graded prices, 2020-2025)
2. Star Wars action figures (Authentic only, 2009-2025)

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Plot styling shared by every figure in this notebook: seaborn's 'whitegrid'
# theme and a 14x8 default figure size (individual cells may still override
# the size through plt.subplots(figsize=...)).
sns.set_style('whitegrid')
plt.rcParams['figure.figsize'] = (14, 8)

# IPython magic: render matplotlib figures inline in the cell output.
%matplotlib inline

---
## Part 1: Pokemon Card Growth Analysis
---

In [None]:
# Pokemon price history: read the CSV and convert 'Date' to real datetimes.
df_pk = (
    pd.read_csv('../data/pokemon/final_dataset.csv')
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
)

# Keep only rows with a positive graded price whose name does not match the
# booster / pack / box pattern (case-insensitive; missing names are kept).
has_graded_price = df_pk['Graded'] > 0
looks_sealed = df_pk['Card Name'].str.contains('Booster|Pack|Box', case=False, na=False)
df_pk = df_pk[has_graded_price & ~looks_sealed]

# Quick sanity summary of what survived the filters.
print(
    f"Total records: {len(df_pk):,}",
    f"Unique cards: {df_pk['Card Name'].nunique()}",
    f"Date range: {df_pk['Date'].min().strftime('%Y-%m')} to {df_pk['Date'].max().strftime('%Y-%m')}",
    sep="\n",
)

In [None]:
# Calculate growth for each card: the change in graded price between the
# card's earliest and latest dated observation.
PK_GROWTH_COLUMNS = ['Card', 'First Price', 'Last Price', 'Absolute Growth',
                     'Growth %', 'First Date', 'Last Date', 'Months']
DAYS_PER_MONTH = 30.44  # average month length (365.25 / 12), used to express the span in months

growth_results = []

# Rows without a date cannot anchor a time span, so they are dropped here
# (otherwise they sort last and would be picked as the "latest" observation).
# Sorting once with a stable sort keeps same-date rows in their file order, so
# the first/last pick is deterministic. groupby preserves row order within
# each group, which avoids re-scanning the whole frame once per card.
pk_dated = df_pk.dropna(subset=['Date']).sort_values('Date', kind='stable')

for card, card_data in pk_dated.groupby('Card Name', sort=False):
    first_obs = card_data.iloc[0]
    last_obs = card_data.iloc[-1]
    first_date, last_date = first_obs['Date'], last_obs['Date']

    # Growth needs observations on two different dates. Counting rows is not
    # enough: several rows on a single date would yield a "growth" over 0 months.
    if not first_date < last_date:
        continue

    first_price = first_obs['Graded']
    last_price = last_obs['Graded']

    # Guard the division below against a zero or missing starting price.
    if not first_price > 0:
        continue

    abs_growth = last_price - first_price
    growth_results.append({
        'Card': card,
        'First Price': first_price,
        'Last Price': last_price,
        'Absolute Growth': abs_growth,
        'Growth %': (abs_growth / first_price) * 100,
        'First Date': first_date,
        'Last Date': last_date,
        'Months': (last_date - first_date).days / DAYS_PER_MONTH
    })

# Passing the column list keeps the frame well-formed (and sortable) even when
# no card qualifies; a bare pd.DataFrame([]) has no 'Growth %' column to sort on.
pk_growth_df = (
    pd.DataFrame(growth_results, columns=PK_GROWTH_COLUMNS)
    .sort_values('Growth %', ascending=False)
)
print(f"\nAnalyzed {len(pk_growth_df)} Pokemon cards with sufficient data")

In [None]:
# Report the ten strongest growers (the growth table is sorted best-first).
print("TOP 10 POKEMON CARDS BY GROWTH %", "="*80, sep="\n")

for _, row in pk_growth_df.iloc[:10].iterrows():
    # One card per block: name, first -> last price with dates, growth, time span.
    print(
        f"\n{row['Card']}",
        f"  {row['First Date'].strftime('%Y-%m')}: ${row['First Price']:,.0f} → "
        f"{row['Last Date'].strftime('%Y-%m')}: ${row['Last Price']:,.0f}",
        f"  Growth: {row['Growth %']:+.1f}% (${row['Absolute Growth']:,.0f})",
        f"  Period: {row['Months']:.1f} months",
        sep="\n",
    )

In [None]:
# Report the ten weakest performers: the last ten rows of the best-first
# growth table, so the steepest decline is printed last.
print("\nBOTTOM 10 POKEMON CARDS BY GROWTH % (Biggest Declines)", "="*80, sep="\n")

for _, row in pk_growth_df.iloc[-10:].iterrows():
    # One card per block: name, first -> last price with dates, growth, time span.
    print(
        f"\n{row['Card']}",
        f"  {row['First Date'].strftime('%Y-%m')}: ${row['First Price']:,.0f} → "
        f"{row['Last Date'].strftime('%Y-%m')}: ${row['Last Price']:,.0f}",
        f"  Growth: {row['Growth %']:+.1f}% (${row['Absolute Growth']:,.0f})",
        f"  Period: {row['Months']:.1f} months",
        sep="\n",
    )

In [None]:
# Visualize top 15 growers
fig, ax = plt.subplots(figsize=(12, 8))

# head(15) takes the strongest growers; re-sorting ascending puts the best one
# at the top of the chart, because barh draws position 0 at the bottom.
top_15 = pk_growth_df.head(15).sort_values('Growth %')
colors = ['green' if x > 0 else 'red' for x in top_15['Growth %']]
bar_positions = range(len(top_15))

ax.barh(bar_positions, top_15['Growth %'], color=colors, alpha=0.7, edgecolor='black')
ax.set_yticks(bar_positions)
ax.set_yticklabels(top_15['Card'])
ax.set_xlabel('Growth %', fontsize=12)
ax.set_title('Top 15 Pokemon Cards by Price Growth % (Graded)', fontsize=14, fontweight='bold')
ax.axvline(0, color='black', linewidth=0.8)
ax.grid(axis='x', alpha=0.3)

# Add percentage labels.
# The gap between bar end and label is 2% of the largest bar magnitude, so it
# scales with the data instead of being a fixed number of percentage points.
label_pad = top_15['Growth %'].abs().max() * 0.02 if len(top_15) else 0.0
for i, v in enumerate(top_15['Growth %']):
    if v > 0:
        # Positive bar: label starts just right of the bar end.
        ax.text(v + label_pad, i, f"{v:.1f}%",
                va='center', ha='left', fontsize=9, fontweight='bold')
    else:
        # Non-positive bar: right-align so the text extends away from the bar
        # rather than back across it.
        ax.text(v - label_pad, i, f"{v:.1f}%",
                va='center', ha='right', fontsize=9, fontweight='bold')

plt.tight_layout()
plt.show()

# iloc[-1] is the best grower after the ascending re-sort; skip the line when
# there is nothing to report instead of raising IndexError.
if len(top_15):
    print(f"\nKey Insight: {top_15.iloc[-1]['Card']} showed {top_15.iloc[-1]['Growth %']:.1f}% growth")

In [None]:
# Price Evolution Over Time - Top 3 Pokemon Cards
fig, ax = plt.subplots(figsize=(14, 8))

# The growth table is sorted best-first, so its first three names are the top 3.
top_3_cards = pk_growth_df['Card'].head(3).values

# One line per card, each with its own color.
colors = ['#1f77b4', '#ff7f0e', '#2ca02c']
for card, line_color in zip(top_3_cards, colors):
    history = df_pk.loc[df_pk['Card Name'] == card].sort_values('Date')
    ax.plot(
        history['Date'], history['Graded'],
        marker='o', linewidth=2.5, markersize=6,
        label=card, color=line_color, alpha=0.8,
    )

# Titles, axis labels, legend and grid.
ax.set_title('Price Evolution: Top 3 Pokemon Cards by Growth',
             fontsize=14, fontweight='bold')
ax.set_xlabel('Date', fontsize=12, fontweight='bold')
ax.set_ylabel('Graded Price ($)', fontsize=12, fontweight='bold')
ax.legend(loc='best', fontsize=10, framealpha=0.9)
ax.grid(True, alpha=0.3)

# Show y ticks as whole-dollar amounts with thousands separators.
dollar_ticks = plt.FuncFormatter(lambda x, p: f'${x:,.0f}')
ax.yaxis.set_major_formatter(dollar_ticks)

# Tilt the date labels so they do not collide.
for tick_label in ax.get_xticklabels():
    tick_label.set_rotation(45)
    tick_label.set_ha('right')

plt.tight_layout()
plt.show()

print("\nPrice trajectories show distinct growth patterns across top performers")

---
## Part 2: Star Wars Figure Growth Analysis
---

In [None]:
# Star Wars sales records.
df_sw = pd.read_csv('starwars_filtered.csv')

# Keep rows flagged authentic (authenticity_n == 1) that also carry a
# positive selling price.
is_authentic = df_sw['authenticity_n'] == 1
has_price = df_sw['selling_price'] > 0
df_sw = df_sw[is_authentic & has_price]

# Quick sanity summary of what survived the filters.
print(
    f"Total records: {len(df_sw):,}",
    f"Unique figures: {df_sw['figure'].nunique()}",
    f"Year range: {df_sw['year'].min()} to {df_sw['year'].max()}",
    sep="\n",
)

In [None]:
# Calculate growth for each figure: the change in mean selling price between
# the figure's earliest and latest sale year.
SW_GROWTH_COLUMNS = ['Figure', 'First Price', 'Last Price', 'Absolute Growth',
                     'Growth %', 'First Year', 'Last Year', 'Years']

sw_growth_results = []

# groupby hands over each figure's rows directly instead of re-scanning the
# whole frame once per figure; sort=False keeps first-appearance order.
for figure, figure_data in df_sw.groupby('figure', sort=False):
    first_year = figure_data['year'].min()
    last_year = figure_data['year'].max()

    # Require sales in at least two different years. Counting rows is not
    # enough: a figure with several sales in a single year would be reported
    # as 0% growth over 0 years and distort the median / positive-growth stats.
    # The comparison is also False when the years are missing (NaN).
    if not last_year > first_year:
        continue

    # A year can hold several sales, so the endpoints are yearly averages.
    first_price = figure_data.loc[figure_data['year'] == first_year, 'selling_price'].mean()
    last_price = figure_data.loc[figure_data['year'] == last_year, 'selling_price'].mean()

    # Guard the division below against a zero or missing starting price.
    if not first_price > 0:
        continue

    abs_growth = last_price - first_price
    sw_growth_results.append({
        'Figure': figure,
        'First Price': first_price,
        'Last Price': last_price,
        'Absolute Growth': abs_growth,
        'Growth %': (abs_growth / first_price) * 100,
        'First Year': first_year,
        'Last Year': last_year,
        'Years': last_year - first_year
    })

# Passing the column list keeps the frame well-formed (and sortable) even when
# no figure qualifies; a bare pd.DataFrame([]) has no 'Growth %' column to sort on.
sw_growth_df = (
    pd.DataFrame(sw_growth_results, columns=SW_GROWTH_COLUMNS)
    .sort_values('Growth %', ascending=False)
)
print(f"\nAnalyzed {len(sw_growth_df)} Star Wars figures with sufficient data")

In [None]:
# Report the ten strongest growers (the growth table is sorted best-first).
print("TOP 10 STAR WARS FIGURES BY GROWTH %", "="*80, sep="\n")

for _, row in sw_growth_df.iloc[:10].iterrows():
    # One figure per block: name, first -> last yearly price, growth, time span.
    print(
        f"\n{row['Figure']}",
        f"  {int(row['First Year'])}: ${row['First Price']:,.2f} → "
        f"{int(row['Last Year'])}: ${row['Last Price']:,.2f}",
        f"  Growth: {row['Growth %']:+.1f}% (${row['Absolute Growth']:,.2f})",
        f"  Period: {int(row['Years'])} years",
        sep="\n",
    )

In [None]:
# Report the ten weakest performers: the last ten rows of the best-first
# growth table, so the steepest decline is printed last.
print("\nBOTTOM 10 STAR WARS FIGURES BY GROWTH % (Biggest Declines)", "="*80, sep="\n")

for _, row in sw_growth_df.iloc[-10:].iterrows():
    # One figure per block: name, first -> last yearly price, growth, time span.
    print(
        f"\n{row['Figure']}",
        f"  {int(row['First Year'])}: ${row['First Price']:,.2f} → "
        f"{int(row['Last Year'])}: ${row['Last Price']:,.2f}",
        f"  Growth: {row['Growth %']:+.1f}% (${row['Absolute Growth']:,.2f})",
        f"  Period: {int(row['Years'])} years",
        sep="\n",
    )

In [None]:
# Visualize top 15 growers
fig, ax = plt.subplots(figsize=(12, 8))

# head(15) takes the strongest growers; re-sorting ascending puts the best one
# at the top of the chart, because barh draws position 0 at the bottom.
top_15_sw = sw_growth_df.head(15).sort_values('Growth %')
colors = ['green' if x > 0 else 'red' for x in top_15_sw['Growth %']]
bar_positions = range(len(top_15_sw))

ax.barh(bar_positions, top_15_sw['Growth %'], color=colors, alpha=0.7, edgecolor='black')
ax.set_yticks(bar_positions)
ax.set_yticklabels(top_15_sw['Figure'], fontsize=9)
ax.set_xlabel('Growth %', fontsize=12)
ax.set_title('Top 15 Star Wars Figures by Price Growth % (Authentic)', fontsize=14, fontweight='bold')
ax.axvline(0, color='black', linewidth=0.8)
ax.grid(axis='x', alpha=0.3)

# Add percentage labels.
# The gap is 2% of the largest bar magnitude, computed once outside the loop.
# Using the absolute value keeps the gap positive even if every bar is
# negative (a plain max() would then flip the offset into the bars).
label_pad = top_15_sw['Growth %'].abs().max() * 0.02 if len(top_15_sw) else 0.0
for i, v in enumerate(top_15_sw['Growth %']):
    if v > 0:
        # Positive bar: label starts just right of the bar end.
        ax.text(v + label_pad, i, f"{v:.1f}%",
                va='center', ha='left', fontsize=8, fontweight='bold')
    else:
        # Non-positive bar: right-align so the text extends away from the bar
        # rather than back across it.
        ax.text(v - label_pad, i, f"{v:.1f}%",
                va='center', ha='right', fontsize=8, fontweight='bold')

plt.tight_layout()
plt.show()

# iloc[-1] is the best grower after the ascending re-sort; skip the line when
# there is nothing to report instead of raising IndexError.
if len(top_15_sw):
    print(f"\nKey Insight: {top_15_sw.iloc[-1]['Figure']} showed {top_15_sw.iloc[-1]['Growth %']:.1f}% growth")

In [None]:
# Price Evolution Over Time - Top 3 Star Wars Figures
fig, ax = plt.subplots(figsize=(14, 8))

# The growth table is sorted best-first, so its first three names are the top 3.
top_3_figures = sw_growth_df['Figure'].head(3).values

# One line per figure, plotting the mean selling price of each year.
colors = ['#d62728', '#9467bd', '#8c564b']
for figure, line_color in zip(top_3_figures, colors):
    figure_sales = df_sw.loc[df_sw['figure'] == figure]
    yearly_mean = figure_sales.groupby('year')['selling_price'].mean()
    ax.plot(
        yearly_mean.index, yearly_mean.values,
        marker='s', linewidth=2.5, markersize=7,
        label=figure, color=line_color, alpha=0.8,
    )

# Titles, axis labels, legend and grid.
ax.set_title('Price Evolution: Top 3 Star Wars Figures by Growth (Authentic)',
             fontsize=14, fontweight='bold')
ax.set_xlabel('Year', fontsize=12, fontweight='bold')
ax.set_ylabel('Average Selling Price ($)', fontsize=12, fontweight='bold')
ax.legend(loc='best', fontsize=10, framealpha=0.9)
ax.grid(True, alpha=0.3)

# Show y ticks as dollar amounts with cents and thousands separators.
dollar_ticks = plt.FuncFormatter(lambda x, p: f'${x:,.2f}')
ax.yaxis.set_major_formatter(dollar_ticks)

plt.tight_layout()
plt.show()

print("\nPrice trajectories reveal long-term appreciation trends in vintage Star Wars collectibles")

---
## Summary: Cross-Market Comparison
---

In [None]:
# Headline: the single best performer in each market. Both growth tables are
# sorted best-first, so row 0 is the winner.
print("HIGHEST GROWTH WINNERS", "="*80, sep="\n")

print(f"\nPokemon Card Winner:")
pk_winner = pk_growth_df.iloc[0]
print(
    f"  {pk_winner['Card']}",
    f"  Growth: {pk_winner['Growth %']:+.1f}% over {pk_winner['Months']:.1f} months",
    f"  ${pk_winner['First Price']:,.0f} → ${pk_winner['Last Price']:,.0f}",
    sep="\n",
)

print(f"\nStar Wars Figure Winner:")
sw_winner = sw_growth_df.iloc[0]
print(
    f"  {sw_winner['Figure']}",
    f"  Growth: {sw_winner['Growth %']:+.1f}% over {int(sw_winner['Years'])} years",
    f"  ${sw_winner['First Price']:,.2f} → ${sw_winner['Last Price']:,.2f}",
    sep="\n",
)

# Market-level view: median growth and how many items gained value.
print(
    "\n" + "="*80,
    "\nKey Observations:",
    f"• Pokemon median growth: {pk_growth_df['Growth %'].median():.1f}%",
    f"• Star Wars median growth: {sw_growth_df['Growth %'].median():.1f}%",
    f"• Pokemon cards with positive growth: {(pk_growth_df['Growth %'] > 0).sum()} of {len(pk_growth_df)}",
    f"• Star Wars figures with positive growth: {(sw_growth_df['Growth %'] > 0).sum()} of {len(sw_growth_df)}",
    sep="\n",
)