In [21]:
import streamlit as st
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as stats

In [22]:
from statsmodels.stats.proportion import proportion_confint
from statsmodels.stats.proportion import proportions_ztest
from statsmodels.stats.weightstats import ztest
from scipy.stats import poisson, norm, t, binom

In [23]:
'# Star Wars & Pokemon App'

st.header('Star Wars & Pokemon Dashboard')

# Reading in data and defining variables
starwars_moc_loose = pd.read_csv('C:/Users/amyby/Desktop/Bootcamp/final_project/data/starwars_moc_loose_v7.csv')
pokemon_final_26 = pd.read_csv('C:/Users/amyby/Desktop/Bootcamp/final_project/data/final_dataset_csv.csv')
pokemon_medians = pd.read_csv('C:/Users/amyby/Desktop/Bootcamp/final_project/pokemons_medians_df.csv')
starwars_medians = pd.read_csv('C:/Users/amyby/Desktop/Bootcamp/final_project/starwars_medians_df.csv')
starwars_year_figure = pd.read_csv('C:/Users/amyby/Desktop/Bootcamp/final_project/starwars_year_figure.csv')
pokemon_year_card = pd.read_csv('C:/Users/amyby/Desktop/Bootcamp/final_project/pokemon_year_card_df.csv')
pokemon_returns = pokemon_year_card['YoY_Growth'].replace([np.inf, -np.inf], np.nan).dropna()
starwars_returns = starwars_year_figure['YoY_Growth'].dropna()
sw_returns_list = starwars_returns.to_list()
poke_returns_list = pokemon_returns.to_list()




In [24]:
#H1 - rejecting H0 for both Star Wars & Pokemon Data (Returns are not statistically significantly different from annual inflation rate of 2%)
exp_return = 0.02 # in %

# Perform one-sample t-test for SW data
sw_t_stat, sw_p_value = stats.ttest_1samp(sw_returns_list, exp_return)
# Perform one-sample t-test for Poke data
poke_t_stat, poke_p_value = stats.ttest_1samp(poke_returns_list, exp_return)
# Storing results in pandas df 
test_results_df = pd.DataFrame({
    'Franchise': ['Star Wars', 'Pokemon'],
    'T-Statistic': [sw_t_stat, poke_t_stat],
    'P-Value': [sw_p_value, poke_p_value]})


In [25]:
poke_returns_list

[-0.1745223977741697,
 -0.1622159644752517,
 -0.0239500676124655,
 0.4627059602649006,
 -1.0,
 -0.1287878787878787,
 -0.0689999999999999,
 -0.0659898192686685,
 -0.9704319408638816,
 -0.2272389069264069,
 1.1972077552628124,
 -0.3431997450402358,
 -0.3222235700855219,
 -0.1677927424045818,
 -0.0895209419861282,
 0.3345931262548718,
 -1.0,
 0.0054632729110086,
 -0.1705141493137347,
 0.0123485422188565,
 0.4264408174333416,
 -0.6329072435267126,
 -0.1508928571428571,
 -0.2085804416403786,
 0.0506749574829932,
 0.6106881812893599,
 0.4524613331239695,
 -0.1385999999999999,
 -0.1224719030616406,
 -0.0268803363796283,
 0.6520726321436183,
 -0.7280212081719072,
 -0.7423053020639125,
 -0.2352605191343529,
 0.1649792531120331,
 0.4539820487248895,
 0.9509088236735096,
 -0.0920140632847815,
 0.1007578271932736,
 0.599110530415337,
 0.957010197507974,
 3.886254516258531,
 -0.2613102896400326,
 -0.0701628725358911,
 -0.0650627254719959,
 0.387996878318386,
 -0.9721117153654716,
 0.749867794817556

In [26]:
poke_p_value

np.float64(0.04302090449261267)

In [27]:
#H1 - rejecting H0 for both Star Wars & Pokemon Data (Returns are not statistically significantly different from annual inflation rate of 2%)
exp_return = 0.02 # in %

# Perform one-sample t-test for SW data
sw_t_stat, sw_p_value = stats.ttest_1samp(sw_returns_list, exp_return)
# Perform one-sample t-test for Poke data
poke_t_stat, poke_p_value = stats.ttest_1samp(poke_returns_list, exp_return)
# Storing results in pandas df 
test_results_df = pd.DataFrame({
    'Franchise': ['Star Wars', 'Pokemon'],
    'T-Statistic': [sw_t_stat, poke_t_stat],
    'P-Value': [sw_p_value, poke_p_value]})

#Visualising results - Star Wars
fig, ax1 = plt.subplots(figsize=(10, 6))

# 🔹 Left Y-axis: Median Selling Price
sns.lineplot(
    data=starwars_medians,
    x='year',
    y='selling_price',
    ax=ax1,
    marker='o',
    color='blue',
    label='Median Selling Price'
)
sns.lineplot(
    data=starwars_medians,
    x='year',
    y='expected_price',
    ax=ax1,
    marker='o',
    color='red',
    label='Expected Price (2% Return)'
)

ax1.set_ylabel('Selling Price', color='blue')
ax1.tick_params(axis='y', labelcolor='blue')

# Create a second Y-axis
ax2 = ax1.twinx()

#Right Y-axis: YoY Growth %
sns.lineplot(
    data=starwars_medians,
    x='year',
    y='YoY_Growth',
    ax=ax2,
    marker='o',
    alpha=0.3,
    color='green',
    label='YoY Growth'
)

ax2.set_ylabel('YoY Growth (%)', color='green')
ax2.tick_params(axis='y', labelcolor='green')

# 🔸 Titles and Grid
plt.title('Price development for Star Wars Figures')
ax1.grid(True)

# 🔹 Combine legends from both axes
lines, labels = ax1.get_legend_handles_labels()
lines2, labels2 = ax2.get_legend_handles_labels()
ax1.legend(lines + lines2, labels + labels2, loc='upper left')

plt.tight_layout()
plt.show()

plt.show()

# Visualising Results - Pokemon

fig, ax1 = plt.subplots(figsize=(10, 6))

# 🔹 Left Y-axis: Median Selling Price
sns.lineplot(
    data=pokemon_medians,
    x='year',
    y='Graded',
    ax=ax1,
    marker='o',
    color='blue',
    label='Median Selling Price'
)
sns.lineplot(
    data=pokemon_medians,
    x='year',
    y='expected_price',
    ax=ax1,
    marker='o',
    color='red',
    label='Expected Price (2% Return)'
)

ax1.set_ylabel('Selling Price', color='blue')
ax1.tick_params(axis='y', labelcolor='blue')

# Create a second Y-axis
ax2 = ax1.twinx()

#Right Y-axis: YoY Growth %
sns.lineplot(
    data=pokemon_medians,
    x='year',
    y='YoY_Growth',
    ax=ax2,
    marker='o',
    alpha=0.3,
    color='green',
    #label='YoY Growth'
)

ax2.set_ylabel('YoY Growth (%)', color='green')
ax2.tick_params(axis='y', labelcolor='green')

# 🔸 Titles and Grid
plt.title('Graded Price development for Pokemon Cards')
ax1.grid(True)

# 🔹 Combine legends from both axes
lines, labels = ax1.get_legend_handles_labels()
#lines2, labels2 = ax2.get_legend_handles_labels()
ax1.legend(lines + lines2, labels + labels2, loc='upper left')

plt.tight_layout()
plt.show()

plt.show()

  plt.show()
  plt.show()
  plt.show()
  plt.show()


In [28]:
# Testing Hypothesis for H2: Market behavior varies by condition and character type. Mint-on-card and iconic figures show higher stability and appreciation.
np.random.seed(42)

n = 100

In [29]:
import statsmodels.api as sm
from statsmodels.formula.api import ols

# Define the model for growth groups starwars
model = ols(
    'total_growth ~ C(condition) + C(character_type) + C(authenticity_n) '
    '+ C(condition):C(character_type) '
    '+ C(condition):C(authenticity_n) '
    '+ C(authenticity_n):C(character_type) '
    '+ C(condition):C(character_type):C(authenticity_n)',
    data=starwars_moc_loose
).fit()

# Run ANOVA
anova_table_growth_sw = sm.stats.anova_lm(model, typ=2)
anova_table_growth_sw = anova_table_growth_sw.reset_index()

In [30]:
anova_table_growth_sw

Unnamed: 0,index,sum_sq,df,F,PR(>F)
0,C(condition),57400320.0,1.0,42.444564,7.337563e-11
1,C(character_type),1816948000.0,6.0,223.923137,1.948742e-283
2,C(authenticity_n),2600570.0,1.0,1.922987,0.1655337
3,C(condition):C(character_type),361956900.0,6.0,44.60806,9.637922000000001e-55
4,C(condition):C(authenticity_n),4515174.0,1.0,3.338738,0.06767223
5,C(authenticity_n):C(character_type),45662250.0,6.0,5.627479,7.500548e-06
6,C(condition):C(character_type):C(authenticity_n),12659090.0,6.0,1.560124,0.1543079
7,Residual,70368690000.0,52034.0,,


In [31]:
anova_table_growth_sw

Unnamed: 0,index,sum_sq,df,F,PR(>F)
0,C(condition),57400320.0,1.0,42.444564,7.337563e-11
1,C(character_type),1816948000.0,6.0,223.923137,1.948742e-283
2,C(authenticity_n),2600570.0,1.0,1.922987,0.1655337
3,C(condition):C(character_type),361956900.0,6.0,44.60806,9.637922000000001e-55
4,C(condition):C(authenticity_n),4515174.0,1.0,3.338738,0.06767223
5,C(authenticity_n):C(character_type),45662250.0,6.0,5.627479,7.500548e-06
6,C(condition):C(character_type):C(authenticity_n),12659090.0,6.0,1.560124,0.1543079
7,Residual,70368690000.0,52034.0,,


In [32]:
# Define the model for volatility groups starwars
model = ols(
    'volatility_y ~ C(condition) + C(character_type) + C(authenticity_n) '
    '+ C(condition):C(character_type) '
    '+ C(condition):C(authenticity_n) '
    '+ C(authenticity_n):C(character_type) '
    '+ C(condition):C(character_type):C(authenticity_n)',
    data=starwars_moc_loose
).fit()

# Run ANOVA
anova_table_volatility_sw = sm.stats.anova_lm(model, typ=2)
anova_table_volatility_sw = anova_table_volatility_sw.reset_index()


In [33]:
# Visualise H2 for starwars data - growth
sw_groupby_condition = starwars_moc_loose.groupby(['year','condition'])['selling_price'].median().reset_index().sort_values(['condition', 'year'])
sw_groupby_condition['yoy_growth'] = sw_groupby_condition['selling_price'].pct_change()
sw_groupby_condition = sw_groupby_condition.dropna()
sw_groupby_condition

Unnamed: 0,year,condition,selling_price,yoy_growth
6,2014,0,42.0,0.4
8,2015,0,46.34,0.103333
10,2016,0,52.275,0.128075
12,2017,0,62.0,0.186035
14,2018,0,60.99,-0.01629
16,2019,0,58.355,-0.043204
18,2020,0,73.13,0.253192
20,2021,0,106.685,0.45884
22,2022,0,108.67,0.018606
24,2023,0,105.815,-0.026272


In [34]:

fig, ax1 = plt.subplots(figsize=(10, 6))

# 🔹 Left Y-axis: Median Selling Price by Condition
sns.lineplot(
    data=sw_groupby_condition,
    x='year',
    y='selling_price',
    hue='condition',
    ax=ax1,
    marker='o'
)

ax1.set_ylabel('Selling Price', color='blue')
ax1.tick_params(axis='y', labelcolor='blue')

# Create a second Y-axis
ax2 = ax1.twinx()

# 🔸 Right Y-axis: YoY Growth by Condition
sns.lineplot(
    data=sw_groupby_condition,
    x='year',
    y='yoy_growth',
    hue='condition',
    ax=ax2,
    marker='o',
    alpha=0.3,
    linestyle='--'
)

ax2.set_ylabel('YoY Growth (%)', color='green')
ax2.tick_params(axis='y', labelcolor='green')

# 🔸 Title and Grid
plt.title('Price Development and YoY Growth by Condition')
ax1.grid(True)

# 🔹 Combine Legends from both axes
# Get legend handles and labels from both axes
handles1, labels1 = ax1.get_legend_handles_labels()
handles2, labels2 = ax2.get_legend_handles_labels()

# Remove duplicate labels (optional)
from collections import OrderedDict
combined = OrderedDict(zip(labels1 + labels2, handles1 + handles2))

# Show combined legend
ax1.legend(combined.values(), combined.keys(), loc='upper left')

plt.tight_layout()
st.pyplot(fig)



DeltaGenerator()

In [35]:
sw_groupby_condition

Unnamed: 0,year,condition,selling_price,yoy_growth
6,2014,0,42.0,0.4
8,2015,0,46.34,0.103333
10,2016,0,52.275,0.128075
12,2017,0,62.0,0.186035
14,2018,0,60.99,-0.01629
16,2019,0,58.355,-0.043204
18,2020,0,73.13,0.253192
20,2021,0,106.685,0.45884
22,2022,0,108.67,0.018606
24,2023,0,105.815,-0.026272


In [36]:
import os
print("Image exists?", os.path.exists('Users/amyby/Desktop/Bootcamp/final_project/images/pikachu.jpg'))

Image exists? False


In [37]:
pokemon_year_card = pd.merge(
    pokemon_year_card,
    pokemon_final_26[['Card Name', 'Rarity', 'Set Name']],
    on='Card Name',
    how='left'
)

In [38]:
pokemon_year_card.nunique()

Card Name          26
Unnamed: 1        116
Card Name.1        26
year                6
Graded            114
YoY_Growth        113
expected_price    110
Rarity             12
Set Name           15
dtype: int64

In [39]:
pokemon_year_card = pokemon_year_card.rename(columns={'Set Name': 'Set_Name'})

In [40]:
pokemon_year_card.columns= pokemon_year_card.columns.str.lower()

In [41]:
sw_groupby_condition

Unnamed: 0,year,condition,selling_price,yoy_growth
6,2014,0,42.0,0.4
8,2015,0,46.34,0.103333
10,2016,0,52.275,0.128075
12,2017,0,62.0,0.186035
14,2018,0,60.99,-0.01629
16,2019,0,58.355,-0.043204
18,2020,0,73.13,0.253192
20,2021,0,106.685,0.45884
22,2022,0,108.67,0.018606
24,2023,0,105.815,-0.026272


In [42]:
sw_returns_list

[-0.775,
 0.1206275720164609,
 -0.8843240761992196,
 5.126488095238096,
 0.3530888187191321,
 0.1151268549545236,
 0.180725477570294,
 -0.1762861297945828,
 0.2939586206896551,
 -0.2212083741951989,
 0.5412537640295647,
 0.2006678270754147,
 -0.0318934911242603,
 0.0197115090764621,
 -0.1651631851829651,
 0.3647329121194715,
 0.0029461279461278,
 -0.5716796055392362,
 0.5914518400587838,
 0.0516737206617932,
 -0.1377455822631984,
 0.073489477257298,
 0.3174308300395256,
 0.140049803486244,
 0.432037685202242,
 0.1828138782710968,
 -0.2456963519980113,
 -0.3165808444902163,
 0.325979505726341,
 -0.9513137557959814,
 2.017273576097105,
 -0.0592604053844963,
 0.1225328947368422,
 0.1573626373626375,
 -0.0758323838460566,
 0.0695890410958905,
 0.2950819672131146,
 0.3988330696202529,
 0.1803464121597737,
 -0.2518567321514134,
 0.0407493395244575,
 -0.097076923076923,
 -0.1167149429204292,
 -0.2536651234567902,
 0.0982165934349963,
 0.0278889150388326,
 0.1423010875787065,
 0.02224894768490

In [43]:
pokemon_year_card = pokemon_year_card.replace([np.inf, -np.inf], np.nan).dropna()

In [44]:
pokemon_year_card.columns = pokemon_year_card.columns.str.lower()

In [45]:
pokemon_year_card

Unnamed: 0,card name,unnamed: 1,card name.1,year,graded,yoy_growth,expected_price,rarity,set_name
59,Blastoise Base Set,2,Blastoise Base Set,2022,461650.0,-0.174522,570437.040000,Rare Holo,Base Set
60,Blastoise Base Set,2,Blastoise Base Set,2022,461650.0,-0.174522,570437.040000,Rare Holo,Base Set
61,Blastoise Base Set,2,Blastoise Base Set,2022,461650.0,-0.174522,570437.040000,Rare Holo,Base Set
62,Blastoise Base Set,2,Blastoise Base Set,2022,461650.0,-0.174522,570437.040000,Rare Holo,Base Set
63,Blastoise Base Set,2,Blastoise Base Set,2022,461650.0,-0.174522,570437.040000,Rare Holo,Base Set
...,...,...,...,...,...,...,...,...,...
5974,Venusaur Base Set,121,Venusaur Base Set,2025,314455.5,0.216241,423651.499454,Rare Holo,Base Set
5975,Venusaur Base Set,121,Venusaur Base Set,2025,314455.5,0.216241,423651.499454,Rare Holo,Base Set
5976,Venusaur Base Set,121,Venusaur Base Set,2025,314455.5,0.216241,423651.499454,Rare Holo,Base Set
5977,Venusaur Base Set,121,Venusaur Base Set,2025,314455.5,0.216241,423651.499454,Rare Holo,Base Set


In [46]:
import statsmodels.api as sm
from statsmodels.formula.api import ols
import streamlit as st

# Ensure categorical variables are properly formatted as category dtype
pokemon_year_card['graded'] = pokemon_year_card['graded'].astype('category')
pokemon_year_card['rarity'] = pokemon_year_card['rarity'].astype('category')
pokemon_year_card['set_name'] = pokemon_year_card['set_name'].astype('category')

# Define the model using lowercase variables
model = ols(
    'yoy_growth ~ graded + C(rarity) + C(set_name) + graded:C(rarity) + graded:C(set_name)',
    data=pokemon_year_card
).fit()


# Run ANOVA
anova_table_growth_poke = sm.stats.anova_lm(model, typ=2)

# Display ANOVA table in Streamlit
st.dataframe(anova_table_growth_poke)



  F /= J


DeltaGenerator()

In [47]:
anova_table_growth_poke

Unnamed: 0,sum_sq,df,F,PR(>F)
graded,-6.1037640000000005e-18,110.0,-3.7349560000000005e-17,1.0
C(rarity),,11.0,,
C(set_name),,14.0,,
graded:C(rarity),5.657802e-11,1210.0,3.147334e-11,1.0
graded:C(set_name),9.032034e-12,1540.0,3.94771e-12,1.0
Residual,8.463808,5697.0,,


In [None]:
pokemon_final_26.columns = pokemon_final_26.columns.str.lower()


In [None]:
pokemon_final26_volatility = pokemon_final_26.groupby('card name')['graded'].std()
pokemon_final_26 = pd.merge(
    pokemon_final_26,
    pokemon_final26_volatility,
    on='card name',
    how='left'
)
pokemon_final_26.rename(columns={'graded_y': 'volatility'}, inplace=True)
pokemon_final_26.rename(columns={'graded_x': 'graded'}, inplace=True)
pokemon_final_26.rename(columns={'set name': 'set_name'}, inplace=True)

In [67]:
# Define the model using lowercase variables
model = ols(
    'volatility ~ graded + C(rarity) + C(set_name) + graded:C(rarity) + graded:C(set_name)',
    data=pokemon_final_26
).fit()


# Run ANOVA
anova_table_volatility_poke = sm.stats.anova_lm(model, typ=2)




In [69]:
anova_table_volatility_poke

Unnamed: 0,sum_sq,df,F,PR(>F)
C(rarity),905337300000.0,11.0,457.6002,0.0
C(set_name),104566400000.0,14.0,41.52718,2.538376e-93
graded,0.0852321,1.0,4.738836e-10,0.9999826
graded:C(rarity),2063846000.0,11.0,1.043165,0.3526465
graded:C(set_name),2626713000.0,14.0,1.043165,0.3526465
Residual,223204700000.0,1241.0,,
