In [None]:
"""
markets.ipynb

Jupyter Notebook to perform analysis on card market data.

Author: Jordan Bourdeau
Date Created: 4/7/24
"""

In [None]:
%matplotlib inline
# Imports
import datetime
from importlib import reload
import json 
from matplotlib import pyplot as plt
import numpy as np
import os 
import pandas as pd
import seaborn as sns

from src import constants as c
import src.calculate.calculate_market_data as cmd
import src.load.load_price_data as lpd
import src.load.load_set_data as lsd
import src.load.load_tournament_data as ltd
import src.load.load_utils as load_utils
import src.plot.plot_set_data as psd

### Get a DataFrame with a card name, lowest price + the set it's from, UUID, and release year

Note: Some cards may not have an MTGO (Magic: The Gathering Online) version, for instance when a full-art version is sold only as a paper copy.

In [None]:
# Load the full printings dataset from disk
reload(load_utils)  # re-import so edits to the helper module apply without a kernel restart

all_printings_filepath: str = os.path.join(
    c.DATA_DIRECTORY,
    'AllPrintings.json',
)
all_printings: dict = load_utils.load_json_data(all_printings_filepath)

In [None]:
reload(lpd)

# Load the card price frame and expose the card column as 'card_name', the key
# the tournament-usage merge further down joins on.
# A chained `rename` is used instead of `inplace=True` so the object returned
# by the loader is never mutated; the result is a new frame bound to the name.
card_price_df: pd.DataFrame = (
    lpd.load_card_price_df()
    .rename(columns={'card': 'card_name'})
)
card_price_df.head()

In [None]:
reload(cmd)  # refresh the calculation module in case it was edited

# Aggregate the card-level prices with the project helper; the following cell
# reads the 'mean_price', 'median_price' and 'std_price' columns of the result.
aggregate_data: pd.DataFrame = cmd.calculate_aggregate_set_prices(
    card_price_df,
)
aggregate_data

In [None]:
# Aggregate the aggregated data: compute the mean, median and standard
# deviation of each aggregated price column across all rows. No grouping is
# performed in this cell.
aggregate_data.agg({
    column: ['mean', 'median', 'std']
    for column in ('mean_price', 'median_price', 'std_price')
})

In [None]:
reload(psd)
%matplotlib inline

legend_params: list[tuple] = [
    (1993, 'green', '--', 'Magic First Comes Out (1993)'),
    (2011, 'orange', '--', 'Start of Modern Format (2011)'),
    (2019, 'red', '--', 'Fire Design Principle Implemented (2019)'),
    (2021, 'blue', '--', 'Modern Horizons 2 Released (2021)'),
]

psd.plot_average_card_price_over_time(card_price_df, datetime.date(2024, 3, 23), legend_params=legend_params)

In [None]:
reload(lsd)

# Augment the set data with tournament data and the number of banned cards
augmented_data: pd.DataFrame = lsd.load_augmented_set_data(
    all_printings,
    'modern',
)
# Spot-check the row(s) whose set code is 'MH2'
augmented_data.loc[augmented_data['set_code'] == 'MH2']

In [None]:
reload(psd)

# Milestones handed to the plotting helper.
# NOTE(review): each tuple looks like (year, colour, line style, legend label) -
# confirm against the signature in src.plot.plot_set_data.
legend_params: list[tuple] = [
    (2003, 'green', '--', 'Earliest Modern Legal Set (2003)'),
    (2019, 'red', '--', 'Fire Design Principle Implemented (2019)'),
    (2021, 'blue', '--', 'Modern Horizons 2 Released (2021)'),
]

# Only the price mean and standard deviation are superimposed here
metrics: list[str] = ['mean_price', 'std_price']
psd.plot_superimposed_max_min_metrics_by_year(
    augmented_data,
    metrics,
    datetime.date(2024, 3, 23),
    'modern',
    legend_params=legend_params,
)

In [None]:
reload(psd)

# Milestones handed to the plotting helper.
# NOTE(review): each tuple looks like (year, colour, line style, legend label) -
# confirm against the signature in src.plot.plot_set_data.
legend_params: list[tuple] = [
    (2003, 'green', '--', 'Earliest Modern Legal Set (2003)'),
    (2019, 'red', '--', 'Fire Design Principle Implemented (2019)'),
    (2021, 'blue', '--', 'Modern Horizons 2 Released (2021)'),
]

# Full metric list: price statistics plus ban and usage counts
metrics: list[str] = [
    'mean_price',
    'median_price',
    'std_price',
    'num_banned',
    'total_count',
]
# legend_params is passed positionally, exactly as in the original call
psd.plot_max_min_metrics_by_year(
    augmented_data,
    metrics,
    datetime.date(2024, 3, 23),
    'modern',
    legend_params,
)

In [None]:
# Load the 'modern' card counts (and player counts) from the '2023' data folder
base_path: str = os.path.join(c.DATA_DIRECTORY, '2023')
(modern_card_counts_df, player_counts_df) = ltd.load_format_card_counts(
    base_path,
    'modern',
)
modern_card_counts_df

In [None]:
# Join card prices to tournament usage on the card name, then express each
# row's usage as a percentage of the total count across the merged rows.
card_price_and_usage: pd.DataFrame = card_price_df.merge(
    modern_card_counts_df,
    on='card_name',
)
total_count: int = card_price_and_usage['total_count'].sum()
card_price_and_usage = card_price_and_usage.assign(
    percent=lambda frame: frame['total_count'] / total_count * 100,
)
card_price_and_usage

In [None]:
# Load 'AllPricesToday.json'. The analysis is being repeated for 1-12-2024,
# the day before the first 2024 set was released.
all_prices_today: dict = load_utils.load_json_data(
    os.path.join(c.DATA_DIRECTORY, 'AllPricesToday.json'),
)

In [None]:
# Spot-check the entry stored under a single hard-coded key of the 'data' mapping
sample_card_uuid: str = '00010d56-fe38-5e35-8aed-518019aa36a5'
all_prices_today['data'][sample_card_uuid]

In [None]:
# Load the master price file 'AllPrices.json'.
# NOTE(review): presumably the source for the 1-12-2024 prices used in the
# repeated analysis - confirm.
all_prices_master: dict = load_utils.load_json_data(
    os.path.join(c.DATA_DIRECTORY, 'AllPrices.json'),
)

In [None]:
# Preview only the first few entries: rendering the entire 'data' mapping as
# cell output is slow and bloats the saved notebook.
# NOTE(review): assumes 'data' is a dict keyed like all_prices_today['data']
# (which is indexed by a UUID string in an earlier cell) - confirm.
price_entries: dict = all_prices_master['data']
{uuid: price_entries[uuid] for uuid in list(price_entries)[:3]}

In [None]:
# Read the two cached CSV files from the cache folder under the data directory
cache_directory: str = os.path.join(c.DATA_DIRECTORY, c.CACHE)

lowest_price_df_from_data: pd.DataFrame = pd.read_csv(
    os.path.join(cache_directory, 'lowest_price_printings_2024-01-12.csv')
)
set_release_years: pd.DataFrame = pd.read_csv(
    os.path.join(cache_directory, 'set_release_years.csv')
)
set_release_years.head()

In [None]:
reload(lpd)

# Load the price frame for 2024-01-12 and apply the same 'card' -> 'card_name'
# rename that `card_price_df` receives, so both price frames share one schema
# and code keyed on 'card_name' works on either.
# `rename` leaves the frame unchanged when there is no 'card' column.
card_price_df_from_date: pd.DataFrame = (
    lpd.load_card_price_df(date_string='2024-01-12')
    .rename(columns={'card': 'card_name'})
)

In [None]:
# Repeat analysis with data from the specific date

reload(psd)
%matplotlib inline

legend_params: list[tuple] = [
    (1993, 'green', '--', 'Magic First Comes Out (1993)'),
    (2011, 'orange', '--', 'Start of Modern Format (2011)'),
    (2019, 'red', '--', 'Fire Design Principle Implemented (2019)'),
    (2021, 'blue', '--', 'Modern Horizons 2 Released (2021)'),
]

psd.plot_average_card_price_over_time(card_price_df_from_date, datetime.date(2024, 1, 12), legend_params=legend_params)

In [None]:
reload(lsd)

# Augment the set data with tournament data and the number of banned cards,
# this time for the 2024-01-12 date string
augmented_data_from_date: pd.DataFrame = lsd.load_augmented_set_data(all_printings, 'modern', date_string='2024-01-12')

# Build the mask from the dated frame itself. The original used
# `augmented_data['set_code']`, a mask from a different DataFrame: pandas
# aligns such a mask on the index, so it can select the wrong rows or raise
# when the two frames' indexes differ, and it made this cell depend on the
# earlier `augmented_data` cell having been run.
augmented_data_from_date[augmented_data_from_date['set_code'] == 'MH2']

In [None]:
reload(psd)

# Milestones handed to the plotting helper.
# NOTE(review): each tuple looks like (year, colour, line style, legend label) -
# confirm against the signature in src.plot.plot_set_data.
legend_params: list[tuple] = [
    (2003, 'green', '--', 'Earliest Modern Legal Set (2003)'),
    (2019, 'red', '--', 'Fire Design Principle Implemented (2019)'),
    (2021, 'blue', '--', 'Modern Horizons 2 Released (2021)'),
]

# Superimposed price mean / standard deviation for the dated data
metrics: list[str] = ['mean_price', 'std_price']
psd.plot_superimposed_max_min_metrics_by_year(
    augmented_data_from_date,
    metrics,
    datetime.date(2024, 1, 12),
    'modern',
    legend_params=legend_params,
)

In [None]:
reload(psd)

# Milestones handed to the plotting helper.
# NOTE(review): each tuple looks like (year, colour, line style, legend label) -
# confirm against the signature in src.plot.plot_set_data.
legend_params: list[tuple] = [
    (2003, 'green', '--', 'Earliest Modern Legal Set (2003)'),
    (2019, 'red', '--', 'Fire Design Principle Implemented (2019)'),
    (2021, 'blue', '--', 'Modern Horizons 2 Released (2021)'),
]

# Full metric list for the dated data: price statistics plus ban and usage counts
metrics: list[str] = [
    'mean_price',
    'median_price',
    'std_price',
    'num_banned',
    'total_count',
]
# legend_params is passed positionally, exactly as in the original call
psd.plot_max_min_metrics_by_year(
    augmented_data_from_date,
    metrics,
    datetime.date(2024, 1, 12),
    'modern',
    legend_params,
)

In [None]:
# # Create scatter plot with marginal histograms
# sns.jointplot(data=card_price_and_usage, x='price', y='percent', hue='rarity', kind='kde', marginal_ticks=True)
# plt.suptitle('% of Total Cards Played in Tournaments on Price and Rarity', y=1.02)
# plt.xlabel('Price ($)')
# plt.ylabel('% of Cards Played')
# plt.xlim(0, 5)
# plt.show()

In [None]:
# sns.kdeplot(data=card_price_and_usage, x='percent', y='price', hue='rarity', fill=True, cmap='viridis', levels=20, linewidths=1, alpha=0.5)
# plt.title('% of Total Cards Played in Tournaments on Price and Rarity')
# plt.ylabel('Price ($)')
# plt.xlabel('% of Cards Played')
# plt.xlim(0, 1.5)
# plt.ylim(0, 5)
# plt.show()