In [None]:
"""
sets.ipynb

Jupyter Notebook to perform analysis on card sets.

Authors: Jordan Bourdeau, Casey Forey
Date Created: 4/7/24
"""

In [None]:
# Imports
from importlib import reload
import json
from matplotlib import pyplot as plt
import numpy as np
import os
import pandas as pd
from sklearn.ensemble import IsolationForest

from src import constants as c
from src.calculate import calculate_market_data as cmd
from src.calculate import calculate_set_data as csd
from src.load import load_card_data as lcd
from src.load import load_set_data as lsd
from src.load import load_utils
from src.plot import plot_set_data as psd

In [None]:
# Re-execute load_utils so edits made to the module on disk are picked up
# without restarting the kernel.
reload(load_utils)

# Load dataset
# The file is looked up as 'AllPrintings.json' inside c.DATA_DIRECTORY (from
# src.constants). The loaded object is annotated as a dict and is the
# `all_printings` argument passed to the loader calls in the cells below.
all_printings_filepath: str = os.path.join(c.DATA_DIRECTORY, 'AllPrintings.json')
all_printings: dict = load_utils.load_json_data(all_printings_filepath)

In [None]:
# Re-execute the set-data loader module so local edits take effect in this kernel.
reload(lsd)

# Load the set table, then display only the rows whose 'set_code' equals 'LTR'
# (the filtered result is this cell's output; `sets` itself is left unfiltered).
# NOTE(review): assumes `sets` is a pandas DataFrame with a 'set_code' column --
# confirm against lsd.load_set_and_release_year.
sets = lsd.load_set_and_release_year()
sets[sets['set_code'] == 'LTR']

In [None]:
# Re-execute the set-data loader module so local edits take effect in this kernel.
reload(lsd)

# The call's result is not assigned to anything; the cell is run for whatever
# the helper does with the data.
# NOTE(review): the name suggests it persists per-set ban counts for the
# 'modern' format -- confirm the output location in src.load.load_set_data.
lsd.save_format_set_ban_counts(all_printings, 'modern')

In [None]:
# Re-execute the card-data loader module so local edits take effect in this kernel.
reload(lcd)

# Gotcha: this helper takes (format, all_printings), the reverse of the
# (all_printings, format) order used by the lsd helpers in this notebook.
# NOTE(review): presumably returns one row per card for its first printing in
# the 'modern' format -- confirm against src.load.load_card_data.
df = lcd.load_first_card_printing_in_format('modern', all_printings)
# Bare last expression: the loaded object is rendered as the cell output.
df

In [None]:
# Re-execute the set-data loader module so local edits take effect in this kernel.
reload(lsd)

# Augmenting set data with tournament data and number of banned cards
# The resulting frame is the input to the outlier-detection and set-table
# cells further down.
set_card_usages_and_bans: pd.DataFrame = lsd.load_augmented_set_data(all_printings, 'modern')
# Preview only the first rows rather than rendering the whole frame.
set_card_usages_and_bans.head()

In [None]:
# Re-execute the set-calculation module so local edits take effect in this kernel.
reload(csd)

# Seed NumPy's global RNG before the outlier search so re-running this cell
# starts from the same random state.
# NOTE(review): presumably csd.find_set_outliers draws from this global state --
# confirm; if it builds its own seeded estimator this line is redundant.
np.random.seed(0)

# First release year counted as "after the fire design principle" in the
# summary printed below.
FIRE_DESIGN_START_YEAR = 2019

# Selecting numerical columns to train the model on
columns_for_model = ['total_count', 'num_banned', 'set_size', 'mean_price', 'median_price', 'std_price']
# NOTE(review): not referenced again in the visible cells; kept because it is a
# notebook-level name and the lookup fails fast if any listed column is missing.
data_for_model = set_card_usages_and_bans[columns_for_model]

outliers = csd.find_set_outliers(set_card_usages_and_bans, columns_for_model)

print(f'Found {len(outliers)} outliers')
outliers_after_fire_design: pd.DataFrame = outliers[outliers['release_year'] >= FIRE_DESIGN_START_YEAR]
if len(outliers) > 0:
    print(f'{(len(outliers_after_fire_design) / len(outliers)) * 100:.2f}% of outlier sets came after the fire design principle.')
else:
    # With zero outliers the share is undefined; report that instead of
    # raising ZeroDivisionError and aborting the cell before its output.
    print('No outlier sets found; cannot compute the share released after the fire design principle.')
outliers

In [None]:
# Re-execute the plotting module so local edits take effect in this kernel.
reload(psd)

# Hand the `outliers` result from the outlier-detection cell to the plotting
# helper; that cell must have been run first in this kernel.
psd.plot_outlier_distribution(outliers)

In [None]:
# Re-execute the plotting module so local edits take effect in this kernel.
reload(psd)

# Hand the full augmented set frame (not just the outliers) to the table
# plotting helper.
psd.plot_set_table(set_card_usages_and_bans)