In [None]:
# NOTE(review): this star import is the only import visible in this notebook, so pd, glob, os,
# literal_eval and the helper functions called below (header, fetch_*, generate_changelog,
# save_notebook, footer) presumably all come from yugiquery — confirm before replacing it
# with explicit imports.
from yugiquery import *
header('Speed Duel')

---

# Data acquisition

## Fetch online data

In [None]:
# Timestamp of this run; reused further down in the saved file names and the git commit message
timestamp = pd.Timestamp.now()

In [None]:
# Fetch skill cards (skill_df is also used on its own in the Character section)
skill_df = fetch_skill()

# Fetch deck cards
speed_df = fetch_speed()

## Merge data

In [None]:
# Stack deck cards and skill cards into a single frame, sorted by the 'Name' column
# and re-indexed from zero.
full_df = (
    pd.concat([speed_df, skill_df], axis=0, ignore_index=True)
    .sort_values('Name')
    .reset_index(drop=True)
)
print('Data merged')

## Save data

In [None]:
# Snapshot file is named after the run timestamp, truncated to minute precision
snapshot_path = f'../data/all_speed_{timestamp.isoformat(timespec="minutes")}.csv'
full_df.to_csv(snapshot_path, index=False)
print('Data saved')

# Check changes

## Load previous data

In [None]:
# Get list of snapshot files, newest first (ordered by filesystem change time)
# NOTE(review): ctime is filesystem metadata — confirm it still reflects snapshot order
# after a fresh checkout; the timestamp embedded in the file name may be a sturdier key.
files_list = sorted(glob.glob('../data/all_speed_*.csv'), key=os.path.getctime, reverse=True)
# Get the second newest file if it exists: the newest one is expected to be the
# snapshot written by the "Save data" step of this run.
if len(files_list) > 1:
    previous_file = files_list[1]
    # Load csv avoiding converting "NA" to NaN (only empty fields count as missing)
    previous_df = pd.read_csv(previous_file, dtype=object, keep_default_na=False, na_values=[''])
    # Correct tuples: these columns were written as their string repr, so parse them back
    for tuple_col in ('Secondary type', 'Effect type', 'Archseries', 'Artwork'):
        previous_df[tuple_col] = previous_df[tuple_col].dropna().apply(literal_eval)
    # previous_df['Errata'] = previous_df['Errata'].dropna().apply(literal_eval)
    # Force dtypes to match current df
    previous_df = previous_df.astype(full_df[previous_df.columns].dtypes.to_dict())
    # Recover the snapshot timestamp from the file name. The extension is removed with
    # splitext: str.rstrip('.csv') strips any trailing '.', 'c', 's' or 'v' characters
    # rather than the literal suffix, and only worked because the stamp ends in a digit.
    previous_stem = os.path.splitext(os.path.basename(previous_file))[0]
    previous_ts = pd.to_datetime(previous_stem.split('_')[-1])
    print('File loaded')
else:
    previous_df = None
    print('No older files')

## Generate changelog

In [None]:
# Compare the previous snapshot against the freshly merged data.
# col='Name' is handed to generate_changelog — presumably the key used to match rows.
if previous_df is not None:
    changelog = generate_changelog(previous_df, full_df, col='Name')
    # Show and persist the changelog only when it has at least one row
    if not changelog.empty:
        display(changelog)
        changelog.to_csv(
            f'../data/speed_changelog_{timestamp.isoformat(timespec="minutes")}_{previous_ts.isoformat(timespec="minutes")}.csv',
            index=True,
        )
        print('Changelog saved')
else:
    print('Skipped')

# Data visualization

In [None]:
# Display the merged frame (deck cards + skill cards, sorted by 'Name')
full_df

## Property

In [None]:
# Number of distinct non-null values in the 'Property' column
full_df['Property'].nunique()

In [None]:
# Row count per 'Property' value, most frequent first
full_df['Property'].value_counts()

## Archseries

In [None]:
# explode() gives each entry of a list-like 'Archseries' cell its own row,
# so the count is over individual archseries rather than over combinations
full_df['Archseries'].explode().nunique()

In [None]:
# Row count per individual archseries (cells exploded to one entry per row first)
full_df['Archseries'].explode().value_counts()

## TCG Speed Duel status

In [None]:
# Number of distinct non-null values in the 'TCG Speed Duel status' column
full_df['TCG Speed Duel status'].nunique()

In [None]:
# Row count per 'TCG Speed Duel status' value, most frequent first
full_df['TCG Speed Duel status'].value_counts()

## TCG status

In [None]:
# Number of distinct non-null values in the 'TCG status' column
full_df['TCG status'].nunique()

In [None]:
# Row count per 'TCG status' value, most frequent first
full_df['TCG status'].value_counts()

## Character

In [None]:
# Number of distinct characters; read from skill_df alone rather than the merged frame
skill_df['Character'].nunique()

In [None]:
# Row count per character in skill_df, most frequent first
skill_df['Character'].value_counts()

# Epilogue

In [None]:
# Closing counterpart of the header() call at the top (helper presumably from yugiquery)
footer()

## HTML export

In [None]:
# Save notebook to disk before generating the HTML report, so the export reflects the latest outputs
save_notebook()

In [None]:
# Convert Speed.ipynb to HTML in the parent directory, removing cells tagged 'exclude'
# and hiding code inputs as well as input/output prompts
! jupyter nbconvert Speed.ipynb --output-dir='../' --to=HTML --TagRemovePreprocessor.enabled=True --TagRemovePreprocessor.remove_cell_tags='exclude' --TemplateExporter.exclude_input=True --TemplateExporter.exclude_input_prompt=True --TemplateExporter.exclude_output_prompt=True

## Git

In [None]:
# Stage paths matching the pattern (names containing 'Speed' or 'speed', relative to the
# parent directory); the quotes pass the glob to git instead of letting the shell expand it
! git add "../*[Ss]peed*"

In [None]:
# The {...} expression is evaluated in Python first: it builds a single-quoted commit
# message ending in the full ISO form of the run timestamp
! git commit -m {"'Speed duel update-" + timestamp.isoformat() + "'"}