# Import Required Libraries
Import the necessary libraries, including pandas and matplotlib.

In [18]:
# Import Required Libraries
import json
import pandas as pd
import matplotlib.pyplot as plt

# Load the Data
Load the JSON data from data/raw/yugiohdb/cards.json into a pandas DataFrame.

In [20]:
# Load the Data
with open('../../data/raw/yugiohdb/cards.json') as file:
    data = json.load(file)

# Convert JSON data to pandas DataFrame
df = pd.DataFrame(data)

# Display the first few rows of the DataFrame
df.head()

Unnamed: 0,card_name,card_sets,card_icon,extracted_date,card_attribute,card_link,card_atk,card_def,card_level,card_rank,card_pendulum_scale
0,Recoded Alive,"[{'set_release_date': '2024-12-06', 'set_card_...",Normal Trap,2024-11-27,,,,,,,
1,Binary Sorceress,"[{'set_release_date': '2024-12-06', 'set_card_...",,2024-11-27,Earth,Link 2,1600.0,-,,,
2,Encode Talker,"[{'set_release_date': '2024-12-06', 'set_card_...",,2024-11-27,Light,Link 3,2300.0,-,,,
3,Boot Staggered,"[{'set_release_date': '2024-12-06', 'set_card_...",,2024-11-27,Light,,2300.0,500,Level 5,,
4,Deep Dark Trap Hole,"[{'set_release_date': '2023-03-10', 'set_card_...",Normal Trap,2024-11-27,,,,,,,


# Data Preprocessing
Clean and preprocess the data, handling any missing or inconsistent values.

In [21]:
# Data Preprocessing
# Convert 'set_release_date' to datetime format
df['set_release_date'] = pd.to_datetime(df['set_release_date'], errors='coerce')

# Drop rows with missing 'set_release_date'
df = df.dropna(subset=['set_release_date'])

# Reset index after dropping rows
df = df.reset_index(drop=True)

# Display the first few rows of the cleaned DataFrame
df.head()

KeyError: 'set_release_date'

# Extract Release Dates
Extract the release dates of the cards from the DataFrame.

In [None]:
# Extract Release Dates
release_dates = df['set_release_date']

# Display the first few release dates
release_dates.head()

# Analyze Time Trends
Analyze the time trends of card releases, such as the number of cards released per year.

In [None]:
# Analyze Time Trends

# Extract the year from the 'set_release_date' column
df['release_year'] = df['set_release_date'].dt.year

# Group by year and count the number of cards released each year
cards_per_year = df.groupby('release_year').size()

# Plot the number of cards released per year
plt.figure(figsize=(12, 6))
cards_per_year.plot(kind='bar')
plt.title('Number of Cards Released Per Year')
plt.xlabel('Year')
plt.ylabel('Number of Cards Released')
plt.show()

# Visualize Time Trends
Create visualizations to display the time trends of card releases using matplotlib or seaborn.

In [None]:
# Visualize Time Trends

# Plot the number of cards released per year using seaborn for better aesthetics
import seaborn as sns

plt.figure(figsize=(14, 7))
sns.barplot(x=cards_per_year.index, y=cards_per_year.values, palette='viridis')
plt.title('Number of Cards Released Per Year')
plt.xlabel('Year')
plt.ylabel('Number of Cards Released')
plt.xticks(rotation=45)
plt.show()

# Plot the cumulative number of cards released over time
df['cumulative_count'] = df.groupby('release_year').cumcount() + 1
cumulative_releases = df.groupby('release_year')['cumulative_count'].max()

plt.figure(figsize=(14, 7))
sns.lineplot(x=cumulative_releases.index, y=cumulative_releases.values, marker='o')
plt.title('Cumulative Number of Cards Released Over Time')
plt.xlabel('Year')
plt.ylabel('Cumulative Number of Cards Released')
plt.xticks(rotation=45)
plt.show()