# Keywords Table

## Introduction

The purpose of this notebook is to process and upload keyword data from MTGJSON into the postgresql database mtg_db. This is done through the following steps:
- Download the json file from MTGJSON's file server
- Check the version and date of the json file
- Pre-process the dictionary and convert it into a dataframe
- Push the keywords dataframe to the database "raw_data" schema

## Python Libraries

In [8]:
import getpass
import json
import lzma
import os

import numpy      as     np
import pandas     as     pd
import requests
from   sqlalchemy import create_engine, Table, Column, MetaData, Text, Date, text
from   sqlalchemy.dialects.postgresql import insert
from   sqlalchemy.engine import URL

## Functions

In [9]:
# Function for showing the date and version of the MTGJSON data
def data_recency_check(data, json_type):

    """
    Summarise the 'meta' section of an MTGJSON payload as a one-row DataFrame.

    Parameters
    ----------
    data : dict
        Parsed MTGJSON content. Must contain a 'meta' mapping holding the
        'date' and 'version' entries.

    json_type : str
        Label identifying which MTGJSON file the payload came from. It is
        copied unchanged into the result.

    Returns
    -------
    pd.DataFrame
        A single-row DataFrame with the columns:
        - 'json_type': the label passed in.
        - 'latest_date': the value of data['meta']['date'].
        - 'latest_version': the value of data['meta']['version'].

    Raises
    ------
    KeyError
        If 'meta', 'date' or 'version' is missing from the payload.
    """

    # Look the metadata section up once and reuse it for both fields
    meta = data['meta']

    # One-element lists so that each entry becomes a single-row column
    recency = {
        'json_type': [json_type],
        'latest_date': [meta['date']],
        'latest_version': [meta['version']],
    }

    return pd.DataFrame(recency)

In [10]:
# Function for upserting the recency check into PostgreSQL

def recency_check_upload(schema_name, table_name, dataframe):

    """
    Write recency check rows to a PostgreSQL table, inserting new rows and
    updating existing ones.

    Every row of the DataFrame is sent as its own INSERT statement. When a row
    with the same `json_type` (the primary key) is already present, its
    `latest_date` and `latest_version` are overwritten with the new values.

    Parameters
    ----------
    schema_name : str
        PostgreSQL schema that holds the target table.
    table_name : str
        Name of the target table.
    dataframe : pandas.DataFrame
        Recency check data with the columns:
        - 'json_type': identifier of the JSON file type.
        - 'latest_date': date of the latest file.
        - 'latest_version': version string of the latest file.

    Notes
    -----
    - Relies on a module-level SQLAlchemy `engine`; it is not passed in.
    - All statements run inside one `engine.begin()` block.
    - The upsert uses PostgreSQL's ON CONFLICT clause on `json_type`.
    """

    # Describe the target table so statements can be built against it
    recency_table = Table(
        table_name,
        MetaData(schema=schema_name),
        Column('json_type', Text, primary_key=True),
        Column('latest_date', Date),
        Column('latest_version', Text),
    )

    def _build_upsert(record):
        """Return the INSERT ... ON CONFLICT DO UPDATE statement for one row."""
        key = record['json_type']

        # The same two values are used for the insert and for the update
        refreshed = {
            'latest_date': record['latest_date'],
            'latest_version': record['latest_version'],
        }

        insert_stmt = insert(recency_table).values(json_type=key, **refreshed)
        return insert_stmt.on_conflict_do_update(
            index_elements=['json_type'],
            set_=refreshed,
        )

    # Send one statement per DataFrame row
    with engine.begin() as connection:
        for _, record in dataframe.iterrows():
            connection.execute(_build_upsert(record))

## Input

### Keywords Schema

| Column           | Renamed   | Datatype | Description                                              |
| ---              | ---       | ---      | ---                                                      |
| abilityWords     | abilities | STRING   | A list of ability words found in rules text on cards     |
| keywordAbilities | keywords  | STRING   | A list of keyword abilities found in rules text on cards |
| keywordActions   | actions   | STRING   | A list of keyword actions found in rules text on cards   |

In [11]:
# URL of the xz-compressed MTGJSON keywords file
url = "https://mtgjson.com/api/v5/Keywords.json.xz"

# Download the compressed file. The timeout (in seconds) stops a stalled
# connection from hanging the notebook; requests waits indefinitely without one
response = requests.get(url, timeout = 60)

# Fail here on an HTTP error status instead of trying to decompress an error page
response.raise_for_status()

# Decompress the .xz file
decompressed_bytes = lzma.decompress(response.content)

# Parse JSON into a dictionary
dict__keywords = json.loads(decompressed_bytes)

In [12]:
## Setting up credentials for accessing postgresql "mtg_db" database

# Connection settings are read from environment variables so that no secret is
# stored in the notebook. Non-secret settings fall back to the local defaults.
user     = os.environ.get("MTG_DB_USER", "postgres")
host     = os.environ.get("MTG_DB_HOST", "localhost")
port     = os.environ.get("MTG_DB_PORT", "5432")
database = os.environ.get("MTG_DB_NAME", "mtg_db")

# The password has no default: take it from MTG_DB_PASSWORD, or prompt for it
# when the variable is unset or empty
# NOTE(review): a password was previously committed in this cell and should be rotated
password = os.environ.get("MTG_DB_PASSWORD") or getpass.getpass(f"Password for {user}@{host}: ")

# Engine connection to postgresql. URL.create takes each part separately, so
# special characters in the password (e.g. '@' or '/') cannot corrupt the URL
engine = create_engine(URL.create(drivername = "postgresql+psycopg2"
                                 ,username   = user
                                 ,password   = password
                                 ,host       = host
                                 ,port       = int(port)
                                 ,database   = database))

In [13]:
## Make sure raw_data.data_recency exists before any recency row is written
query = """
        CREATE TABLE IF NOT EXISTS raw_data.data_recency (
             json_type      TEXT PRIMARY KEY
            ,latest_date    DATE
            ,latest_version TEXT);
        """

# Wrap the raw SQL once, then run it inside an engine.begin() block
create_table_stmt = text(query)
with engine.begin() as conn:
    conn.execute(create_table_stmt)

## Pre-processing

In [14]:
# Capture the date and version of the downloaded keywords file
df__data_recency = data_recency_check(data      = dict__keywords
                                     ,json_type = "keyword")

## Main Code

In [15]:
## Converting the dictionary to a dataframe with renamed, independently sorted columns

# Map each MTGJSON key to its column name explicitly, so the result does not
# depend on the order in which the keys appear in the JSON file. A missing key
# raises a KeyError instead of silently mislabelling a column.
keyword_column_names = {'abilityWords'     : 'abilities'
                       ,'keywordAbilities' : 'keywords'
                       ,'keywordActions'   : 'actions'}

# Sort each list on its own, dropping empty or missing entries
sorted_columns = {column_name : pd.Series(sorted(value
                                                 for value in dict__keywords['data'][json_key]
                                                 if value)
                                         ,dtype = object)
                  for json_key, column_name in keyword_column_names.items()}

# The lists have different lengths: pandas pads the shorter columns with NaN,
# which is then replaced by empty strings at the bottom of each column
df__keywords = pd.DataFrame(sorted_columns).fillna('')

## Output

In [16]:
# Store the keywords file date and version in raw_data.data_recency
recency_check_upload("raw_data", "data_recency", df__data_recency)

In [17]:
# Write the keywords dataframe to raw_data.keywords (if_exists="replace"),
# leaving the dataframe index out of the table
to_sql_options = {"name"      : "keywords"
                 ,"con"       : engine
                 ,"schema"    : "raw_data"
                 ,"if_exists" : "replace"
                 ,"index"     : False}
df__keywords.to_sql(**to_sql_options)

212

## Checks

In [18]:
# Read back the stored date and version for the keywords file
query = """
        SELECT *
        FROM raw_data.data_recency
        WHERE json_type = 'keyword'
        """
pd.read_sql_query(sql = query, con = engine)

Unnamed: 0,json_type,latest_date,latest_version
0,keyword,2025-08-20,5.2.2+20250820


In [19]:
# Read back ten rows of the uploaded keywords table
query = """
        SELECT *
        FROM raw_data.keywords
        LIMIT 10
        """
pd.read_sql_query(sql = query, con = engine)

Unnamed: 0,abilities,keywords,actions
0,Adamant,Absorb,Abandon
1,Addendum,Affinity,Activate
2,Alliance,Afflict,Adapt
3,Battalion,Afterlife,Amass
4,Bloodrush,Aftermath,Assemble
5,Celebration,Amplify,Attach
6,Channel,Annihilator,Behold
7,Chroma,Ascend,Bolster
8,Cohort,Assist,Cast
9,Constellation,Augment,Clash
