# DynamoDB Caching

## Dependencies

In [1]:
from packages.helpers.helpers import joel_boto
import pandas as pd
import hashlib
import zlib
import json

## Functions

In [2]:
def hash_query(query):
    """Return the hex-encoded SHA-256 digest of a query string.

    The string is encoded with the default ``str.encode()`` codec (UTF-8)
    before hashing, so identical query text always yields the same digest.
    """
    query_bytes = query.encode()
    digest = hashlib.sha256(query_bytes)
    return digest.hexdigest()

In [3]:
def compress_json(df):
    """Serialize a DataFrame to record-oriented JSON and zlib-compress it.

    Returns the compressed payload as ``bytes``; each row of ``df`` becomes
    one JSON object in a top-level JSON array.
    """
    records_json = df.to_json(orient="records")
    return zlib.compress(records_json.encode())

In [4]:
def decompress_json(compressed_data):
    """Inflate a zlib-compressed JSON payload and parse it.

    Returns whatever Python object the decoded JSON text represents.
    """
    raw_bytes = zlib.decompress(compressed_data)
    json_text = raw_bytes.decode()
    return json.loads(json_text)

## Custom Variables

In [5]:
# --- Athena settings ---
ATHENA_DATABASE, ATHENA_TABLE = "nfl", "nfl_games_all"
# S3 location passed to the Athena query helper; replace with your actual S3 bucket.
ATHENA_OUTPUT_BUCKET = "s3://chalkjuice-backend/nfl_games_all_athena_parquet/"

# --- DynamoDB settings ---
DYNAMODB_TABLE = "nfl_games_all"
table_name = DYNAMODB_TABLE  # lowercase alias of the same table name

# Partition key attribute name and its DynamoDB type code ('S' = string).
partition_key, attribute_type = 'query_hash', 'S'

In [6]:
# Instantiate the project's custom AWS helper class (joel_boto). Every Athena and
# DynamoDB call in the cells below goes through this single `jb` object.
jb = joel_boto()

✅ Logged in to ECR successfully.
✅ Connected to all clients successfully.


## Create DynamoDB table

In [7]:
# Create the cache table only when it does not exist yet, so this cell can be
# re-run safely against an already-provisioned table.
if not jb.check_dynamo_table_exists(table_name):
    jb.create_dynamodb_table(table_name, partition_key, attribute_type)
    print('table created')

✅ Table 'nfl_games_all' already exists.


## Add data to table

In [None]:
# Team codes substituted into the `team` filter of each Athena query below.
teams = [
    "ARI", "ATL", "BAL", "BUF", "CAR", "CHI", "CIN", "CLE", "DAL", "DEN",
    "DET", "GNB", "HOU", "IND", "JAX", "KAN", "LVR", "LAC", "LAR", "MIA",
    "MIN", "NWE", "NOR", "NYG", "NYJ", "PHI", "PIT", "SFO", "SEA", "TAM",
    "TEN", "WAS"
]

# One Athena query and one DynamoDB item per (season, team) pair.
for year in range(1967, 2024):
    for team in teams:

        # The SHA-256 of this query text is stored as the partition key, so
        # changing even one character of the query changes the key.
        query = f'''SELECT * FROM "nfl"."nfl_games_all" WHERE season = {year} AND team = '{team}';'''
        print(query)

        query_hash = hash_query(query)

        # Run the query and load its result set into a DataFrame
        query_execution_id = jb.query_athena(query, ATHENA_DATABASE, ATHENA_OUTPUT_BUCKET)
        df = jb.create_df_from_athena_query(query_execution_id)

        # Parse dates so the sort is chronological rather than lexical
        df['Date'] = pd.to_datetime(df['Date'])

        # Season descending, then Date ascending within a season
        df = df.sort_values(by=['Season', 'Date'], ascending=[False, True])

        # Back to strings so the dates serialize as readable text in the JSON
        df['Date'] = df['Date'].astype(str)
        compressed_json = compress_json(df)

        # `year` is stringified only inside the item; the loop variable itself
        # is left untouched so it stays an int for every team in the inner loop.
        dict_items = {
            "query_hash": {"S": query_hash},
            "compressed_json": {"B": compressed_json},  # Compressed JSON stored as binary
            "hits": {"N": "0"},
            "team": {"S": team},
            "year": {"S": str(year)}
        }

        # Store the query hash and the resulting data in the DynamoDB cache table
        # (DYNAMODB_TABLE, not the Athena table name that merely shares its value).
        jb.add_data_to_dynamo_table(DYNAMODB_TABLE, dict_items)
    

SELECT * FROM "nfl"."nfl_games_all" WHERE season = 1967 AND team = 'ARI';
Query finished with status: SUCCEEDED
SELECT * FROM "nfl"."nfl_games_all" WHERE season = 1967 AND team = 'ATL';
Query finished with status: SUCCEEDED
SELECT * FROM "nfl"."nfl_games_all" WHERE season = 1967 AND team = 'BAL';
Query finished with status: SUCCEEDED
SELECT * FROM "nfl"."nfl_games_all" WHERE season = 1967 AND team = 'BUF';
Query finished with status: SUCCEEDED
SELECT * FROM "nfl"."nfl_games_all" WHERE season = 1967 AND team = 'CAR';
Query finished with status: SUCCEEDED
SELECT * FROM "nfl"."nfl_games_all" WHERE season = 1967 AND team = 'CHI';
Query finished with status: SUCCEEDED
SELECT * FROM "nfl"."nfl_games_all" WHERE season = 1967 AND team = 'CIN';
Query finished with status: SUCCEEDED
SELECT * FROM "nfl"."nfl_games_all" WHERE season = 1967 AND team = 'CLE';
Query finished with status: SUCCEEDED
SELECT * FROM "nfl"."nfl_games_all" WHERE season = 1967 AND team = 'DAL';
Query finished with status: SU

## Check whether data is cached (optional: delete it if present)

In [8]:
# Set to True to actually remove the cached items that are found. Left False so
# that running this cell only reports what is cached.
DELETE_CACHED_ITEMS = False

for year in range(1967, 2024):
    # NOTE(review): this query has no `team` filter, so its hash differs from the
    # per-team query hashes; it only matches items keyed by a season-wide query.
    # Confirm that is the intent.
    query = f'''SELECT * FROM "nfl"."nfl_games_all" WHERE season = {year};'''
    query_hash = hash_query(query)

    if not jb.check_dynamo_item_exists_by_partition_key(DYNAMODB_TABLE, partition_key, query_hash):
        print(f"Item for year {year} was not cached.")
        continue

    if DELETE_CACHED_ITEMS:
        print(f"🗑️ Item for year {year} was cached — deleting...")
        jb.delete_dynamo_item_by_partition_key(DYNAMODB_TABLE, partition_key, query_hash)
    else:
        # Report honestly: nothing is deleted unless the flag above is enabled.
        print(f"Item for year {year} is cached (not deleted).")


Item was not cached.
Item was not cached.
Item was not cached.
Item was not cached.
Item was not cached.
Item was not cached.
Item was not cached.
Item was not cached.
Item was not cached.
Item was not cached.
Item was not cached.
Item was not cached.
Item was not cached.
Item was not cached.
Item was not cached.
Item was not cached.
Item was not cached.
Item was not cached.
Item was not cached.
Item was not cached.
Item was not cached.
Item was not cached.
Item was not cached.
Item was not cached.
Item was not cached.
Item was not cached.
Item was not cached.
Item was not cached.
Item was not cached.
Item was not cached.
Item was not cached.
Item was not cached.
Item was not cached.
Item was not cached.
Item was not cached.
Item was not cached.
Item was not cached.
Item was not cached.
Item was not cached.
Item was not cached.
Item was not cached.
Item was not cached.
Item was not cached.
Item was not cached.
Item was not cached.
Item was not cached.
🗑️ Item for year 2013 was cached —