In [29]:
import boto3
import time
import pandas as pd

In [34]:
# Athena connection settings: region, catalog names, and result location
AWS_REGION = "us-east-2"  # Region the Athena client is created in; change to your region
DATABASE, TABLE = "chalk", "chalkjuice_data"  # Database and table referenced by the query
S3_OUTPUT = "s3://chalkjuice/golden_athena/"  # S3 output location for query results; replace with your bucket


In [35]:
# Create the boto3 Athena client for the configured region
athena_client = boto3.client(service_name="athena", region_name=AWS_REGION)

In [63]:
# Query parameters
# NOTE(review): team_name is not referenced by the query built in this cell.
team_name = "MIN"
year1, year2, year3 = 2021, 2022, 2023  # Seasons to pull; adjust as needed

# Render the season filter, then assemble the SQL text one line at a time
season_list = ", ".join(str(season) for season in (year1, year2, year3))
query = (
    "\n"
    f'    SELECT * FROM "{DATABASE}"."{TABLE}" \n'
    f"    WHERE season IN ({season_list});\n"
)

# Submit the query to Athena, directing its output to S3_OUTPUT
execution_request = {
    "QueryString": query,
    "QueryExecutionContext": {"Database": DATABASE},
    "ResultConfiguration": {"OutputLocation": S3_OUTPUT},
}
response = athena_client.start_query_execution(**execution_request)

# Keep the execution id; it is needed to poll for status and fetch results
query_execution_id = response["QueryExecutionId"]

In [64]:
# Wait for the query to reach a terminal state
TERMINAL_STATES = ("SUCCEEDED", "FAILED", "CANCELLED")
previous_state = None
while True:
    status = athena_client.get_query_execution(QueryExecutionId=query_execution_id)
    state = status["QueryExecution"]["Status"]["State"]

    # Report only state transitions so a long-running query does not flood the output
    if state != previous_state:
        print(state)
        previous_state = state

    if state in TERMINAL_STATES:
        break

    time.sleep(.1)  # Check every .1 seconds

if state != "SUCCEEDED":
    # Surface Athena's own explanation (when present) alongside the final state
    reason = status["QueryExecution"]["Status"].get("StateChangeReason", "no reason reported")
    raise Exception(f"Athena query failed with state: {state} ({reason})")

# Fetch every page of results. A single get_query_results call returns only the
# first page, so reading one response would silently truncate larger result sets.
paginator = athena_client.get_paginator("get_query_results")

columns = None
rows = []
for page_index, results in enumerate(paginator.paginate(QueryExecutionId=query_execution_id)):
    result_set = results["ResultSet"]

    # Column labels are taken from the first page's metadata
    if columns is None:
        columns = [col["Label"] for col in result_set["ResultSetMetadata"]["ColumnInfo"]]

    page_rows = result_set["Rows"]
    if page_index == 0:
        page_rows = page_rows[1:]  # Skip the header row, which appears only on the first page

    # Cells without a "VarCharValue" key become None
    rows.extend([col.get("VarCharValue") for col in row["Data"]] for row in page_rows)

# Convert to a pandas DataFrame; every value is a string (or None) at this point
df = pd.DataFrame(rows, columns=columns)
df = df.fillna("NA")  # Replace missing cells with the literal string "NA"
df.columns = df.columns.str.replace('_', ' ').str.title()  # e.g. "home_game" -> "Home Game"
df.head(10)



RUNNING
RUNNING
RUNNING
SUCCEEDED


Unnamed: 0,Date,Week,Team,Opponent,Result,Points,Points Allowed,Overtime,Home Game,Passing Com,...,Tds Pr,Tds Blocked Fg,Tds Blocked Punt,Tds Walkoff,Tds Other,1D Passes,1D Runs,Weekday,Game Duration Minutes,Season
0,9/7/2023,1,DET,KAN,W,21,20,0,1,22,...,0,0,0,0,0,11,8,Thursday,182,2023
1,9/7/2023,1,KAN,DET,L,20,21,0,0,21,...,0,0,0,0,0,10,6,Thursday,182,2023
2,9/10/2023,1,ARI,WAS,L,16,20,0,1,21,...,0,0,0,0,0,9,4,Sunday,188,2023
3,9/10/2023,1,ATL,CAR,W,24,10,0,0,15,...,0,0,0,0,0,6,7,Sunday,186,2023
4,9/10/2023,1,BAL,HOU,W,25,9,0,0,17,...,0,0,0,0,0,8,7,Sunday,196,2023
5,9/10/2023,1,CAR,ATL,L,10,24,0,1,20,...,0,0,0,0,0,9,9,Sunday,186,2023
6,9/10/2023,1,CHI,GNB,L,20,38,0,0,24,...,0,0,0,0,0,12,8,Sunday,190,2023
7,9/10/2023,1,CIN,CLE,L,3,24,0,1,14,...,0,0,0,0,0,3,2,Sunday,183,2023
8,9/10/2023,1,CLE,CIN,W,24,3,0,0,16,...,0,0,0,0,0,9,12,Sunday,183,2023
9,9/10/2023,1,DAL,NYG,W,40,0,0,1,13,...,0,1,0,0,0,8,7,Sunday,175,2023


In [None]:
# Parse the date strings so the sort below is chronological rather than lexical
df['Date'] = pd.to_datetime(df['Date'])

# Order by season descending, then by game date ascending within each season.
# `ascending` needs exactly one entry per key in `by`; a length mismatch makes
# sort_values raise ValueError.
df = df.sort_values(by=['Season', 'Date'], ascending=[False, True])

df.head(50)



Unnamed: 0,Date,Week,Team,Opponent,Result,Points,Points Allowed,Overtime,Home Game,Passing Com,...,Tds Pr,Tds Blocked Fg,Tds Blocked Punt,Tds Walkoff,Tds Other,1D Passes,1D Runs,Weekday,Game Duration Minutes,Season
0,2023-09-07,1,DET,KAN,W,21,20,0,1,22,...,0,0,0,0,0,11,8,Thursday,182,2023
1,2023-09-07,1,KAN,DET,L,20,21,0,0,21,...,0,0,0,0,0,10,6,Thursday,182,2023
2,2023-09-10,1,ARI,WAS,L,16,20,0,1,21,...,0,0,0,0,0,9,4,Sunday,188,2023
3,2023-09-10,1,ATL,CAR,W,24,10,0,0,15,...,0,0,0,0,0,6,7,Sunday,186,2023
4,2023-09-10,1,BAL,HOU,W,25,9,0,0,17,...,0,0,0,0,0,8,7,Sunday,196,2023
5,2023-09-10,1,CAR,ATL,L,10,24,0,1,20,...,0,0,0,0,0,9,9,Sunday,186,2023
6,2023-09-10,1,CHI,GNB,L,20,38,0,0,24,...,0,0,0,0,0,12,8,Sunday,190,2023
7,2023-09-10,1,CIN,CLE,L,3,24,0,1,14,...,0,0,0,0,0,3,2,Sunday,183,2023
8,2023-09-10,1,CLE,CIN,W,24,3,0,0,16,...,0,0,0,0,0,9,12,Sunday,183,2023
9,2023-09-10,1,DAL,NYG,W,40,0,0,1,13,...,0,1,0,0,0,8,7,Sunday,175,2023
