In [17]:
#3
#athena db config - llmach.csv
#AAI-540 Group 3 FP

This notebook implements a serverless analytics layer using Amazon Athena over benchmark data stored in Amazon S3.  

An external table is created to expose model performance results for SQL querying.  

No data is duplicated — only metadata is registered in the AWS Glue Data Catalog.  

The table provides structured inputs for downstream model capability and cost-performance analysis.

The dataset was cleaned in Notebook 0 and uploaded to S3 in Notebook 1.

In [18]:
import boto3
import sagemaker
from pyathena import connect
import pandas as pd

### Configure AWS and Athena Environment
Initialize AWS session, execution role, and Athena connection.

In [19]:
sess = sagemaker.Session()
bucket = sess.default_bucket()
role = sagemaker.get_execution_role()
region = boto3.Session().region_name
ingest_create_athena_db_passed = False

In [20]:
database_name = "dsoaws"
s3_staging_dir = "s3://{0}/athena/staging".format(bucket)
conn = connect(region_name=region, s3_staging_dir=s3_staging_dir)

### Create or Select Athena Database
Ensure the target Athena database exists for table registration.


In [21]:
statement = "CREATE DATABASE IF NOT EXISTS {}".format(database_name)
print(statement)
pd.read_sql(statement, conn)

CREATE DATABASE IF NOT EXISTS dsoaws


  pd.read_sql(statement, conn)


In [22]:
statement = "SHOW DATABASES"
df_show = pd.read_sql(statement, conn)
df_show.head(5)

  df_show = pd.read_sql(statement, conn)


Unnamed: 0,database_name
0,default
1,dsoaws
2,sagemaker_featurestore


### Define S3 Data Location for Athena Table
Specify dataset source path and dedicated S3 folder for table storage.


In [23]:
s3_data_path = f"s3://{bucket}/llmachievements.csv"
s3_table_path = f"s3://{bucket}/table2/"
print("s3_data_path:", s3_data_path)
print("s3_table_path:", s3_table_path)

s3_data_path: s3://sagemaker-us-east-1-907086662522/llmachievements.csv
s3_table_path: s3://sagemaker-us-east-1-907086662522/table2/


In [24]:
!aws s3 cp {s3_data_path} {s3_table_path}

copy: s3://sagemaker-us-east-1-907086662522/llmachievements.csv to s3://sagemaker-us-east-1-907086662522/table2/llmachievements.csv


In [25]:
#table2 - llmach.csv
table_name_csv = 'llmachievements'
conn = connect(region_name=region, s3_staging_dir=s3_staging_dir)
ingest_create_athena_table_csv_passed = False
dataexplore = pd.read_csv('llmachievements.csv')
dataexplore.head()

Unnamed: 0,Field,Achievement,Result,Human result,Outperforms human avg?,Model,Testing date,Extract
0,Music,97% of people can’t tell the difference betwee...,,,Yes,Multiple,Nov/2025,"""all participants were asked to listen to thre..."
1,Transcription,Transcribing handwritten historical documents.,99.44,96.0,Yes,Gemini 3,Nov/2025,"""The new Gemini model’s performance on HTR mee..."
2,Finance,Large Language Models pass CFA Level III.,79.1,50.0,Yes,o4-mini,Jul/2025,"""leading models demonstrate strong capabilitie..."
3,CBRN,LLMs can can accurately guide users through th...,,,Yes,GPT-4o,Jun/2025,"""we find that advanced AI models Llama 3.1 405..."
4,Health reviews,LLMs outperform humans in synthesizing results...,96.7,81.7,Yes,o3-mini-high,Jun/2025,"""We developed otto-SR, an end-to-end agentic w..."


In [26]:
dataexplore.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 57 entries, 0 to 56
Data columns (total 8 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   Field                   53 non-null     object 
 1   Achievement             55 non-null     object 
 2   Result                  24 non-null     float64
 3   Human result            12 non-null     float64
 4   Outperforms human avg?  53 non-null     object 
 5   Model                   54 non-null     object 
 6   Testing date            53 non-null     object 
 7   Extract                 53 non-null     object 
dtypes: float64(2), object(6)
memory usage: 3.7+ KB


In [27]:
!aws s3 cp {s3_data_path} {s3_table_path}

copy: s3://sagemaker-us-east-1-907086662522/llmachievements.csv to s3://sagemaker-us-east-1-907086662522/table2/llmachievements.csv


In [28]:
!aws s3 ls {s3_table_path}

2026-02-22 23:17:20      18853 llmachievements.csv


In [29]:
df_check = pd.read_csv("llmachievements.csv")
print(list(df_check.columns))

['Field', 'Achievement', 'Result', 'Human result', 'Outperforms human avg?', 'Model', 'Testing date', 'Extract']


In [30]:
#sql table creation
statement = """CREATE EXTERNAL TABLE IF NOT EXISTS {}.{}(
         Field STRING,
         Achievement STRING,
         Result FLOAT,
         Human_result FLOAT,
         Outperform_hum STRING,
         Model STRING,
         Testing_date STRING,
         Extract STRING
) ROW FORMAT DELIMITED 
  FIELDS TERMINATED BY ',' 
  LINES TERMINATED BY '\\n' 
LOCATION '{}'
TBLPROPERTIES ('skip.header.line.count'='1')""".format(
    database_name, table_name_csv, s3_table_path
)

In [31]:
pd.read_sql(statement, conn)

  pd.read_sql(statement, conn)


In [32]:
statement = "SHOW TABLES in {}".format(database_name)
df_show = pd.read_sql(statement, conn)
df_show.head(5)

  df_show = pd.read_sql(statement, conn)


Unnamed: 0,tab_name
0,aimodelpoll
1,amazon_reviews_parquet
2,amazon_reviews_tsv
3,lifearchitect
4,llmachievements


In [33]:
statement = """SELECT * FROM {}.{} LIMIT 10""".format(
    database_name, table_name_csv
)
print(statement)
df = pd.read_sql(statement, conn)
df.head()

SELECT * FROM dsoaws.llmachievements LIMIT 10


  df = pd.read_sql(statement, conn)


Unnamed: 0,field,achievement,result,human_result,outperform_hum,model,testing_date,extract
0,Music,97% of people can’t tell the difference betwee...,,,Yes,Multiple,Nov/2025,"""""""all participants were asked to listen to th..."
1,Transcription,Transcribing handwritten historical documents.,99.44,96.0,Yes,Gemini 3,Nov/2025,"""""""The new Gemini model’s performance on HTR m..."
2,Finance,Large Language Models pass CFA Level III.,79.1,50.0,Yes,o4-mini,Jul/2025,"""""""leading models demonstrate strong capabilities"
3,CBRN,LLMs can can accurately guide users through th...,,,Yes,GPT-4o,Jun/2025,"""""""we find that advanced AI models Llama 3.1 405B"
4,Health reviews,LLMs outperform humans in synthesizing results...,96.7,81.7,Yes,o3-mini-high,Jun/2025,"""""""We developed otto-SR"


### Summary

This notebook successfully registered the LLM benchmark dataset as an external table in Athena.

The data is now available for SQL querying and downstream model capability analysis.
