In [1]:
#import os
import json
#import pandas as pd
from datetime import datetime

import boto3
from botocore.exceptions import ClientError

from dotenv import load_dotenv
load_dotenv()

True

In [2]:
# add these variables to the .env file
#AWS_ACCESS_KEY_ID = os.environ.get('AWS_ACCESS_KEY_ID')
#AWS_SECRET_ACCESS_KEY = os.environ.get('AWS_SECRET_ACCESS_KEY')

In [3]:

import logging
# Logger used by ArxivMetadata to report DynamoDB client errors.
logger = logging.getLogger(__name__)

# True  -> connect to the explicitly configured endpoint below.
# False -> build the resource from a regular boto3 session in us-west-2.
__LOCAL__ = True

if not __LOCAL__:
    session = boto3.Session(region_name='us-west-2')
    dynamodb = session.resource('dynamodb')
else:
    # NOTE(review): the endpoint address and the 'abc123' keys look like
    # placeholders for a local/test DynamoDB instance -- confirm before sharing.
    dynamodb = boto3.resource(
        'dynamodb',
        endpoint_url='http://10.0.0.179:31942',
        region_name='us-west-2',
        aws_access_key_id='abc123',
        aws_secret_access_key='abc123',
    )

class ArxivMetadata:
    """Thin wrapper around one Amazon DynamoDB table accessed through Boto3.

    The wrapped table lives in ``self.table``. It is ``None`` until
    :meth:`exists` finds the table or :meth:`create_table` creates it.
    """
    def __init__(self, dyn_resource):
        """
        :param dyn_resource: A Boto3 DynamoDB resource.
        """
        self.dyn_resource = dyn_resource
        self.table = None

    def _require_table(self):
        """
        Returns the loaded table, failing early with a clear message when
        neither exists() nor create_table() has populated ``self.table`` yet.
        :return: The table stored in ``self.table``.
        :raises RuntimeError: If no table has been loaded or created.
        """
        if self.table is None:
            raise RuntimeError(
                "No DynamoDB table is loaded; call exists() or create_table() first.")
        return self.table

    def exists(self, table_name):
        """
        Determines whether a table exists. As a side effect, stores the table in
        a member variable when it does exist.
        :param table_name: The name of the table to check.
        :return: True when the table exists; otherwise, False.
        :raises ClientError: For any client error other than
                             ResourceNotFoundException (logged, then re-raised).
        """
        try:
            table = self.dyn_resource.Table(table_name)
            # load() is the call that actually contacts the service.
            table.load()
            exists = True
        except ClientError as err:
            if err.response['Error']['Code'] == 'ResourceNotFoundException':
                # A missing table is an expected outcome, not an error.
                exists = False
            else:
                logger.error(
                    "Couldn't check for existence of %s. Here's why: %s: %s",
                    table_name,
                    err.response['Error']['Code'], err.response['Error']['Message'])
                raise
        else:
            # Only remember the table when the lookup succeeded.
            self.table = table
        return exists

    def create_table(self, table_name):
        """
        Creates an Amazon DynamoDB table and waits until it exists.
        The table uses the article submission {yearmonth} as hash key and the
        article id as sort key (both strings).
        :param table_name: The name of the table to create.
        :return: The newly created table (also stored in ``self.table``).
        :raises ClientError: If the table cannot be created (logged, then re-raised).
        """
        try:
            self.table = self.dyn_resource.create_table(
                TableName=table_name,
                KeySchema=[
                    {'AttributeName': 'yearmonth', 'KeyType': 'HASH'},  # Partition key
                    {'AttributeName': 'id', 'KeyType': 'RANGE'}  # Sort key
                ],
                AttributeDefinitions=[
                    {'AttributeName': 'yearmonth', 'AttributeType': 'S'},
                    {'AttributeName': 'id', 'AttributeType': 'S'},
                ],
                ProvisionedThroughput={'ReadCapacityUnits': 10, 'WriteCapacityUnits': 10})
            self.table.wait_until_exists()
        except ClientError as err:
            logger.error(
                "Couldn't create table %s. Here's why: %s: %s", table_name,
                err.response['Error']['Code'], err.response['Error']['Message'])
            raise
        else:
            return self.table

    def log_request(self, req_timestamp_str, model, request_str, response_str, rating = 0):
        """
        Writes one request-log item to the loaded table.
        :param req_timestamp_str: Timestamp of the request, stored under 'timestamp'.
        :param model: Identifier of the model that was queried, stored under 'model'.
        :param request_str: The request text, stored under 'request'.
        :param response_str: The response text, stored under 'response'.
        :param rating: Rating attached to the response, stored under 'rating'
                       (defaults to 0).
        :raises RuntimeError: If no table has been loaded or created yet.
        :raises ClientError: If the write fails (logged, then re-raised).
        """
        # NOTE(review): this item carries no 'yearmonth'/'id' attributes, which
        # are the key attributes of a table made by create_table(); such a table
        # is expected to reject the write. Confirm which table this method is
        # meant to target.
        table = self._require_table()
        try:
            table.put_item(
                Item={
                    'timestamp': req_timestamp_str,
                    'model': model,
                    'request': request_str,
                    'response': response_str,
                    'rating': rating,
                }
            )
        except ClientError as err:
            logger.error(
                "Couldn't add request log %s to table %s. Here's why: %s: %s",
                model, table.name,
                err.response['Error']['Code'], err.response['Error']['Message'])
            raise

    def add_request_log_entry(self, query_model, req, resp, rating=0):
        """
        Logs the current model, request and response, stamped with the current
        local time as an ISO 8601 string.
        :param query_model: Identifier of the model that was queried.
        :param req: The request text.
        :param resp: The response text.
        :param rating: Rating attached to the response (defaults to 0).
        """
        # datetime.now() is naive local time; the string carries no UTC offset.
        iso_date = datetime.now().isoformat()
        self.log_request(iso_date, query_model, req, resp, rating)

# Name of the DynamoDB table; read as a global by get_arxiv_metadata().
table_name = 'arxiv-metadata'


In [4]:
def get_arxiv_metadata():
    """
    Build an ArxivMetadata wrapper around the global `dynamodb` resource and
    make sure the table named by the global `table_name` is available,
    creating it when exists() reports it missing.
    :return: The ArxivMetadata wrapper with its table populated.
    """
    metadata = ArxivMetadata(dynamodb)

    # Guard clause: nothing to create when the table is already there.
    if metadata.exists(table_name):
        return metadata

    print(f"\nCreating table {table_name}...")
    metadata.create_table(table_name)
    print(f"\nCreated table {metadata.table.name}.")
    return metadata

# Wrapper used by the following cells; this call creates the table if it is missing.
arxiv_metadata = get_arxiv_metadata()

In [5]:
def _yearmonth_from_id(arxiv_id):
    """Return the YYMM partition key encoded in an arXiv identifier.

    New-style ids look like ``YYMM.NNNNN``; old-style ids look like
    ``archive/YYMMNNN`` or ``archive.SC/YYMMNNN``, where the part before the
    slash may itself contain a dot.
    """
    if '/' in arxiv_id:
        # Old style: the first four characters after the slash are YYMM.
        return arxiv_id.rsplit('/', 1)[1][:4]
    return arxiv_id.split('.')[0]


def load_arxiv_metadata(table, path="arxiv-metadata-oai-snapshot.json"):
    """
    Bulk-load a JSON-lines arXiv metadata snapshot into a DynamoDB table.

    Each non-blank line is parsed as one JSON object, gets a 'yearmonth'
    attribute derived from its 'id', and is written through the table's
    batch writer.

    :param table: Object exposing ``batch_writer()`` (e.g. a Boto3 DynamoDB Table).
    :param path: Path of the snapshot file; defaults to the file name the
                 notebook originally hard-coded.
    :raises FileNotFoundError: If the snapshot file does not exist.
    :raises json.JSONDecodeError: If a non-blank line is not valid JSON.
    :raises KeyError: If a record has no 'id' field.
    """
    # Explicit UTF-8 so decoding does not depend on the platform default.
    with open(path, encoding="utf-8") as snapshot, table.batch_writer() as batch:
        for raw_line in snapshot:
            line = raw_line.strip()
            if not line:
                # Tolerate blank lines (e.g. a trailing newline at end of file).
                continue
            metadata_record = json.loads(line)
            metadata_record['yearmonth'] = _yearmonth_from_id(metadata_record['id'])
            batch.put_item(Item=metadata_record)

# Load the snapshot file into the table held by arxiv_metadata.
load_arxiv_metadata(arxiv_metadata.table)

In [6]:
from boto3.dynamodb.conditions import Key

#2308.08625

# Look up one article by its full primary key: partition key 'yearmonth'
# plus sort key 'id'.
# Other ids previously tried here: 1201.6082 (yearmonth 1201) and
# 2306.11589 (yearmonth 2306).
partition_matches = Key('yearmonth').eq('1401')
id_matches = Key('id').eq('1401.3841')
response = arxiv_metadata.table.query(
    KeyConditionExpression=partition_matches & id_matches)

In [7]:
response

{'Items': [{'submitter': 'Mark Owen Riedl',
   'comments': None,
   'abstract': '  Narrative, and in particular storytelling, is an important part of the human\nexperience. Consequently, computational systems that can reason about narrative\ncan be more effective communicators, entertainers, educators, and trainers. One\nof the central challenges in computational narrative reasoning is narrative\ngeneration, the automated creation of meaningful event sequences. There are\nmany factors -- logical and aesthetic -- that contribute to the success of a\nnarrative artifact. Central to this success is its understandability. We argue\nthat the following two attributes of narratives are universal: (a) the logical\ncausal progression of plot, and (b) character believability. Character\nbelievability is the perception by the audience that the actions performed by\ncharacters do not negatively impact the audiences suspension of disbelief.\nSpecifically, characters must be perceived by the audience