In [4]:
import boto3
from botocore.exceptions import ClientError

# Initialize DynamoDB resource
dynamodb = boto3.resource('dynamodb', region_name='us-east-1')  # Change region

# Reference your table
table = dynamodb.Table('nova-ingest-ads-queue')
col_name = "data"

try:
    # Scan the table but only return the specific column (attribute)
    response = table.scan(
        ProjectionExpression=col_name  # Replace with your attribute
    )

    # Extract values
    items = response.get('Items', [])
    values = [item.get(col_name) for item in items]

    print("Fetched values:", values)

    # Handle pagination (DynamoDB Scan can return partial results)
    while 'LastEvaluatedKey' in response:
        response = table.scan(
            ProjectionExpression=col_name,
            ExclusiveStartKey=response['LastEvaluatedKey']
        )
        items = response.get('Items', [])
        values.extend(item.get(col_name) for item in items)

    print("All values:", values)

except ClientError as e:
    print(f"Error scanning table: {e.response['Error']['Message']}")


Error scanning table: Invalid ProjectionExpression: Attribute name is a reserved keyword; reserved keyword: data


In [7]:
from datetime import date, datetime, timezone

x = datetime.now(timezone.utc).isoformat()

In [8]:
type(x)

str

In [12]:
import re

ADS_BIBCODE_RE = re.compile(r"^\S{4}\S*$")  # (placeholder) keep loose unless you want strict ADS format


# bibcode = "2022yCat..22570049C"
bibcode = "2021ITNS...815C" 
def _clean_bibcode(cls, v):
    if v is None:
        raise ValueError("bibcode is required.")
    s = str(v).strip()
    if not s:
        raise ValueError("bibcode cannot be empty or whitespace.")
    # keep pattern loose unless you want to enforce the exact ADS bibcode grammar
    if not ADS_BIBCODE_RE.match(s):
        print("bibcode %r did not match ADS_BIBCODE_RE; keeping as-is", s)
        # logger.info("bibcode %r did not match ADS_BIBCODE_RE; keeping as-is", s)
    return s

_clean_bibcode(None, bibcode)

'2021ITNS...815C'

In [13]:
from astroquery.simbad import Simbad

_simbad = Simbad()
_simbad.add_votable_fields("ra(d)", "dec(d)", "otypes")

result = _simbad.query_object("V1324 Sco")
result

  _simbad.add_votable_fields("ra(d)", "dec(d)", "otypes")
  _simbad.add_votable_fields("ra(d)", "dec(d)", "otypes")


main_id,ra,dec,coo_err_maj,coo_err_min,coo_err_angle,coo_wavelength,coo_bibcode,otypes.origin,otypes.otype,otypes.otype_txt,matched_id
Unnamed: 0_level_1,deg,deg,mas,mas,deg,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
object,float64,float64,float32,float32,int16,str1,object,object,object,object,object
V* V1324 Sco,267.72462243384,-32.62252526723,4.4393,3.6886,90,O,2020yCat.1350....0G,from id Gaia DR2 4043499439062100096,*,*,V* V1324 Sco
V* V1324 Sco,267.72462243384,-32.62252526723,4.4393,3.6886,90,O,2020yCat.1350....0G,from bibcode ; bibcode=2020MNRAS.492L..40A,CV*,CV*,V* V1324 Sco
V* V1324 Sco,267.72462243384,-32.62252526723,4.4393,3.6886,90,O,2020yCat.1350....0G,from id NOVA Sco 2012 ; bibcode=2017ARep...61...80S,No*,No*,V* V1324 Sco
V* V1324 Sco,267.72462243384,-32.62252526723,4.4393,3.6886,90,O,2020yCat.1350....0G,from id V* V1324 Sco,V*,V*,V* V1324 Sco


In [14]:
result.colnames

['main_id',
 'ra',
 'dec',
 'coo_err_maj',
 'coo_err_min',
 'coo_err_angle',
 'coo_wavelength',
 'coo_bibcode',
 'otypes.origin',
 'otypes.otype',
 'otypes.otype_txt',
 'matched_id']

In [16]:
def _round_coord(x: float, places: int = 6) -> str:
    if x is None:
        return None
    # Gentle rounding; avoid "-0.0"
    r = round(float(x), places)
    r = 0.0 if r == -0.0 else r
    return f"{r:.{places}f}"

_round_coord(206.265987111)

'206.265987'

In [17]:
ADS_FIELDS = [
    "bibcode","bibstem","doctype","property","identifier","data",
    "date","entry_date","data","author","abstract","title"]

x = ",".join(ADS_FIELDS)
x

'bibcode,bibstem,doctype,property,identifier,data,date,entry_date,data,author,abstract,title'

In [18]:
# import logging
from typing import Any, Dict, List
from pydantic import ValidationError
# from nova_schema.nova import Nova
# # from nova_schema.harvest import HarvestCandidate
# from nova_schema.mapping.ads_mapping import map_ads_response_to_harvest
import os,boto3,json,sys
from pathlib import Path

# Example: add ../shared to sys.path (relative to current file)
shared_path = Path(__file__).resolve().parent.parent.parent / "aws_novae/nova-ingest/shared"
sys.path.append(str(shared_path))
from nova_schema.nova import Nova

# logger = logging.getLogger(__name__)
# logger.setLevel(logging.INFO)

ADS_API_URL = os.getenv("ADS_API_URL", "https://api.adsabs.harvard.edu/v1/search/query")

# # We still request these (to derive OA & heuristics), but won't return the heavy ones in payload
ADS_FIELDS = [
    "bibcode","bibstem","doctype","property","identifier","data",
    "date","entry_date","data","author","abstract","title"
]

def _build_ads_query(canonical: Dict[str, Any]) -> Dict[str, Any]:
    name = canonical.get("primary_name")
    q_parts = []
    if name:
        q_parts.append(f'object:"{name}"')
    # Add other heuristics (aliases, coordinates windows) if you like
    q = " AND ".join(q_parts) if q_parts else "*:*"
    # Request only fields you actually need
    fl = ",".join(ADS_FIELDS)
    return {"q": q, "fl": fl, "rows": 50, "sort": "date desc"}

def _ads_request(params: Dict[str, Any]) -> Dict[str, Any]:
    import os, requests
    token ="7eYEFm24avvj5QHN9bcNQlCs5AmVulwwxFqYElry"
    headers = {"Authorization": f"Bearer {token}"} if token else {}
    r = requests.get("https://api.adsabs.harvard.edu/v1/search/query",
                     params=params, headers=headers, timeout=20)
    r.raise_for_status()
    return r.json()

test_event = "/Users/tfinzell/Git/aws_novae/nova-ingest/.out_resolve.json"
with open(test_event, "r") as f:
    event = json.load(f)

canonical_in: Dict[str, Any] = event.get("canonical") or {}

nova = Nova(**canonical_in)
params = _build_ads_query(nova.model_dump(mode="json"))
raw = _ads_request(params)

NameError: name '__file__' is not defined