# Snapshot

This notebook shows how to send a request to run an extraction based on a selection query, monitor the job until it is completed, download the generated files, and start analysing the data.

In [1]:
import os
import requests as req
import pandas as pd
import fastavro
import json
from io import BytesIO
from dotenv import load_dotenv
load_dotenv()

# Service endpoint shared by every request in this notebook.
BASE_URL = "https://api.dowjones.com"

# The user key is read from the environment so it is never written into the
# notebook itself; it is None when FSS_USERKEY is not set.
USER_KEY = os.getenv("FSS_USERKEY")

# Headers attached to every API call: the user key plus JSON content negotiation.
REQ_HEADERS = dict([
    ("user-key", USER_KEY),
    ("Accept", "application/json"),
    ("Content-Type", "application/json"),
])

## Define the Query

In [2]:
# Selection query sent as the JSON body of the extraction request.
# NOTE(review): the company entries look like provider-specific company
# codes and 'start'/'end' like an ISO date range -- confirm against the API docs.
s_query = {
    'query': {
        'companies': ['MCROST', 'TSLMI', 'NVDCRP'],
        'start': '2024-12-08',
        'end': '2024-12-16',
        'articles': True,
        'format': 'avro',
    },
}

## Submit Job

In [3]:
# Submit the extraction request described by `s_query`.
snapshot_post_url = f"{BASE_URL}/fss/extractions"
payload = json.dumps(s_query)
# Without a timeout, requests waits forever if the service stops responding.
response = req.post(snapshot_post_url, data=payload, headers=REQ_HEADERS, timeout=60)
# Fail here with a clear HTTPError (bad key, rejected query, ...) instead of a
# confusing KeyError when the error body lacks 'extraction_id'.
response.raise_for_status()
snapshot_response = response.json()
# The short ID is the last dash-separated segment of the full extraction ID.
short_id = snapshot_response['extraction_id'].split('-')[-1]

In [4]:
# Show the job state returned on submission together with the short job ID.
submit_summary = "\n".join([
    f"State: {snapshot_response['state']}",
    f"Short ID: {short_id}",
])
print(submit_summary)

State: JOB_STATE_RUNNING
Short ID: 8a5d2cfa53


## Check Job Status

In [5]:
# Optional override: set `historic_short_id` to re-download files from an
# earlier extraction instead of the one submitted above.
#   articles = True          -> '0e2f212bf5' (109 files)
#   articles = True (Small)  -> 'a236f418d8' (1 file)
#   articles = False         -> 'eb26b0186e' (1 file)
historic_short_id = ''

# Only a 10-character override is used; anything else falls back to the
# short ID of the job submitted in this session.
active_short_id = historic_short_id if len(historic_short_id) == 10 else short_id
snapshot_id = f"dj-synhub-fss-extraction-{USER_KEY}-{active_short_id}"

In [6]:
# Query the current status of the extraction identified by `snapshot_id`.
status_post_url = f"{BASE_URL}/fss/extractions/{snapshot_id}"
# Without a timeout, requests waits forever if the service stops responding.
response = req.get(status_post_url, headers=REQ_HEADERS, timeout=60)
# Raise on HTTP errors (unknown ID, bad key, ...) rather than failing later
# with a KeyError on an error payload.
response.raise_for_status()
status_response = response.json()
check_state = status_response['state']
# The short ID is the last dash-separated segment of the full extraction ID.
check_id = status_response['extraction_id'].split('-')[-1]
# NOTE(review): 'destination' may be missing or null while the job is still
# running -- confirm against the API docs. Count zero files in that case
# instead of raising.
check_total_files = len(status_response.get('destination') or [])

In [7]:
# Summarise the status check: job state, short ID and number of listed files.
status_summary = "\n".join([
    f"State: {check_state}",
    f"Short ID: {check_id}",
    f"Total Files: {check_total_files}",
])
print(status_summary)

State: JOB_STATE_DONE
Short ID: 8a5d2cfa53
Total Files: 1


## Download Files

In [8]:
# Download every file listed in the status response into ./data/<short id>/.
# NOTE(review): 'destination' may be missing or null while the job is still
# running -- confirm against the API docs. It is treated as "no files" here.
files_list = status_response.get('destination') or []

# The target directory is the same for every file, so build it once.
data_dir = f"./data/{check_id}/"
if files_list:
    os.makedirs(data_dir, exist_ok=True)

saved_count = 0
failed_downloads = []  # (file name, HTTP status) for every non-200 response

for file in files_list:
    # Without a timeout, one stalled download would hang the whole cell.
    file_resp = req.get(file, headers=REQ_HEADERS, timeout=60)
    file_name = file.split('/')[-1]
    if file_resp.status_code != 200:
        # Keep going (best effort), but remember the failure so it is reported.
        failed_downloads.append((file_name, file_resp.status_code))
        continue
    file_path = os.path.join(data_dir, file_name)
    with open(file_path, 'wb') as f:
        f.write(file_resp.content)
    saved_count += 1

# Report the number of files actually written, not the number listed.
print(f"{saved_count} files saved to ./data/{check_id}/")
for failed_name, failed_status in failed_downloads:
    print(f"Download failed (HTTP {failed_status}): {failed_name}")

1 files saved to ./data/8a5d2cfa53/
