In [1]:
# import subprocess

# # Popen returns immediately: although the command sleeps for 3 seconds, execution moves on to the next line in this cell (it does not block)
# process = subprocess.Popen("sleep 3; ls -la", stdout=subprocess.PIPE, text=True, shell=True) 

# print("is this blocked?")
# # This is actively reading from process.stdout as process is still executing in the background, kind of like async operation
# for line in process.stdout:
#     print("I have control between each output :D")
#     print(line.strip())

# print("Am I here?")

In [2]:
import pytest

# Define the function and the test
def add(*args):
    """Return the sum of all positional arguments.

    Called without arguments it returns 0, the default start value of
    the built-in ``sum``.
    """
    total = sum(args)
    return total

@pytest.fixture(autouse=True)
def setup():
    """Print a setup banner.

    Declared with ``autouse=True``, so pytest applies this fixture to tests
    in scope without them having to request it by name.
    """
    banner = "Initial Setup of Test.."
    print(banner)

def test_add():
    """Check that add() returns 3 for the arguments 1 and 2."""
    expected = 3
    actual = add(1, 2)
    assert actual == expected

In [3]:
# import random

# def generate_openfda_urls(year, total_files) -> list:
#     """Generate file urls for a partition"""
#     quarter = random.randint(1,4)
#     files = [f'https://download.open.fda.gov/drug/event/{year}q{quarter}/drug-event-{i:04d}-of-{total_files:04d}.json.zip'
#              for i in range(1,total_files+1)]
#     return files

# def generate_openfda_partition(start_year, end_year):
#     """ Sample data generator for drug event json for batch handling"""
#     partition_id = random.randint(start_year,end_year)
#     count = random.randint(2,100)
#     size_mb = round(random.uniform(50.0,3000.0),2)
#     files = generate_openfda_urls(partition_id, count)

#     return {
#         'partition_id' : str(partition_id),
#         'count' : count,
#         'size_mb' : size_mb,
#         'files' : files
#     }

# def create_drug_events_json(start_year=2007, end_year=2024, num_partitions= 20):
#     return {
#         'total_records' : random.randint(500000,1500000),
#         'partitions' : [generate_openfda_partition(start_year,end_year) for _ in range(random.randint(2,num_partitions))]
#     }

In [4]:
# import pytest

# @pytest.fixture
# def setup():
#     print("Setup phase")
#     yield 1
#     print("Cleanup phase")

# def test_something(setup):
#     assert setup == 1

In [5]:
# t = create_drug_events_json()
# len(t['partitions'])

In [6]:
def generate_openfda_urls(year, total_files, quarter=1) -> list:
    """Generate the download URLs for every file of one partition.

    Args:
        year: Year placed in the URL path, e.g. 2020 or '2020'.
        total_files (int): Number of files in the partition; one URL is
            produced per file, numbered 1..total_files.
        quarter (int): Quarter (1-4) placed in the URL path. Defaults to 1,
            which reproduces the value that used to be hard-coded.

    Returns:
        list: URLs of the form
        ``https://download.open.fda.gov/drug/event/{year}q{quarter}/drug-event-NNNN-of-MMMM.json.zip``.
        Empty when ``total_files`` is less than 1.

    Raises:
        ValueError: If ``quarter`` is not 1, 2, 3 or 4.
    """
    if quarter not in (1, 2, 3, 4):
        raise ValueError(f"quarter must be between 1 and 4, got {quarter!r}")

    # The directory part is identical for every file, so build it once.
    base_url = f'https://download.open.fda.gov/drug/event/{year}q{quarter}'
    return [f'{base_url}/drug-event-{i:04d}-of-{total_files:04d}.json.zip'
            for i in range(1, total_files + 1)]

# Deterministic variant: the caller supplies every value, so the expected result is known in advance
def generate_openfda_partition(year, count, size_mb):
    """Build one deterministic mock partition entry for the drug-event JSON.

    Returns a dict with the partition id (``year`` converted to a string),
    the file count, the size in MB rounded to two decimals, and the list of
    file URLs produced by ``generate_openfda_urls(year, count)``.
    """
    partition = {}
    partition['partition_id'] = str(year)
    partition['count'] = count
    partition['size_mb'] = round(size_mb, 2)
    partition['files'] = generate_openfda_urls(year, count)
    return partition

In [7]:
def create_drug_events_json(total_records=65000, partition_config=None):
    """
    Generate a mock OpenFDA drug events JSON structure for testing.

    Args:
        total_records (int): Total number of records to report in the JSON.
        partition_config (list of dict, optional): One configuration dict per
            partition. Each dict must contain:
                - partition_id (str): Year of the partition, e.g., '2012'.
                - count (int): Number of files in this partition.
                - size_mb (float): Total size of all files in this partition.
            When omitted (None), a single default partition for '2012' is
            used. An explicit empty list yields a result with no partitions.

    Returns:
        dict: ``{'total_records': total_records, 'partitions': [...]}`` with
        one entry per configured partition, each built by
        ``generate_openfda_partition``.

    Raises:
        KeyError: If a partition dict lacks one of the required keys.

    Example:
        >>> create_drug_events_json(
        ...     total_records=5,
        ...     partition_config=[
        ...         {'partition_id': '2020', 'count': 2, 'size_mb': 100.0},
        ...         {'partition_id': '2021', 'count': 3, 'size_mb': 150.0}
        ...     ]
        ... )
    """

    # Fall back to the default only when no config was given at all; an
    # explicit empty list is a valid request for zero partitions.
    if partition_config is None:
        partition_config = [{
            'partition_id' : '2012',
            'count' : 2,
            'size_mb' : 85.0,
        }]

    partitions = [
        generate_openfda_partition(year=p['partition_id'], count=p['count'], size_mb=p['size_mb'])
        for p in partition_config
    ]

    return {
        'total_records' : total_records,
        'partitions' : partitions
    }
    

In [8]:
# Two explicit partitions (2020 and 2021) so the generated output is predictable.
partition_params = [
    dict(partition_id='2020', count=5, size_mb=100.0),
    dict(partition_id='2021', count=8, size_mb=150.0),
]

# Left as the final expression so the notebook displays the resulting dict.
create_drug_events_json(total_records=15000, partition_config=partition_params)

{'total_records': 15000,
 'partitions': [{'partition_id': '2020',
   'count': 5,
   'size_mb': 100.0,
   'files': ['https://download.open.fda.gov/drug/event/2020q1/drug-event-0001-of-0005.json.zip',
    'https://download.open.fda.gov/drug/event/2020q1/drug-event-0002-of-0005.json.zip',
    'https://download.open.fda.gov/drug/event/2020q1/drug-event-0003-of-0005.json.zip',
    'https://download.open.fda.gov/drug/event/2020q1/drug-event-0004-of-0005.json.zip',
    'https://download.open.fda.gov/drug/event/2020q1/drug-event-0005-of-0005.json.zip']},
  {'partition_id': '2021',
   'count': 8,
   'size_mb': 150.0,
   'files': ['https://download.open.fda.gov/drug/event/2021q1/drug-event-0001-of-0008.json.zip',
    'https://download.open.fda.gov/drug/event/2021q1/drug-event-0002-of-0008.json.zip',
    'https://download.open.fda.gov/drug/event/2021q1/drug-event-0003-of-0008.json.zip',
    'https://download.open.fda.gov/drug/event/2021q1/drug-event-0004-of-0008.json.zip',
    'https://download.o

In [9]:
def partition_id_by_year(p):
    """Return the text before the first space of the stripped 'display_name'.

    For names such as '2012 Q1 (part 1 of 5)' this is the year ('YYYY').
    A missing 'display_name' key yields an empty string.
    """
    display_name = p.get('display_name', '')
    year, _, _ = display_name.strip().partition(" ")
    return year

def generate_openfda_urls(year, total_files, quarter=1) -> list:
    """Generate the download URLs for every file of one partition.

    Note:
        A function with this name is also defined in an earlier cell; once
        both cells have run, this later definition is the one in effect.

    Args:
        year: Year placed in the URL path, e.g. 2020 or '2020'.
        total_files (int): Number of files in the partition; one URL is
            produced per file, numbered 1..total_files.
        quarter (int): Quarter (1-4) placed in the URL path. Defaults to 1,
            which reproduces the value that used to be hard-coded.

    Returns:
        list: URLs of the form
        ``https://download.open.fda.gov/drug/event/{year}q{quarter}/drug-event-NNNN-of-MMMM.json.zip``.
        Empty when ``total_files`` is less than 1.

    Raises:
        ValueError: If ``quarter`` is not 1, 2, 3 or 4.
    """
    if quarter not in (1, 2, 3, 4):
        raise ValueError(f"quarter must be between 1 and 4, got {quarter!r}")

    # The directory part is identical for every file, so build it once.
    base_url = f'https://download.open.fda.gov/drug/event/{year}q{quarter}'
    return [f'{base_url}/drug-event-{i:04d}-of-{total_files:04d}.json.zip'
            for i in range(1, total_files + 1)]

def generate_mock_download_json(total_records=12000,partition_config=None):
    """Build a mock download-index JSON nested under results -> drug -> event.

    Args:
        total_records (int): Value reported under 'total_records'.
        partition_config (list of dict, optional): One dict per partition,
            each with a 'display_name' (e.g. '2012 Q1 (part 1 of 5)') and a
            'size_mb' value. When omitted (None), a single default partition
            is used. An explicit empty list yields a result with no
            partitions.

    Returns:
        dict: ``{'results': {'drug': {'event': {'total_records': ...,
        'partitions': [...]}}}}`` where each partition carries
        'display_name', 'size_mb' and 'file'.

    Raises:
        KeyError: If a partition dict lacks 'display_name' or 'size_mb'.
    """
    # Fall back to the default only when no config was given at all; an
    # explicit empty list is a valid request for zero partitions.
    if partition_config is None:
        partition_config = [{'display_name' : '2012 Q1 (part 1 of 5)', 'size_mb' : '5.0'}]

    # Shape of one partition entry being mimicked:
    #     {'display_name': '2016 Q4 (part 11 of 23)',
    #      'file': 'https://download.open.fda.gov/drug/event/2016q4/drug-event-0011-of-0023.json.zip',
    #      'size_mb': '9.78'}
    #
    # NOTE(review): only the year is taken from display_name, and the URL is
    # requested with total_files=1, so every partition of a given year gets
    # the same '...-0001-of-0001' URL regardless of the quarter and part
    # number in its display_name. Confirm that consumers of this mock do not
    # rely on distinct file URLs.
    partitions = [
        {
            'display_name' : p['display_name'],
            'size_mb' : p['size_mb'],
            'file' : generate_openfda_urls(partition_id_by_year(p),total_files=1)[0]
        }
        for p in partition_config
    ]

    return {
        'results' : {
            'drug' : {
                'event' : {
                    'total_records' : total_records,
                    'partitions' : partitions
                }
            }
        }
    }

In [10]:
# Three parts of the same '2012 Q1' partition, each with its own size.
params = [
    {'display_name' : f'2012 Q1 (part {part} of 3)', 'size_mb' : size}
    for part, size in enumerate(('5.0', '10.0', '25.0'), start=1)
]

# Left as the final expression so the notebook displays the resulting dict.
generate_mock_download_json(partition_config=params)

{'results': {'drug': {'event': {'total_records': 12000,
    'partitions': [{'display_name': '2012 Q1 (part 1 of 3)',
      'size_mb': '5.0',
      'file': 'https://download.open.fda.gov/drug/event/2012q1/drug-event-0001-of-0001.json.zip'},
     {'display_name': '2012 Q1 (part 2 of 3)',
      'size_mb': '10.0',
      'file': 'https://download.open.fda.gov/drug/event/2012q1/drug-event-0001-of-0001.json.zip'},
     {'display_name': '2012 Q1 (part 3 of 3)',
      'size_mb': '25.0',
      'file': 'https://download.open.fda.gov/drug/event/2012q1/drug-event-0001-of-0001.json.zip'}]}}}}