# This notebook displays how to create and query correlator data

## Import

In [None]:
from typing import Union, List

from itertools import product

import datetime
import pytz

from django.db import IntegrityError

from lattedb.project.formfac.models.data.correlator import (
    CorrelatorMeta,
    DiskCorrelatorH5Dset,
    TapeCorrelatorH5Dset,
)

Set a timezone for creating timezone aware objects

## Create a time for the given timezone

In [None]:
# List the short names of all US timezones known to pytz (for reference only)
us_timezones = sorted(tz_name for tz_name in pytz.all_timezones if "US" in tz_name)
print([tz_name.split("/")[-1] for tz_name in us_timezones])

timezone = pytz.timezone("US/Michigan")

# pytz timezones must be attached with `localize`: passing them as `tzinfo` to
# the datetime constructor does not select the UTC offset which is valid at the
# given date (see the pytz documentation).
time = timezone.localize(datetime.datetime(2020, 3, 11, 23, 59, 59, 1234))
time

## How to create a Disk or Tape entry for correlators

First create a meta object (this is the object which will eventually be replaced by the actual meta tables)

In [None]:
# Values which identify a single correlator
meta_infos = dict(corr="phi_qq", configuration=200, source="x22y21z20t19")

# `get_or_create` returns the object and a flag telling whether it was newly created
corr_meta, created = CorrelatorMeta.objects.get_or_create(**meta_infos)
corr_meta

Next create a disk or tape entry

In [None]:
disk_infos = {
    "name": "corr.h5",
    "path": "/summit/path/to/file/",
    # Build the dset path from the meta information so that it always matches
    # the meta entry this file is linked to (the same pattern is used for the
    # bulk push further below).
    "dset": (
        f"{meta_infos['corr']}/cfg_{meta_infos['configuration']}"
        f"/src_{meta_infos['source']}/array"
    ),
    "exists": True,
    "machine": "Summit",
    "date_modified": time,
    "meta": corr_meta,
}
# `get_or_create` returns the object and a flag telling whether it was newly created
disk_meta, created = DiskCorrelatorH5Dset.objects.get_or_create(**disk_infos)
disk_meta

Tape creation works the same way with the difference that you should use `TapeCorrelatorH5Dset` instead of `DiskCorrelatorH5Dset`

## Bulk push disk

In [None]:
# Create python objects for configurations 21 to 29 but do not push them to the db yet
meta_list = [
    CorrelatorMeta(**{**meta_infos, "configuration": cfg}) for cfg in range(21, 30)
]

# Push all objects to the db in bulk
## Note: This only works if objects do not exist
try:
    meta_objs = CorrelatorMeta.objects.bulk_create(meta_list)
except IntegrityError:
    meta_objs = []
    print("At least one object already exists in db")
else:
    print(meta_objs)

In [None]:
# Create one python file object per *existing* python meta object.
# Nothing is pushed to the db at this point.
disk_list = [
    DiskCorrelatorH5Dset(
        **{
            **disk_infos,
            "dset": f"phi_qq/cfg_{meta_obj.configuration}/src_x22y21z20t19/array",
            "meta": meta_obj,
        }
    )
    for meta_obj in meta_objs
]

# Push all objects to the db in bulk
## Note: This only works if objects do not exist
try:
    disk_objs = DiskCorrelatorH5Dset.objects.bulk_create(disk_list)
except IntegrityError:
    disk_objs = []
    print("At least one object already exists in db")
else:
    print(disk_objs)

## Queries

### Find out if file exists somewhere for given meta

In [None]:
def get_file_location(
    corr: str, configuration: int, source: str
) -> Union[DiskCorrelatorH5Dset, TapeCorrelatorH5Dset, None]:
    """Looks up if a given correlator can be found on disk or tape.

    Arguments:
        corr: Name of the correlator.
        configuration: Configuration number of the correlator.
        source: Source of the correlator.

    Returns the corresponding file object if one is found whose `exists` flag
    is set, else None.
    If both a disk and a tape object exist, the disk object is returned.
    """
    meta = CorrelatorMeta.objects.filter(
        corr=corr, configuration=configuration, source=source
    ).first()

    if meta is None:
        return None

    # Inspect the meta entry which was just looked up (and not some other meta
    # object living in the notebook namespace). Disk is checked before tape.
    for location in ("disk", "tape"):
        # Accessing a missing related object raises an AttributeError subclass,
        # in which case the default None is used.
        file_obj = getattr(meta, location, None)
        if file_obj is not None and file_obj.exists:
            return file_obj

    return None

In [None]:
# Look up the file for the meta information used above and show its details
file_obj = get_file_location(**meta_infos)
for attr in ("type", "machine", "file_address", "dset"):
    print(getattr(file_obj, attr))
print()

# Look up a correlator which is not expected to be in the db
file_obj = get_file_location("mres", 200, "bla")
print(file_obj)

# Cross-check script logic

Starting point for script is

Given values:
* `corr`
* `ensemble`
* `stream`
* `source_set` (this fixes list of `sources`)

List values:
* `configuration`

In [None]:
# Input parameters which are passed to `get_or_create_meta_entries` further below
CORR = "phi_qq"
ENS = "a09m134XL"
STREAM = "a"
# Split at "-" by `get_or_create_meta_entries` to obtain the range of sources
SRC_SET = "1-8"
# Every fifth configuration: 0, 5, ..., 55
CFGS = list(range(0, 60, 5))

The below function creates or gets and returns all meta entries related to the given parameters

In [None]:
def get_or_create_meta_entries(
    corr: str,
    configuration_range: List[int],
    ensemble: str,
    stream: str,
    source_set: str,
) -> List[CorrelatorMeta]:
    """Returns queryset of CorrelatorMeta entries for given input

    Every combination of source (taken from `source_set`, which is expected to
    look like "<first>-<last>") and configuration which has no entry yet is
    created in bulk.
    """
    # Attributes shared by all entries handled by this call
    shared = dict(corr=corr, ensemble=ensemble, stream=stream, source_set=source_set)

    def select_entries():
        """Builds the queryset of all meta entries matching the input."""
        return CorrelatorMeta.objects.filter(
            configuration__in=configuration_range, **shared
        )

    meta_entries = select_entries()

    first_src, last_src = source_set.split("-")
    srcs = range(int(first_src), int(last_src))  # inclusive or exclusive?

    # Collect python objects for all combinations which are not in the db yet
    candidates = (
        dict(shared, source=src, configuration=cfg)
        for src, cfg in product(srcs, configuration_range)
    )
    entries_to_create = [
        CorrelatorMeta(**candidate)
        for candidate in candidates
        if not meta_entries.filter(**candidate).first()
    ]

    # Nothing is missing: return what is already present
    if not entries_to_create:
        return meta_entries

    # Push missing entries and select again to include them
    created_entries = CorrelatorMeta.objects.bulk_create(entries_to_create)
    print(f"Created {len(created_entries)} entries")
    return select_entries()

### @walkloud: Please check the default kwarg logic for entries which do not exist!

The below function creates or gets and returns all tape/disk entries for related meta entries. You should check if the attributes make sense.

In [None]:
def get_or_create_tape_entries(
    meta_entries: List[CorrelatorMeta],
) -> List[TapeCorrelatorH5Dset]:
    """Returns queryset of TapeCorrelatorH5Dset entries for given CorrelatorMeta entries

    Meta entries which do not have a tape entry in the DB yet get one created
    in bulk with status does not exist (`exists=False`) and placeholder file
    attributes. Tape entries which are already present are left untouched.

    Arguments:
        meta_entries: The CorrelatorMeta entries to look up tape entries for.

    Note: `date_modified` of new entries is set to the notebook level `time`.
    """
    file_entries = TapeCorrelatorH5Dset.objects.filter(meta__in=meta_entries)

    # Placeholder attributes for entries which need to be created
    kwargs = {
        "name": "corr-name.h5",
        "path": "/abs/path/to/file/folder",
        "machine": "summit",
        "exists": False,
    }

    # Only create entries for metas which do not have one yet, so that
    # entries which are already in the db are not pushed a second time.
    known_meta_ids = set(file_entries.values_list("meta_id", flat=True))
    entries_to_create = []
    for meta in meta_entries:
        if meta.pk in known_meta_ids:
            continue
        data = kwargs.copy()
        data["dset"] = f"/path/to/dset/{meta.configuration}/{meta}"
        data["meta"] = meta
        data["date_modified"] = time
        entries_to_create.append(TapeCorrelatorH5Dset(**data))

    if entries_to_create:
        created_entries = TapeCorrelatorH5Dset.objects.bulk_create(entries_to_create)
        print(f"Created {len(created_entries)} entries")
        # Select again for the passed meta entries to include the new entries
        file_entries = TapeCorrelatorH5Dset.objects.filter(meta__in=meta_entries)

    return file_entries
    

In [None]:
def get_or_create_disk_entries(
    meta_entries: List[CorrelatorMeta],
) -> List[DiskCorrelatorH5Dset]:
    """Returns queryset of DiskCorrelatorH5Dset entries for given CorrelatorMeta entries

    Meta entries which do not have a disk entry in the DB yet get one created
    in bulk with status does not exist (`exists=False`) and placeholder file
    attributes. Disk entries which are already present are left untouched.

    Arguments:
        meta_entries: The CorrelatorMeta entries to look up disk entries for.

    Note: `date_modified` of new entries is set to the notebook level `time`.
    """
    file_entries = DiskCorrelatorH5Dset.objects.filter(meta__in=meta_entries)

    # Placeholder attributes for entries which need to be created
    kwargs = {
        "name": "corr-name.h5",
        "path": "/abs/path/to/file/folder",
        "machine": "summit",
        "exists": False,
    }

    # Only create entries for metas which do not have one yet, so that
    # entries which are already in the db are not pushed a second time.
    known_meta_ids = set(file_entries.values_list("meta_id", flat=True))
    entries_to_create = []
    for meta in meta_entries:
        if meta.pk in known_meta_ids:
            continue
        data = kwargs.copy()
        data["dset"] = f"/path/to/dset/{meta.configuration}/{meta}"
        data["meta"] = meta
        data["date_modified"] = time
        entries_to_create.append(DiskCorrelatorH5Dset(**data))

    if entries_to_create:
        created_entries = DiskCorrelatorH5Dset.objects.bulk_create(entries_to_create)
        print(f"Created {len(created_entries)} entries")
        # Select again for the passed meta entries to include the new entries
        file_entries = DiskCorrelatorH5Dset.objects.filter(meta__in=meta_entries)

    return file_entries
    

## What a script would look like

Run this in the beginning. This will make sure that the db has all entries present.
For example, if expected entries for all input parameters are not in the DB, they will be created.
The file status will be `exists=False`.
You should adjust the `path`, `dset` and other attributes to make sense.
Entries which are already present in the db will be pulled to the local system and you can work with them.

In [None]:
# Make sure that meta entries are present for all input parameters ...
meta_entries = get_or_create_meta_entries(
    corr=CORR,
    configuration_range=CFGS,
    ensemble=ENS,
    stream=STREAM,
    source_set=SRC_SET,
)
# ... and that tape and disk entries are present for these meta entries
tape_entries = get_or_create_tape_entries(meta_entries=meta_entries)
disk_entries = get_or_create_disk_entries(meta_entries=meta_entries)

In [None]:
# Meta entries whose file is marked as not existing on both tape and disk
pending_entries = meta_entries.filter(tape__exists=False, disk__exists=False)

for entry in pending_entries:
    # do logic

    # Mark the tape file as existing and store the change in the db
    tape_entry = entry.tape
    tape_entry.exists = True
    tape_entry.save()

    print(entry.disk.file_address)
    # This example stops after the first entry
    break

# Number of tape entries which are marked as existing
tape_entries.filter(exists=True).count()