In [1]:
import gpt as g
import numpy as np
import os, sys

SharedMemoryMpi:  World communicator of size 1
SharedMemoryMpi:  Node  communicator of size 1
SharedMemoryMpi: SharedMemoryAllocate 1073741824 MMAP anonymous implementation 

__|__|__|__|__|__|__|__|__|__|__|__|__|__|__
__|__|__|__|__|__|__|__|__|__|__|__|__|__|__
__|_ |  |  |  |  |  |  |  |  |  |  |  | _|__
__|_                                    _|__
__|_   GGGG    RRRR    III    DDDD      _|__
__|_  G        R   R    I     D   D     _|__
__|_  G        R   R    I     D    D    _|__
__|_  G  GG    RRRR     I     D    D    _|__
__|_  G   G    R  R     I     D   D     _|__
__|_   GGGG    R   R   III    DDDD      _|__
__|_                                    _|__
__|__|__|__|__|__|__|__|__|__|__|__|__|__|__
__|__|__|__|__|__|__|__|__|__|__|__|__|__|__
  |  |  |  |  |  |  |  |  |  |  |  |  |  |  


Copyright (C) 2015 Peter Boyle, Azusa Yamaguchi, Guido Cossu, Antonin Portelli and other authors

This program is free software; you can redistribute it and/or modify
it under the terms of the 

In [2]:
fn_idx = "/data1/qcddata2/48I/psel-prop-psrc-light/traj-1122.qar.idx"
fn_qar = "/data1/qcddata2/48I/psel-prop-psrc-light.qar"

In [16]:
class qar_prop_io:
    def __init__(self,path):
        self.path = path
        self.fdimensions = []
        self.bytes_header = -1
        self.verbose = g.default.is_verbose("io")
        self.size = 1
        self.header_length = 24
        self.data_start_position = 0
        self.trajectory_offsets = {} #dict for the byte offsets within each trajectory .qar file
        self.file_entries = [] #list to hold dictionaries for each file type within the main .qar file
        g.barrier()

    def read_header(self):
        #ensure it is a file
        if not os.path.isfile(self.path):
            g.message("File does not exist")
            return False

        with open(self.path, "rb") as f:
            try: 
                line = self.getline(f) #grabs first line of the file looking for some sort of header
            except UnicodeDecodeError:
                return False

            #if this hits we are reading the index file, not the main qar file. 
            if line == "#!/usr/bin/env qar-idx-glimpse":
                g.message("reading index file")
                #do something else related to the index file here. 
                
            elif line == "#!/usr/bin/env qar-glimpse":
                g.message("reading main .qar file")
                

            if self.verbose:
                g.message(f"Qlattice file format; reading {self.path}")
                g.message(f"   {line}")

            header = True

            while header:
                line = self.getline(f)
                if self.verbose:
                    g.message(f"\t{line}")

                if line == "END_HEADER":
                    self.data_start_position = f.tell()
                    header = False

                    if self.verbose:
                        g.message(f"Header ends at byte position: {self.data_start_position}")
                    

    def getline(self, f):
        return f.readline().decode("utf-8").strip()

    def read_prop_data(self):
        #after the header the data is binary propagator data. 
        # We need to know the format of the binary data, and also the GPT type that the propagators need to be loaded into

        
        return 0
        
        

In [55]:
#goal of this class is to fill the prop_offsets dictionary with byte locations for all propagator data for each point
#in a single trajectory. 
class qar_idx_prop_io:
    def __init__(self, path):
        self.path = path
        self.prop_offsets = {} #dictionary to hold propagator byte offsets for a particular trajectory.
        self.verbose = False #g.default.is_verbose("io")

    def read_idx(self):
        if not os.path.isfile(self.path):
            g.message("file does not exist")
            return False

        with open(self.path, "rb") as f:
            current_entry = {}
            if self.verbose:
                g.message(f"Qlattice file format; reading {self.path}")
            
            for line in f:
                try:
                    line = line.decode("utf-8").strip()
                except UnicodeDecodeError:
                    return False

                if not line:
                    continue

                #line = self.getline(f).strip()
                
                if self.verbose:
                    g.message(f"Index line: {line}") #have a peek at the line

                #start a block
                if line.startswith("QAR-FILE-IDX"):
                    parts = line.split() #split the line by whitespace, save as a list. This will always have four entries
                    if len(parts) >= 4:
                        current_entry = {
                            "type": parts[1],
                            "index": parts[2],
                            "format": parts[3],
                            "xg": None,
                            "accuracy": None,
                            "offsets": None,
                            "wsnk": False
                        }

                elif current_entry and current_entry["xg"]  is None:
                    if line == "checkpoint.txt":
                        current_entry["xg"] = "checkpoint.txt"
                        current_entry['accuracy'] = 0
                    else:    
                        parts = line.split(" ; ")
                        if len(parts) >= 4:
                            current_entry['wsnk'] = True
                        elif len(parts) >= 3:
                            current_entry['wsnk'] = False
                            
                        current_entry['xg'] = parts[0]
                        #current_entry['type'] = parts[1] It seems like this is already set in the first line. 
                        current_entry['accuracy'] = parts[2]

                elif current_entry and current_entry["offsets"] is None:
                    offsets = [int(x) for x in line.split()]
                    current_entry['offsets'] = offsets


                    #composite key to look at all variations for a given position[
                    key = f"{current_entry['xg']};{current_entry['accuracy']}"
                    
                    self.prop_offsets[key] = {
                        "offsets": offsets,
                        "xg":current_entry['xg'],
                        "wsnk": current_entry["wsnk"],
                        "accuracy": current_entry["accuracy"], #maybe add more if necessary
                        "index": current_entry["index"],
                    }
                    
                    
    def getline(self, f):
        return f.decode("utf-8").strip()

In [56]:
q_idx = qar_idx_prop_io(fn_idx)
q_idx.read_idx()

In [59]:
entry = q_idx.prop_offsets.get("xg=(0,2,24,26);accuracy=1")
if entry:
    print(f"Offsets: {entry['offsets']}")
    print(f"Wall sink: {entry['wsnk']}") 
    print(f"Accuracy: {entry['accuracy']}")
else:
    print("Propagator not found")

Propagator not found


In [54]:
# Find all variants of a point
xg_point = "xg=(0,1,38,62)"
for key in q_idx.prop_offsets:
    if xg_point in key:
        print(f"{key}: {q_idx.prop_offsets[key]} \n")

xg=(0,1,38,62);wall_sink:True;accuracy=0: {'offsets': [62, 83, 131, 132, 221451, 47, 0, 221317], 'xg': 'xg=(0,1,38,62)', 'wsnk': True, 'accuracy': 'accuracy=0', 'index': '1'} 

xg=(0,1,38,62);wall_sink:False;accuracy=0.lat: {'offsets': [221451, 221473, 221514, 221515, 4940245, 40, 0, 4718728], 'xg': 'xg=(0,1,38,62)', 'wsnk': False, 'accuracy': 'accuracy=0.lat', 'index': '2'} 

xg=(0,1,38,62);wall_sink:True;accuracy=1: {'offsets': [4940245, 4940266, 4940314, 4940315, 5161634, 47, 0, 221317], 'xg': 'xg=(0,1,38,62)', 'wsnk': True, 'accuracy': 'accuracy=1', 'index': '3'} 

xg=(0,1,38,62);wall_sink:False;accuracy=1.lat: {'offsets': [5161634, 5161656, 5161697, 5161698, 9880428, 40, 0, 4718728], 'xg': 'xg=(0,1,38,62)', 'wsnk': False, 'accuracy': 'accuracy=1.lat', 'index': '4'} 



In [47]:
for i,key in enumerate(q_idx.prop_offsets.keys()):
    print(f"{i}, {key}")

0, checkpoint.txt;wall_sink:False;0
1, xg=(0,1,38,62);wall_sink:True;accuracy=0
2, xg=(0,1,38,62);wall_sink:False;accuracy=0.lat
3, xg=(0,1,38,62);wall_sink:True;accuracy=1
4, xg=(0,1,38,62);wall_sink:False;accuracy=1.lat
5, xg=(0,15,25,38);wall_sink:True;accuracy=0
6, xg=(0,15,25,38);wall_sink:False;accuracy=0.lat
7, xg=(0,16,24,84);wall_sink:True;accuracy=0
8, xg=(0,16,24,84);wall_sink:False;accuracy=0.lat
9, xg=(0,16,6,4);wall_sink:True;accuracy=0
10, xg=(0,16,6,4);wall_sink:False;accuracy=0.lat
11, xg=(0,2,24,26);wall_sink:True;accuracy=0
12, xg=(0,2,24,26);wall_sink:False;accuracy=0.lat
13, xg=(0,2,3,54);wall_sink:True;accuracy=0
14, xg=(0,2,3,54);wall_sink:False;accuracy=0.lat
15, xg=(0,20,4,69);wall_sink:True;accuracy=0
16, xg=(0,20,4,69);wall_sink:False;accuracy=0.lat
17, xg=(0,25,0,70);wall_sink:True;accuracy=0
18, xg=(0,25,0,70);wall_sink:False;accuracy=0.lat
19, xg=(0,25,7,43);wall_sink:True;accuracy=0
20, xg=(0,25,7,43);wall_sink:False;accuracy=0.lat
21, xg=(0,26,2,1);wall_

In [35]:
if len(q_idx.prop_offsets) == 0:
    print(f"dictionary is not populated")

dictionary is not populated


In [31]:
q = qar_prop_io(fn_idx)

In [None]:
q.read_header()

GPT :    6173.384978 s : Qlattice file format; reading /data1/qcddata2/48I/psel-prop-psrc-light/traj-1122.qar.idx
GPT :    6173.385459 s :    #!/usr/bin/env qar-idx-glimpse
GPT :    6173.385877 s : 	
GPT :    6173.386299 s : 	QAR-FILE-IDX 0 0 14
GPT :    6173.386638 s : 	checkpoint.txt
GPT :    6173.386994 s : 	28 44 59 60 62 14 0 0
GPT :    6173.387357 s : 	
GPT :    6173.387684 s : 	QAR-FILE-IDX 0 1 47
GPT :    6173.388013 s : 	xg=(0,1,38,62) ; type=0 ; accuracy=0 ; wsnk.lat
GPT :    6173.388418 s : 	62 83 131 132 221451 47 0 221317
GPT :    6173.388788 s : 	
GPT :    6173.389111 s : 	QAR-FILE-IDX 0 2 40
GPT :    6173.389495 s : 	xg=(0,1,38,62) ; type=0 ; accuracy=0.lat
GPT :    6173.389833 s : 	221451 221473 221514 221515 4940245 40 0 4718728
GPT :    6173.390129 s : 	
GPT :    6173.390503 s : 	QAR-FILE-IDX 0 3 47
GPT :    6173.390819 s : 	xg=(0,1,38,62) ; type=0 ; accuracy=1 ; wsnk.lat
GPT :    6173.391165 s : 	4940245 4940266 4940314 4940315 5161634 47 0 221317
GPT :    6173.39150

IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)



GPT :    6176.588111 s : 	QAR-FILE-IDX 0 2343 49
GPT :    6176.588373 s : 	xg=(31,45,30,45) ; type=0 ; accuracy=0 ; wsnk.lat
GPT :    6176.588614 s : 	5784957465 5784957486 5784957536 5784957537 5785178856 49 0 221317
GPT :    6176.588850 s : 	
GPT :    6176.589087 s : 	QAR-FILE-IDX 0 2344 42
GPT :    6176.589339 s : 	xg=(31,45,30,45) ; type=0 ; accuracy=0.lat
GPT :    6176.589586 s : 	5785178856 5785178878 5785178921 5785178922 5789897652 42 0 4718728
GPT :    6176.589827 s : 	
GPT :    6176.590066 s : 	QAR-FILE-IDX 0 2345 48
GPT :    6176.590319 s : 	xg=(31,46,3,49) ; type=0 ; accuracy=0 ; wsnk.lat
GPT :    6176.590565 s : 	5789897652 5789897673 5789897722 5789897723 5790119042 48 0 221317
GPT :    6176.590805 s : 	
GPT :    6176.591050 s : 	QAR-FILE-IDX 0 2346 41
GPT :    6176.591317 s : 	xg=(31,46,3,49) ; type=0 ; accuracy=0.lat
GPT :    6176.591570 s : 	5790119042 5790119064 5790119106 5790119107 5794837837 41 0 4718728
GPT :    6176.591793 s : 	
GPT :    6176.592016 s : 	QAR-FILE

IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)



GPT :    6179.779809 s : 	
GPT :    6179.780157 s : 	
GPT :    6179.780530 s : 	
GPT :    6179.780882 s : 	
GPT :    6179.781258 s : 	
GPT :    6179.781637 s : 	
GPT :    6179.782081 s : 	
GPT :    6179.782478 s : 	
GPT :    6179.782939 s : 	
GPT :    6179.783398 s : 	
GPT :    6179.783856 s : 	
GPT :    6179.784320 s : 	
GPT :    6179.784747 s : 	
GPT :    6179.785210 s : 	
GPT :    6179.785745 s : 	
GPT :    6179.786604 s : 	
GPT :    6179.793442 s : 	
GPT :    6179.793965 s : 	
GPT :    6179.794689 s : 	
GPT :    6179.795093 s : 	
GPT :    6179.795579 s : 	
GPT :    6179.796245 s : 	
GPT :    6179.796656 s : 	
GPT :    6179.797088 s : 	
GPT :    6179.797482 s : 	
GPT :    6179.797916 s : 	
GPT :    6179.798345 s : 	
GPT :    6179.798831 s : 	
GPT :    6179.799293 s : 	
GPT :    6179.799675 s : 	
GPT :    6179.800059 s : 	
GPT :    6179.800420 s : 	
GPT :    6179.800779 s : 	
GPT :    6179.801964 s : 	
GPT :    6179.802336 s : 	
GPT :    6179.802667 s : 	
GPT :    6179.803051 s : 	
G

IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)



GPT :    6183.035480 s : 	
GPT :    6183.036796 s : 	
GPT :    6183.038235 s : 	
GPT :    6183.039780 s : 	
GPT :    6183.041615 s : 	
GPT :    6183.042541 s : 	
GPT :    6183.043947 s : 	
GPT :    6183.045273 s : 	
GPT :    6183.046648 s : 	
GPT :    6183.048032 s : 	
GPT :    6183.049417 s : 	
GPT :    6183.050765 s : 	
GPT :    6183.052127 s : 	
GPT :    6183.053508 s : 	
GPT :    6183.054887 s : 	
GPT :    6183.056263 s : 	
GPT :    6183.057664 s : 	
GPT :    6183.059019 s : 	
GPT :    6183.060400 s : 	
GPT :    6183.061754 s : 	
GPT :    6183.063435 s : 	
GPT :    6183.064332 s : 	
GPT :    6183.065558 s : 	
GPT :    6183.066809 s : 	
GPT :    6183.068109 s : 	
GPT :    6183.069400 s : 	
GPT :    6183.070651 s : 	
GPT :    6183.071932 s : 	
GPT :    6183.073234 s : 	
GPT :    6183.074522 s : 	
GPT :    6183.075777 s : 	
GPT :    6183.077028 s : 	
GPT :    6183.078391 s : 	
GPT :    6183.079760 s : 	
GPT :    6183.081076 s : 	
GPT :    6183.082368 s : 	
GPT :    6183.084042 s : 	
G

In [61]:
'''
There are two file types here, a .qar file, and a .qar.idx file. The .qar file (presumably) holds all of the actual propagator data and the .idx file 
holds the indices and byte locations of specific propagators for each configuration. for example on the UConn server, there is a psel-prop-psrc-light.qar
file, and the index files exist only for individual configurations, such as traj-1122.qar.idx. 

There is a header for the .qar file, where it tells us byte information about the first configuration, checkpoint.txt file, and an individual point
we see in the .qar header that a full configuration has a byte offset of 10512714934, and a single point has byte offset 221317, along with some
additional data. the .idx file 'header' lists the information about each individual point, giving us a few byte offsets. 

the light psel psrc props, psel-prop-psrc-light.qar is 494097601961 bytes, the single configuration index file, traj-1122.idx is 581953 bytes.

We have one reader for the index files for a given trajectory. The code seems to be working. One issue is that the key to the 
dictionary we are saving for each trajectory is the point xg, but there are 4 different blocks in the index file for each point. 
This must be fixed by adding a second key perhaps. - composite key, the accuracy is included in the key since the accuracy is a degree of freedom
and because we can tell the truth value of the wall sink variable from the form of the accuracy, for wall sinks the acucracy is just a number, for point sink
the accuracy is a number, 0 or 1, followed by .lat. 

The goal is to use the byte offsets in this dictionary from the index file to load the data from the full .qar file.

I think the process is as follows:

1. in the contraction code we load in a set of points, and create a dictionary for the sets of points we want the propagators from
   so we should be able to write the contraction code to get the points, then with the points we can write some code to load in the propagators
   from one point to another. 

2. For the trajectory we are calculating a correlation function in, we load in that trajectory's index file, which tells us all of the byte offsets 
   for the propagators locations in the main .qar file.

3. we load in just the piece of the main .qar file for the trajectory we are dealing with, and then we use the offsets in the idx dictionary
   to load in specific propagators for specific points on the lattice. 

''';

In [25]:
with open(fn_qar, "rb") as f:
    f.seek(0,2)
    file_size = f.tell()
    print(f"filesize: {file_size} bytes")

filesize: 494097601961 bytes


In [26]:
with open(fn_idx, "rb") as f:
    f.seek(0,2)
    file_size = f.tell()
    print(f"filesize: {file_size} bytes")

filesize: 581953 bytes


In [27]:
10512714934/221317

47500.71135068702

In [28]:
2128*2128

4528384

In [None]:
#load the binary data for a single trajectory.

#loading code structure
