# Import Libraries

In [1]:
import csv, pprint, json, base64, zlib, struct

# Define Helper Functions

In [2]:
def base64ToJson(zippedString):
    json_str = zlib.decompress(base64.b64decode(zippedString)).decode()
    json_json = json.loads(json_str)
    return json_json

# Read In CSV File

In [3]:
!pwd

/home/alexchandra/Documents/Alex/capstone/adiConversion


In [4]:
!ls ./sampleFiles/4b80ff2eb1112815299d7a4e9a4a1957.csv

./sampleFiles/4b80ff2eb1112815299d7a4e9a4a1957.csv


In [5]:
!pwd

/home/alexchandra/Documents/Alex/capstone/adiConversion


#### For notes on CSV file format see comment at end of notebook

In [6]:
csvFilename = "./sampleFiles/4b80ff2eb1112815299d7a4e9a4a1957.csv"

In [7]:
import csv

f = open(csvFilename)
csv_f = csv.reader(f)

# Will print the first row of the CSV - the first alarm for that admission
'''
for row in csv_f:
    #print(row)
    print(row[0]) # Alarm ID
    print(row[1]) # Time since Admission
    pprint.pprint(base64ToJson(row[2])) # Alarm data
    break    
'''
'''
for row in csv_f:
    pprint.pprint(base64ToJson(row[2]))
    break
'''

'\nfor row in csv_f:\n    pprint.pprint(base64ToJson(row[2]))\n    break\n'

# Read In ADIBIN File

In [8]:
adibinFilename = "./sampleFiles/example.adibin"

### Define Format Strings for C Structs

#### [Relevant Python Documentation for Reading C Structs into Python](https://docs.python.org/3.5/library/struct.html "Python Docs for C Structs")

ADIBinaryFormat.h copied in comment at end of notebook

In [11]:
#Define Format Strings to decode Structs and Global Variables for ADI Files 

HEADER_LENGTH = 68
CHANNEL_TITLE_LENGTH = 96

ADI_FILE_HEADER_FORMAT_STRING = "=4sldlllllddllll"
sizeHeader=struct.calcsize(ADI_FILE_HEADER_FORMAT_STRING)

ADI_CHANNEL_TITLE_FORMAT_STRING = "=32s32sdddd"
sizeChannel=struct.calcsize(ADI_CHANNEL_TITLE_FORMAT_STRING)


# Sanity Check - File Header is 68 bytes and Channel Titles are 96 bytes
print(sizeHeader, sizeChannel)

68 96


### Parse ADIBIN File

#### [Excellent code snippets for working with Binary data in Python](https://www.devdungeon.com/content/working-binary-data-python "DevDungeon")

In [23]:
def parseChannels(byteBuffer):
    
    #Start at the beginning of the buffer
    byteBuffer.seek(0)
    
    #Parse File Header
    fileHeader = byteBuffer.read(HEADER_LENGTH)
    
    Magic\
    , Version\
    , secsPerTick\
    , Year\
    , Month\
    , Day\
    , Hour\
    , Minute\
    , Second\
    , trigger\
    , NChannels\
    , SamplesPerChannel\
    , TimeChannel\
    , DataFormat\
    = struct.unpack(ADI_FILE_HEADER_FORMAT_STRING, fileHeader)
    
    #sanity check
    print(Magic.decode('utf-8'))
    
    #Figure out length of rest of the file by checking DataFormat and SamplesPerChannel
    #DataFormat 1=double, 2=float, 3=16-bit integer
    #sizeof(double) = 8 bytes, sizeof(float) = 4 bytes, sizeof(16-bit integer) = 2 bytes
    if (DataFormat == 1):
        channelsLength = SamplesPerChannel*8
    elif (DataFormat == 2):
        channelsLength = SamplesPerChannel*4
    elif (DataFormat == 3): 
        channelsLength = SamplesPerChannel*2
    else:
        print('DataFormat Not Coded to 1,2,or 3 - Exception')
        channelsLength = 0
    
    #Parse Channel Titles
    for i in range(0, NChannels):
        
        #Read Channel Titles
        channelTitle = byteBuffer.read(CHANNEL_TITLE_LENGTH)
        
        ChannelTitle, Units, Scale, offset, RangeHigh, RangeLow \
        = struct.unpack(ADI_CHANNEL_TITLE_FORMAT_STRING, channelTitle)
        
        print(ChannelTitle.decode('utf-8'))
    
    #Parse Channel Signals
    for i in range(0, NChannels):
        
        #Read Channel Signal
        channelSignal = byteBuffer.read(channelsLength)
        
        print(channelSignal)
        print('\n\n\n\n\n\n\n\n')

In [24]:
#Read in adibin file, print
with open(adibinFilename, "rb") as adibin_file:
    #   
    parseChannels(adibin_file)

CFWB
I                               
II                              
III                             
V                               
AVR                             
AVL                             
AVF                             
AR2                             
SPO2                            
RR                              
b'\xc9\xff\x85\xff\xbd\xffG\x00Y\x00\x06\x00\xa1\xff\xfd\x00\xf2\x02\x00\x00\xe1\xff\xb9\xff\xd7\xffB\x003\x00\x05\x00\xc8\xff\xfc\x00\xe5\x02\x00\x00\x1c\x00\x1c\x00\x00\x00\x1e\x00\xe4\xff\x0e\x00\x0e\x00\xfb\x00\xe5\x02\x00\x00x\x00\x9b\x00$\x00\xdb\xffw\xff*\x00_\x00\xfb\x00\xd8\x02\x00\x00\xe7\x00\x1f\x017\x00\x81\xff\xfd\xfeX\x00\xab\x00\xfa\x00\xca\x02\x02\x00V\x01\x9d\x01F\x00\'\xff\x87\xfe\x88\x00\xf1\x00\xf9\x00\xbd\x02\x02\x00\xaa\x01\x13\x02h\x00\xe2\xfe"\xfe\xa1\x00=\x01\xf8\x00\xbd\x02\x02\x00\xbd\x01w\x02\xbc\x00\xbe\xfe\xe6\xfd\x80\x00\x99\x01\xf8\x00\xb0\x02\x02\x00\x8b\x01\xbe\x024\x01\xb4\xfe\xdc\xfd+\x00\xf9\x01\xf7\x00\xa2\x02\x08\x00D\

In [None]:
#Read in File Header and First Channel Title
with open(adibinFilename, "rb") as adibin_file:
    # Read the whole file at once
    #data = binary_file.read()
    #print(data)
    
    adibin_file.seek(0)  # Go to beginning
    fileHeader = adibin_file.read(68)
    #channelTitle = adibin_file.read(96)
    print(fileHeader)
    #print('\n')
    #print(channelTitle)

In [None]:
Magic, Version, secsPerTick, Year, Month, Day, Hour, Minute, Second, trigger, NChannels, SamplesPerChannel,\
    TimeChannel, DataFormat = struct.unpack(fileHeaderFormatString, fileHeader)

In [None]:
print(NChannels)

In [None]:
len(fileHeader)

In [None]:
len(channelTitle)

In [None]:
tupleFileHeader = struct.unpack(fileHeaderFormatString, fileHeader)
tupleChannelHeader = struct.unpack(channelTitleFormatString, channelTitle)

In [None]:
print(tupleFileHeader)
print('\n')
print(tupleChannelHeader)

In [None]:
ChannelTitle, Units, Scale, offset, RangeHigh, RangeLow = struct.unpack(channelTitleFormatString, channelTitle)
print(ChannelTitle)
print('\n')
print(ChannelTitle.decode('utf-8'))

In [None]:
'''
Note from Jacob Abba defining data format for CSV Patient ECG Files:


The csv is a sample of the format that I’ll be providing. 

Each CSV file will represent a single admission in a bed, 
and its filename is an encrypted version of that admission’s ID in our database.
 
Each row in the CSV represents a PVC-type alarm from that admission. 
The first column is an encrypted version of the alarm’s ID in our database. 
The second column is the time that the alarm occurred (in number of seconds since the start of the admission). 
The third column is the strip data for that alarm, which has been deflated and base64 encoded. 
Note that the rows in these files aren’t sorted in any particular order.
 
To get the strip data, simply use a library in your language of choice to read the base64 string to a buffer, 
deflate the buffer, and then encode the buffer into a string. In nodejs it looks something like this:
 
let zlib = require(‘zlib’);
 
let s = strip; //load strip
let buf = Buffer.from(s, 'base64'); //read strip into buffer
let json = zlib.unzipSync(buf).toString(); //deflate buffer and convert to a string
 
Once decoded, the strip is an array in JSON format and looks something like this (without the pretty print):
 
[
    {"Label":"I","ID":"7","Text":"-263,-211,-146,-108,-68,-35,3,…”},
    {"Label":"II","ID":"8","Text":"-312,-272,-220,-177,-137,-111,…”},
    …
]
 
Pretty self-descriptive; each object within the array represents a channel where 
the “Label” field contains the label, 
the “Text” field contains the waveform (with each sample separated by a comma). 
You can probably ignore the “ID” field, it’s just used internally by bedmaster.
 
A few notes on these strips:
1)      In our meeting Xiao and Ran said that they contain data 5 seconds before and 5 seconds after an alarm. 
As far as I can tell this is not correct; they seem to only contain data 10 seconds before the alarm. 
For your purposes this shouldn’t matter, but it’s good to know.

2)      Sampling frequency of the channels is always 240hz.

3)      Some files may contain different numbers of channels. 
Originally we were planning on only using files that had the 
8 channels (‘I’, ‘II’, ‘III’, ‘V’, ‘AVR’, ‘AVL’, ‘AVF’, and ‘SPO2’), 
and discarding other channels. But Ran and Xiao would be able to guide you better on how to handle this.

 
For the rules to automatically sort pvc alarms into true- and false-positive categories, 
you’ll also need timing of artifact alarms for each admission. 
I’ll work on a script to generate these as well, but it should be simpler since they don’t need to include strips. 
My plan is to just have text files for each admission (same name as the csv) 
with each line representing the relative timing of that alarm.
 
You can see Xiao’s earlier email with documentation on the adibin file format.
 
Hopefully this info is helpful to you, 
if anything is unclear or you need guidance on data conversion/reading the csv then 
feel free to reach out via text or email!
 
Jacob
'''

print('')

In [None]:
'''
Code in ADIBinaryFormat.h that shows the structure of the FileHeader and Channel Titles

/***************************************************************************
 * Translate Binary for LabChart for Windows
 *
 * ADIBinaryFormat.h
 *
 * Copyright (c) 2001-2009 ADInstruments Ltd.
 *
 * Translate Binary is a LabChart for Windows extension that enables data to be
 * moved to and from LabChart, in a simple binary format.
 *
 * A LCfW binary file has the following structure:
 *
 *  - A 68 byte file header (CFWBINARY structure) containing basic information
 *   about the data such as the sampling period, number of channels, trigger
 *   time, data format.
 *
 *  - For each channel, a 96 byte channel header (CFWBCHANNEL structure)
 *   containing information about the channel.
 *
 *  - The interleaved channel data. Data can be either double precision
 *   floating point, single precision floating point or 16 bit integer, as
 *   specified by the DataFormat parameter of the file header.
 *
 *
 * The CFWBINARY and CFWBCHANNEL structures are defined below, along with a
 * simple program which creates a two channel CfW binary file.
 *
 * Note that the following types must have the indicated size and that the
 * program needs to be run on a little endian machine (e.g. x86):
 *
 * sizeof(char)   = 1 byte
 * sizeof(short)  = 2 bytes
 * sizeof(long)   = 4 bytes
 * sizeof(double) = 8 bytes, i.e. 64 bit IEEE floating point
 *
 ******************************************************************************/
#ifndef _ADIBinFormat
#define _ADIBinFormat

#define CHANNEL_TITLE_LEN  32
#define UNITS_LEN          32
#define CFWB_VERSION       1

enum 
   {
   kBinFmtDouble  =  1,
   kBinFmtFloat,
   kBinFmtInt16,
   };

// The file and header structures must be packed on 1-byte boundaries.
// On Visual C++ the following pragma enforces this.
#pragma pack(1)

// Each LabChart for Windows binary file starts with the following structure:
struct CFWBINARY
   {
   char     magic[4];            // always "CFWB"
   long     Version;             // = CFWB_VERSION
   double   secsPerTick;         // sampling interval in seconds

   // Trigger Date and time information
   long    Year;                // 4 digit year
   long    Month;               // months 1 - 12
   long    Day;                 // days 1 - 31
   long    Hour;                // hours 0 - 23
   long    Minute;              // minutes 0 - 59
   double   Second;              // seconds
   double   trigger;             // Amount of pretrigger data in seconds.

   long     NChannels;           // Number of channels
   long     SamplesPerChannel;   // Number of sample points per channel
   
   // The TimeChannel flag indicates that the sample time of each sample is
   // interleaved as the first column of data. This is only valid for the floating
   // point data formats. For all releases of TranslateBinary up to and including
   // v1.3, sample time data can be included but it is not used.
   long     TimeChannel;         // 1 = time included as first channel, 0 = not included
   long     DataFormat;          // 1 = double , 2 = float, 3 = 16-bit integer
   };

// Then one of these for each of the 'NChannels':
struct CFWBCHANNEL
   {
   char     Title[CHANNEL_TITLE_LEN];  // Channel title string
   char     Units[UNITS_LEN];          // Channel units string

   // scale and offset are used to convert 16-bit samples into user units,
   // where  data = scale * (sample + offset)
   double   scale;                     // scale (= 1.0 for floating point data)
   double   offset;                    // offset (= 0.0 for floating point data)

   // The maximum and minimum values of the data
   // (not used in TranslateBinary up to and including v1.3)
   double   RangeHigh;
   double   RangeLow;
   };

// Back to default data structure packing
#pragma pack()

#endif   // sentinelRangeLow;
   };

// Back to default data structure packing
#pragma pack()

#endif   // sentinel

'''
print('')

In [None]:
with open(adibinFilename, "rb") as binary_file:
    data = binary_file.read()
    
    binary_file.seek(0)
    magic = binary_file.read(4)
    print(magic)

In [None]:
with open(adibinFilename, "rb") as binary_file:
    data = binary_file.read()
    
    binary_file.seek(0)
    magic = binary_file.read(4)
    version = binary_file.read(4)
    intVersion = int.from_bytes(version, byteorder='little')
    secsPerTick = binary_file.read(8)
    intSecsPerTick = int.from_bytes(secsPerTick, byteorder='little')
    print(magic, version, intVersion, secsPerTick, intSecsPerTick)
    

In [None]:
import sys
#reload(sys)
#sys.setdefaultencoding('utf-8')
sys.getdefaultencoding()

In [None]:
with open(adibinFilename, "rb") as binary_file:
    # Read the whole file at once
    data = binary_file.read()
    #print(data)
    
    binary_file.seek(0)  # Go to beginning
    couple_bytes = binary_file.read(68)
    print(couple_bytes.decode('utf-8'))

In [None]:
binary_data = b'test'
text = binary_data.decode('utf-8')
print(text)

In [None]:
print(binary_data)

In [None]:
base64_data = codecs.encode(binary_data, 'base64')

In [None]:
print(type(binary_message))

In [None]:
print(type(base64_data))

In [None]:
print(base64_data)