In [1]:
# Import libraries
import numpy as np
import pandas as pd
import time
from time import strftime
from time import gmtime

In [2]:
# Define repo 
# Folder layout: everything is resolved from a single root directory.
root = 'C:/Users/Consultant/Desktop/Data/'
repoData = root  # the recording opened below is read straight from the root folder
repoCon = f'{root}NAT/'
# Name of the binary recording to decode
filename = 'z5700015________zr_57_83015_____dcu4____________dcucbmdata______190415_043212.bin'

In [3]:
# Load table
IDStation = 0

#Load data
# Read the whole binary recording into memory. Opening and reading happen in
# the same cell, and the context manager closes the file even if the read
# fails, so no handle is left open between cells.
with open(repoData + filename, "rb") as f:
    strbyte = f.read()

In [5]:
#Used in cycling_write_out
def find_all(s, c):
    """Return every index at which ``c`` occurs in ``s``, in ascending order.

    The search restarts one position after each hit, so overlapping
    occurrences are all reported. An empty list means ``c`` never occurs.
    """
    positions = []
    hit = s.find(c)
    while hit != -1:
        positions.append(hit)
        hit = s.find(c, hit + 1)
    return positions

In [6]:
### Change format from 0bxxx (with  mask) to 00010100 (without mask)
def octetfull(s):
    """Return the binary digit string ``s`` left-padded with zeros to 8 digits.

    ``s`` may be the output of ``bin()`` (``'0b101'``) or a bare digit string
    (``'101'``, ``'10000000'``). The ``'0b'`` prefix is removed only when it
    is actually present, so passing an already padded octet returns it
    unchanged instead of losing its two most significant bits.

    Strings longer than 8 digits are returned without padding or truncation.
    """
    digits = s[2:] if s.startswith('0b') else s
    return digits.rjust(8, '0')

In [7]:
# Based on table 3.3 of the documentation: returns the number of bits needed for a given data type code.
def switchtype(x):
    """Map a data type code (0-11) to the size of a value of that type.

    The returned size is 1, 8, 16, 32 or 64. Callers in this notebook use it
    as a number of bits when slicing the sample bit string.

    Raises KeyError for a type code outside 0-11.
    """
    size_by_type = {
        0: 1, 1: 1,
        2: 8, 3: 8,
        4: 16, 5: 16,
        6: 32, 7: 32,
        8: 64, 9: 64,
        10: 32,
        11: 64,
    }
    return size_by_type[x]

In [221]:
# Scratch exploration of the descriptor area; it repeats the slicing and
# splitting performed by InputData.body_decode so the raw pieces can be inspected.
body = strbyte[64:2216]
# Cut the area at every 0xCA byte.
body_list = body.split(b'\xca')
# All pieces except the last two.
body_list_sample = body_list[:-2] 
# The second-to-last piece is cut again at every 0xFE byte; whatever follows
# the final 0xFE is dropped.
test = body_list[-2].split(b'\xfe')[:-1]
# Bare expression: displays the resulting list of byte strings as the cell output.
test

[b'\x1aUnique serial number\x00\x0c\x01\x00',
 b'7Device instance indication enumeration in the car\x00\x02\x01\x01',
 b'\x1fDevice type e.g   DCU= 85\x00\x02\x01U',
 b'/Customer Code for the current trip. Utf-8\x00\x0c\x01G',
 b'$Previous station identifier\x00\x06\x04AAP\x00',
 b' Next station identifier\x00\x06\x04LEM\x00',
 b'\x1eOutside Air Temperature\x00\x04\x02?\x00']

In [8]:
# Class which initializes and structures the binary data (Input : BinFile, Output : Understable Data )
class InputData:
    """Decoder for one CBM binary recording held in memory as ``bytes``.

    ``__init__`` cuts the raw content into three areas (header, body, footer).
    Each decoding method then stores its results as attributes on the
    instance. The methods must be called in this order, because later steps
    read attributes set by earlier ones:

        header_decode -> body_decode -> footer_decode -> cycling_write_out
    """

    # (attribute name, label used in header_info, start offset, stop offset)
    # for every field of the header. All fields are little-endian unsigned
    # integers.
    _HEADER_FIELDS = (
        ('cbm_version', 'Version du CBM', 0, 2),
        ('nb_oct_allocated_IOs', 'Nombre Octets Alloués pour les IOs', 2, 3),
        ('nb_oct_allocated_analogs', 'Nombre Octets Alloués pour les Analogs', 3, 4),
        ('nb_vars_echantillon', 'Nombre de Variables Echantillons', 4, 6),
        ('nb_vars_EnteteSpec', 'Nombre de Variables Entêtes Specifiques', 6, 8),
        ('cycle_writing_area_size', 'Taille de la zone de data (octet)', 8, 12),
        ('nb_writing', 'Nombre Enregistrements', 12, 16),
        ('cbm_start_time', 'Temps de démarrage (s)', 16, 24),
        ('cbm_acquisition_time', 'Temps entre deux échantillons (ms)', 24, 26),
    )

    def __init__(self, strbyte):
        """Split the raw file content into header, body and footer areas.

        strbyte: full content of the binary file, as ``bytes``.
        """
        # NOTE(review): these offsets are hard-coded; presumably they match the
        # layout of the sample recording only -- confirm against the format
        # specification before using other files.
        self.header = strbyte[0:26]
        self.body = strbyte[64:2216]
        # NOTE(review): the slice stops at -1, so the very last byte of the
        # file is excluded -- confirm this is intended.
        self.footer = strbyte[4160:-1]

    def header_decode(self):
        """Decode the fixed-size header.

        Every field listed in ``_HEADER_FIELDS`` is stored as an attribute of
        the same name, and ``header_info`` receives one ``[label, value]`` pair
        per field, in header order.
        """
        self.header_info = []
        for attribute, label, start, stop in self._HEADER_FIELDS:
            value = int.from_bytes(self.header[start:stop], byteorder='little', signed=False)
            setattr(self, attribute, value)
            self.header_info.append([label, value])

    def body_decode(self):
        """Decode the variable descriptors stored in the body area.

        Fills:
          sample_variable      -- one list per sample variable with the pairs
                                  'size', 'name' (str), 'type', 'bitposition'
                                  and 'sampleposition'.
          spec_header_variable -- one list per specific-header variable with
                                  the pairs 'size', 'name' (bytes), 'type',
                                  'sizevalue' and 'value' (bytes).
          datasize, data_oct_position, data_byte_position
                               -- parallel lists (one entry per sample
                                  variable) consumed by cycling_write_out.
        """
        self.sample_variable = []
        self.spec_header_variable = []
        self.datasize = []
        self.data_oct_position = []
        self.data_byte_position = []

        # Every sample-variable record ends with 0xCA. According to the
        # original author's note the split yields
        # [sample 1, sample 2, ..., block of specific-header records, b''],
        # hence the last two pieces are excluded from the sample records.
        # NOTE(review): splitting on delimiter values would mis-parse a record
        # whose payload happens to contain 0xCA or 0xFE -- confirm these values
        # cannot occur inside a record.
        body_list = self.body.split(b'\xca')
        body_list_sample = body_list[:-2]
        # Inside the specific-header block each record ends with 0xFE; the
        # piece after the final 0xFE is dropped.
        body_list_header = body_list[-2].split(b'\xfe')[:-1]

        for element in body_list_sample:
            # Record layout: size byte, name, then three trailing bytes.
            type_code = element[-3]
            bit_position = element[-2]
            sample_position = element[-1]

            self.sample_variable.append([
                ['size', element[0]],
                ['name', element[1:-3].decode('latin-1')],
                ['type', type_code],
                ['bitposition', bit_position],
                ['sampleposition', sample_position],
            ])

            self.datasize.append(switchtype(type_code))
            # Despite its name, data_byte_position holds the 'bitposition'
            # field; data_oct_position holds the 'sampleposition' field.
            self.data_oct_position.append(sample_position)
            self.data_byte_position.append(bit_position)

        for element in body_list_header:
            # The name ends at the first 0x00 byte; the search starts after
            # the leading size byte so that byte can never be mistaken for
            # the terminator.
            index = element.find(b'\x00', 1)
            value_size = element[index + 2]

            self.spec_header_variable.append([
                ['size', element[0]],
                ['name', element[1:index + 1]],
                ['type', element[index + 1]],
                ['sizevalue', value_size],
                ['value', element[index + 3:index + 3 + value_size]],
            ])

    def footer_decode(self, verbose=True):
        """Decode the measurement frames stored in the footer area.

        Requires ``header_decode`` to have run (reads
        ``cycle_writing_area_size``). Fills ``measurement_data`` with one list
        per frame:

            ['U8Transition', <high hex digit of byte 0>]
            ['U8SleepMode', <low hex digit of byte 0>]
            ['U24TimeStamp', 'HH:MM:SS']
            ['u8ZipControlBits', <number of set bits>, <bin() string>]
            ['auZipBitData', <number of set bits>, <bit string>]
            ['measurement', [[<8-digit bit string>], ...]]

        verbose: when true (default) the control byte and zip bit data of
                 every frame are printed while decoding.
        """
        self.measurement_data = []

        # Every frame has at least 5 bytes: 1 status byte, 3 timestamp bytes
        # and 1 control byte.
        min_size_per_frame = 5
        empty_octet = '0' * 8

        index = 0
        while index < self.cycle_writing_area_size:
            # Always two hex digits, so a status byte below 0x10 yields '0'
            # for the high digit (hex() would yield the 'x' of its prefix).
            status = format(self.footer[index], '02x')
            frame = [
                ['U8Transition', status[0]],
                ['U8SleepMode', status[1]],
            ]

            ms = int.from_bytes(self.footer[index + 1:index + 4], byteorder='little', signed=False)
            frame.append(['U24TimeStamp', strftime("%H:%M:%S", gmtime(ms / 1000))])

            # Each set bit of the control byte announces one zip-data byte.
            control_byte = bin(self.footer[index + 4])
            nb_control_byte = control_byte.count('1')
            frame.append(['u8ZipControlBits', nb_control_byte, control_byte])

            if verbose:
                print('Control Byte = ' + control_byte)

            # Walk the control bits from least to most significant (the '0b'
            # prefix is skipped). A cleared bit contributes an all-zero octet;
            # a set bit contributes the next zip-data byte of this frame.
            data_bit = ''
            cursor = index + min_size_per_frame
            for flag in control_byte[:1:-1]:
                if flag == '0':
                    octet = empty_octet
                else:
                    octet = format(self.footer[cursor], '08b')
                    cursor += 1
                data_bit = octet + data_bit

            if verbose:
                print('ZipBitData = ' + data_bit)

            # Each set bit of the zip data announces one measurement byte;
            # they directly follow the zip-data bytes, i.e. start at cursor.
            nb_data_bit = data_bit.count('1')
            frame.append(['auZipBitData', nb_data_bit, data_bit])

            measurements = [
                [format(self.footer[cursor + offset], '08b')]
                for offset in range(nb_data_bit)
            ]
            frame.append(["measurement", measurements])

            self.measurement_data.append(frame)
            index = cursor + nb_data_bit

    def cycling_write_out(self):
        """Rebuild the value of every sample variable for every frame.

        Requires the three decoders to have run. A bit string covering the IO
        and analog bytes starts at all zeros; each frame overwrites only the
        octets it reports as changed, then every sample variable is read back
        from that string. ``datablock`` receives one list of integers per
        frame, in sample-variable order.
        """
        self.datablock = []
        # Bit string for the IO and analog areas. Octets are stored with their
        # least significant bit first, so bit n of octet m is at index m*8 + n.
        dataline = "0" * (self.nb_oct_allocated_IOs + self.nb_oct_allocated_analogs) * 8

        for i in range(self.nb_writing):
            frame = self.measurement_data[i]

            # Reversing the zip bit data puts bit 0 first, so the index of
            # each '1' is the number of an octet rewritten by this frame.
            changed_octets = find_all(frame[4][-1][::-1], '1')
            new_octets = frame[5][-1]

            for octet_number, new_octet in zip(changed_octets, new_octets):
                start = octet_number * 8
                # The stored octet is already an 8-digit string; it is only
                # reversed to match the LSB-first layout of dataline.
                dataline = dataline[:start] + new_octet[0][::-1] + dataline[start + 8:]

            # Read every variable once per frame, after all of the frame's
            # octets were applied. A frame without changes simply repeats the
            # previous values, in a list of its own.
            dataline_list = []
            for octet_pos, bit_pos, width in zip(self.data_oct_position,
                                                 self.data_byte_position,
                                                 self.datasize):
                start = octet_pos * 8 + bit_pos
                # Reverse back to most-significant-bit-first before parsing.
                dataline_list.append(int(dataline[start:start + width][::-1], 2))

            self.datablock.append(dataline_list)

In [9]:
data = InputData(strbyte)

In [10]:
# Run the decoding steps in dependency order:
#  - footer_decode loops until cycle_writing_area_size, which header_decode sets;
#  - cycling_write_out reads the byte counts from header_decode, the
#    size/position lists from body_decode and measurement_data from footer_decode.
data.header_decode()
data.body_decode()
data.footer_decode() 
data.cycling_write_out()

Control Byte = 0b1
ZipBitData = 00011001
Control Byte = 0b0
ZipBitData = 00000000
Control Byte = 0b11
ZipBitData = 0100000000000100
Control Byte = 0b0
ZipBitData = 00000000
Control Byte = 0b0
ZipBitData = 00000000
Control Byte = 0b1
ZipBitData = 00000010
Control Byte = 0b1
ZipBitData = 00000010
Control Byte = 0b1
ZipBitData = 00000100
Control Byte = 0b111
ZipBitData = 000001110100000000111000
Control Byte = 0b110
ZipBitData = 000001010100000000000000
Control Byte = 0b110
ZipBitData = 000001010100000000000000
Control Byte = 0b110
ZipBitData = 000001110100000000000000
Control Byte = 0b110
ZipBitData = 000001111100000000000000
Control Byte = 0b110
ZipBitData = 000001011100000000000000
Control Byte = 0b110
ZipBitData = 000001111100000000000000
Control Byte = 0b110
ZipBitData = 000001111100000000000000
Control Byte = 0b110
ZipBitData = 000001111100000000000000
Control Byte = 0b110
ZipBitData = 000001011100000000000000
Control Byte = 0b110
ZipBitData = 000011110100000000000000
Control Byte =

NameError: name 'find_all' is not defined

In [12]:
data.header_info

[['Version du CBM', 1],
 ['Nombre Octets Alloués pour les IOs', 14],
 ['Nombre Octets Alloués pour les Analogs', 12],
 ['Nombre de Variables Echantillons', 50],
 ['Nombre de Variables Entêtes Specifiques', 7],
 ['Taille de la zone de data (octet)', 8095],
 ['Nombre Enregistrements', 767],
 ['Temps de démarrage (s)', 1555309908],
 ['Temps entre deux échantillons (ms)', 50]]

In [11]:
data.sample_variable

[[['size', 25],
  ['name', 'LT-V-DVR est active\x00'],
  ['type', 0],
  ['bitposition', 0],
  ['sampleposition', 0]],
 [['size', 23],
  ['name', 'LT-V<2 est active\x00'],
  ['type', 0],
  ['bitposition', 1],
  ['sampleposition', 0]],
 [['size', 23],
  ['name', 'LT-AO1 est active\x00'],
  ['type', 0],
  ['bitposition', 2],
  ['sampleposition', 0]],
 [['size', 23],
  ['name', 'LT-AO2 est active\x00'],
  ['type', 0],
  ['bitposition', 3],
  ['sampleposition', 0]],
 [['size', 23],
  ['name', 'LT-Ads est active\x00'],
  ['type', 0],
  ['bitposition', 4],
  ['sampleposition', 0]],
 [['size', 23],
  ['name', 'LT-Acq est active\x00'],
  ['type', 0],
  ['bitposition', 5],
  ['sampleposition', 0]],
 [['size', 22],
  ['name', 'LT-CF est active\x00'],
  ['type', 0],
  ['bitposition', 6],
  ['sampleposition', 0]],
 [['size', 23],
  ['name', 'LT-CF2 est active\x00'],
  ['type', 0],
  ['bitposition', 7],
  ['sampleposition', 0]],
 [['size', 26],
  ['name', 'LT-IH-UFR est active\x00'],
  ['type', 0],


In [13]:
df = pd.DataFrame(data.datablock)
df

In [14]:
data.spec_header_variable

[[['size', 26],
  ['name', b'Unique serial number\x00'],
  ['type', 12],
  ['sizevalue', 1],
  ['value', b'\x00']],
 [['size', 55],
  ['name', b'Device instance indication enumeration in the car\x00'],
  ['type', 2],
  ['sizevalue', 1],
  ['value', b'\x01']],
 [['size', 31],
  ['name', b'Device type e.g   DCU= 85\x00'],
  ['type', 2],
  ['sizevalue', 1],
  ['value', b'U']],
 [['size', 47],
  ['name', b'Customer Code for the current trip. Utf-8\x00'],
  ['type', 12],
  ['sizevalue', 1],
  ['value', b'G']],
 [['size', 36],
  ['name', b'Previous station identifier\x00'],
  ['type', 6],
  ['sizevalue', 4],
  ['value', b'AAP\x00']],
 [['size', 32],
  ['name', b'Next station identifier\x00'],
  ['type', 6],
  ['sizevalue', 4],
  ['value', b'LEM\x00']],
 [['size', 30],
  ['name', b'Outside Air Temperature\x00'],
  ['type', 4],
  ['sizevalue', 2],
  ['value', b'?\x00']]]

In [30]:
data.measurement_data

[[['U8Transition', '1'],
  ['U8SleepMode', '1'],
  ['U24TimeStamp', '00:00:00'],
  ['u8ZipControlBits', 1, '0b1'],
  ['auZipBitData', 3, '00011001'],
  ['measurement', [['00001110'], ['10000000'], ['00000001']]]],
 [['U8Transition', '2'],
  ['U8SleepMode', '1'],
  ['U24TimeStamp', '00:00:00'],
  ['u8ZipControlBits', 0, '0b0'],
  ['auZipBitData', 0, '00000000'],
  ['measurement', []]],
 [['U8Transition', '3'],
  ['U8SleepMode', '0'],
  ['U24TimeStamp', '00:06:01'],
  ['u8ZipControlBits', 2, '0b11'],
  ['auZipBitData', 2, '0100000000000100'],
  ['measurement', [['01000011'], ['00000001']]]],
 [['U8Transition', '4'],
  ['U8SleepMode', '1'],
  ['U24TimeStamp', '00:06:01'],
  ['u8ZipControlBits', 0, '0b0'],
  ['auZipBitData', 0, '00000000'],
  ['measurement', []]],
 [['U8Transition', '2'],
  ['U8SleepMode', '1'],
  ['U24TimeStamp', '00:06:01'],
  ['u8ZipControlBits', 0, '0b0'],
  ['auZipBitData', 0, '00000000'],
  ['measurement', []]],
 [['U8Transition', '2'],
  ['U8SleepMode', '1'],
  ['U2

In [154]:
#Create End DataFrame and Stores TimeStamp
# One row per decoded frame. frame[2] is the ['U24TimeStamp', 'HH:MM:SS'] pair,
# so frame[2][1] is the formatted time. Iterating over the frames directly
# removes the dependency on a counter defined in another cell.
timestamp = [frame[2][1] for frame in data.measurement_data]
# Build the frame in one step from a dict; a set is not a valid `columns` argument.
final_data = pd.DataFrame({'TimeStamp': timestamp})

In [153]:
#Export Final File as CSV
final_data.to_csv('door_data.csv', index=False)

In [33]:
# Third frame, third entry, first item: the field name 'U24TimeStamp'.
label = data.measurement_data[2][2][0]
# Column labels are given as a list; a set is unordered and not accepted by pandas.
test = pd.DataFrame(columns=['TimeStamp'])

In [93]:
kek2

Unnamed: 0,Country,Capital,Population
0,Belgium,Brussels,11190846
1,India,New Delhi,1303171035
2,Brazil,Brasília,207847528


In [99]:
# Single label-based lookup (row label 1, column 'Country') instead of chained
# indexing with a positional [0] on a labelled Series.
kek2.loc[1, 'Country']

'India'

In [40]:
# Small toy table used to experiment with DataFrame construction and indexing.
kek = dict(
    Country=['Belgium', 'India', 'Brazil'],
    Capital=['Brussels', 'New Delhi', 'Brasília'],
    Population=[11190846, 1303171035, 207847528],
)

In [41]:
# Build the toy frame with an explicit column order.
kek2 = pd.DataFrame(
    data=kek,
    columns=['Country', 'Capital', 'Population'],
)

In [None]:
nb_ech = len(data.measurement_data)

In [23]:
data.measurement_data[:][2][2][0]

'U24TimeStamp'

In [119]:
print(data.measurement_data[0][4][-1][::-1])

10011000


In [61]:
NewTarget = MyTarget.split()
print(NewTarget)

["'00:00:00'", '0', '1', '1', '1', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '1', '1', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'NaN', 'NaN', 'NaN', 'NaN', 'NaN', 'NaN', '1', '1', "'00:00:00'", '0', '1', '1', '1', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '1', '1', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', 'NaN', 'NaN', 'NaN', 'NaN', 'NaN', '1', '2', "'00:06:01'", '0', '1', '1', '1', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '1', '1', '0', '0', '0', '0', '1', '0', '0', '0', '0', '0', '0', '0', '0', '1', '1', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '1', 'NaN', 'NaN', 'NaN', 'NaN', 'NaN', '0', '3', "'00:06:01'", '0', '1', '1', '1', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '1', '1', '0', '0', '0', '0', '1', '0', '0', '0', '

In [111]:
# Cut the flat token list into fixed-width records.
# NOTE(review): 767 equals the record count shown in the decoded header of the
# sample file -- presumably this should come from data.nb_writing; confirm.
N_ROWS = 767
ROW_WIDTH = 53  # number of tokens per record

NewList = [
    tuple(NewTarget[start:start + ROW_WIDTH])
    for start in range(0, N_ROWS * ROW_WIDTH, ROW_WIDTH)
]

# Integer column labels 0 .. ROW_WIDTH - 1
label = list(range(ROW_WIDTH))

In [112]:
print(label)

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52]


In [114]:
ok = pd.DataFrame.from_records(NewList, columns=label)

NameError: name 'NewList' is not defined