In [73]:
from datetime import date, datetime

In [20]:
fileName = "data.csv"

In [52]:
# Return: validLines, numberLines
openDict = {}    # Dictionary that contains <processID, lineNumber> for a start process.
validLines = []  # Lines numbers of valid lines in file


with open(fileName, 'r') as readFile:
    lineNumber = 0
    for line in readFile:

        # START line found => add {processID: lineNumber} to openDict
        if line.startswith('$START'):
            processID = line.split(',')[1]
            openDict[processID] = lineNumber

        # END line found:
        #     - Check if start entry for processID exists
        #     - Add line of START and END entry to 'validLines'
        #     - Delete START entry from dict openDict
        elif line.startswith("$END"):
            processID = line.split(',')[1]
            if processID in openDict:
                validLines.append(openDict[processID])
                validLines.append(lineNumber)
                del openDict[processID]
            
        lineNumber = lineNumber + 1

numberLines = lineNumber;
print("Total number of lines = " + str(lineNumber))
print("Valid lines = " + str(len(validLines)))
print("Non-terminated processes = " + str(len(openDict)))

Total number of lines = 5
Valid lines = 4
Non-terminated processes = 1


In [115]:
lineValid = [False] * numberLines
for idx in validLines:
    lineValid[idx] = True
    
noInstances = 0      # Counts how many instances are open at the same time    
timeMap = {}      # {ProcessID: time}
timeVector = []
toolboxes = []

with open(fileName, 'r') as readFile:
    lineNumber = 0
    for line in readFile:
        if lineValid[lineNumber]:
            
            # ------------------------------------------------------------
            # START line found => add {processID: lineNumber} to openDict
            # ------------------------------------------------------------
            if line.startswith('$START'):
                line = line.rstrip('\n')  # remove \n at end of line
                
                # Get process ID and time stamp
                substr = line.split(',')  # split in substrings
                processID = substr[1]     # get process ID
                timeStamp = substr[4]     # get time stamp and convert to datetime format
                timeStamp = datetime.strptime(timeStamp, "%d.%m.%Y %H:%M")
                
                # Add process ID and time stamp to dict
                timeMap[processID] = timeStamp
                
                # Increase number of active instances
                noInstances = noInstances + 1
        
            # -----------------------------------------------------
            # END line found:
            # -----------------------------------------------------
            
            # If the last active instance:
            #    - Process time stamps and write output data
            # else
            #    - Temporarily save [start, end time] and associated toolboxes
        
            elif line.startswith("$END"):
                
                # Get data
                line = line.rstrip('\n')  # remove \n at end of line
                
                substr = line.split(',')  # split in substrings
                
                processID = substr[1]
                licenseNo = substr[2]
                version   = substr[3]
                endTime = substr[4]
                endTime = datetime.strptime(endTime, "%d.%m.%Y %H:%M")
                tbox = substr[5].strip('][').split(':')
                print(tbox)
                
                # Add start - end time and toolboxes
                if not processID in timeMap:
                    print("ERROR: (line number = " + str(lineNumber) + ")" )
                    print("No start time found for the end timestamp.")
                    #return
                else:
                    startTime = timeMap[processID]
                    timeVector.append(startTime)
                    timeVector.append(endTime)
                     
                    toolboxes.append(tbox)
                    
                noInstances = noInstances - 1
                
                # If was last active instance => process data
                if noInstances == 0:
                    
                    tboxesFlat = [item for sublist in toolboxes for item in sublist]
                    uniqueToolboxes = list(set(tboxesFlat)) 
                    print(uniqueToolboxes)
                       
                    for tbox in uniqueToolboxes:
                        timeVecTbx = []
                        
        
        
        lineNumber = lineNumber + 1;

['matlab', 'simulink']
['matlab']
['matlab', 'simulink']


In [113]:
print(timeVector)
print(toolboxes)

[datetime.datetime(2020, 6, 16, 16, 30), datetime.datetime(2020, 6, 16, 16, 45), datetime.datetime(2020, 6, 16, 16, 22), datetime.datetime(2020, 6, 16, 18, 5)]
[['matlab', 'simulink'], ['matlab']]


In [168]:
def mergeTimeStamps( timeStamps ):
    mergedTimeArray = []
    
    if len(timeStamps) % 2 != 0:
        print("[mergeTimeStamps] ERROR: Unexpected number of timestamp entries.")
        return;
    
    noElements = int(len(timeStamps)/2)   # Total number of timestamps
    indices = range(0, noElements)        # Indices for loop over timestamps
    activeElements = [True] * noElements  # Indicates, which timestamp elements are yet unprocessed
    
    # Select first element to be processed
    element = [timeStamps[0], timeStamps[1]]
    activeElements[0] = False
    
    while any(activeElements):   # while there ar still unprocessed elements
        afterMerge = False
        
        for n in [idx for idx in indices if activeElements[idx]]:
            
            timePair = [timeStamps[2*n], timeStamps[2*n + 1]]
            if not (timePair[0] > element[1] or timePair[1] < element[0]):
                element = [min(element[0], timePair[0]), max(element[1], timePair[1])]
                activeElements[n] = False
                afterMerge = True
                break
            
        if not afterMerge:
            mergedTimeArray = mergedTimeArray + element
            
            idx = activeElements.index(True)
            element = [timeStamps[2*idx], timeStamps[2*idx+1]]
            
    mergedTimeArray = mergedTimeArray + element
    return mergedTimeArray
    

In [192]:
def processTime(timeVector, toolboxes):
    
    time_list = []
    
    
    tboxesFlat = [item for sublist in toolboxes for item in sublist]
    uniqueToolboxes = list(set(tboxesFlat)) 
    
    for toolbox in uniqueToolboxes:
        timeTbx = []    # time vector for toolbox
        
        # Calculate toolbox specific time vector
        cc = 0
        for tbx in toolboxes:
            if toolbox in tbx:
                timeTbx.append(timeVector[2*cc])   #start time
                timeTbx.append(timeVector[2*cc+1]) #end time
            cc = cc + 1;
    
        time_list.append(timeTbx)
        
    return uniqueToolboxes, time_list

In [169]:
tlist = [0, 4, 6, 9, 3, 7, 10, 11]
mergeTimeStamps(tlist)

[0, 9, 10, 11]

In [193]:
tboxes, time_list = processTime(timeVector, toolboxes)
tboxes
for tbx in tboxes:
    print(tbx)

matlab
[datetime.datetime(2020, 6, 16, 16, 30), datetime.datetime(2020, 6, 16, 16, 45), datetime.datetime(2020, 6, 16, 16, 22), datetime.datetime(2020, 6, 16, 18, 5)]
simulink
[datetime.datetime(2020, 6, 16, 16, 30), datetime.datetime(2020, 6, 16, 16, 45)]
matlab
simulink


In [137]:
isActive = [True]*6
isActive[2] = False
indices = range(0,6)

res = [idx for idx in indices if isActive[idx]]
print(res)

for a in [1,3,5]:
    print(a)

[0, 1, 3, 4, 5]
1
3
5


In [67]:
myString = '[matlab,simulink]'
#myString = "[1, 2, 3, 4, 5]"
res = myString.strip('][').split(',')   # use .split(', ') if additional space
res

['matlab', 'simulink']

In [72]:
myString = "$START,13828,40913439,27 (R2020) Update 1,16.06.2020 16:22"
dateString = myString.split(',')[4]
display(dateString)

'16.06.2020 16:22'

In [89]:
time0 = datetime.strptime(dateString, "%d.%m.%Y %H:%M")
time1 = datetime(2020, 6, 16, 17, 30)

dt = time1 - time0
display(dt.total_seconds()/(60*60))  

time1.strftime("%d.%m.%Y %H:%M")

1.1333333333333333

'16.06.2020 17:30'

In [151]:
isActive = [False] * 8
isActive[2] = False
isActive[4] = True

if any(isActive):
    print("yes")
    
isActive.index(True)

yes


4

In [158]:
list1 = [1, 2]
list2 = [3, 4]

list1 + list2

[1, 2, 3, 4]

In [40]:
if 12 in myDict:
    print('yes')

yes


In [173]:
any(['matlab', 'simulink'] == 'simulink')

TypeError: 'bool' object is not iterable