### Setup the data

In [2]:
import pandas as pd
import os
from datetime import datetime, timezone
import time

start = time.time()

all_data = pd.DataFrame()

for file in os.listdir('./'):
    if file.endswith('csv'):
        all_data = pd.read_csv('./' + file, sep=';')
        print("Detected file: " + file)

duration = time.time() - start
print("Total time: %10.2f" % duration)

all_data['PLC_TIME(Timedate48)'] = all_data['PLC_TIME(Timedate48)'].astype(str)

def getUnixTime(date):
    return date.split(',')[0]

def convertToUTC(date):
    return datetime.utcfromtimestamp(date).strftime('%Y-%m-%d %H:%M:%S')

def convertToGMT(unix_timestamp):
    utcTime = datetime.fromtimestamp(unix_timestamp, timezone.utc)
    localTime = utcTime.astimezone()
    return localTime.strftime("%Y-%m-%d %H:%M:%S")
    
all_data['Date_Time_(GMT)'] = all_data['PLC_TIME(Timedate48)'].apply(lambda x: getUnixTime(x))
all_data['Date_Time_(GMT)'] = all_data['Date_Time_(GMT)'].astype('int32')
all_data['Date_Time_(GMT)'] = all_data['Date_Time_(GMT)'].apply(lambda x: convertToGMT(x))
column_name = 'Date_Time_(GMT)'
first_column = all_data.pop(column_name)
all_data.insert(0, column_name, first_column)
all_data['Date_Time_(GMT)'] = pd.to_datetime(all_data['Date_Time_(GMT)'])
all_data = all_data.sort_values(by='Date_Time_(GMT)')
all_data.tail()

Detected file: 2020_09_16_14_02_20.csv
Total time:       5.18


Unnamed: 0,Date_Time_(GMT),TIME,PLC_TIME_OFFSET(Integer8),PLC_TIME_CV(Enum2),PLC_TIME(Timedate48),EVR_VehicleNumber(Unsigned16),EVR_TIME_CV(Enum2),EVR_TIME(Timedate48),EVR_Speed(Unsigned16),EVR_Speed_CV(Enum2),...,EKE_DIOs_Car15(Bitset32),EKE_DIOs_Car16(Bitset32),EKE_DIOs_Car2(Bitset32),EKE_DIOs_Car3(Bitset32),EKE_DIOs_Car4(Bitset32),EKE_DIOs_Car5(Bitset32),EKE_DIOs_Car6(Bitset32),EKE_DIOs_Car7(Bitset32),EKE_DIOs_Car8(Bitset32),EKE_DIOs_Car9(Bitset32)
32496,2020-09-16 18:39:38,16637952,0,1,1600277978000,15002,1,1600277978000,0,1,...,0,0,31c083,31c083,31c083,31c083,31c083,31c083,431c083,0
32497,2020-09-16 18:39:39,16638464,0,1,1600277979000,15002,1,1600277979000,0,1,...,0,0,31c083,31c083,31c083,31c083,31c083,31c083,431c083,0
32498,2020-09-16 18:39:39,16638976,0,1,1600277979000,15002,1,1600277979000,0,1,...,0,0,31c083,31c083,31c083,31c083,31c083,31c083,431c083,0
32499,2020-09-16 18:39:40,16639488,0,1,1600277980000,15002,1,1600277980000,0,1,...,0,0,31c083,31c083,31c083,31c083,31c083,31c083,431c083,0
32500,2020-09-16 18:39:40,16640000,0,1,1600277980000,15002,1,1600277980000,0,1,...,0,0,31c083,31c083,31c083,31c083,31c083,31c083,431c083,0


#### Cleanup the NaN entries

In [32]:
all_data = all_data.dropna(how='all')
all_data.head()

Unnamed: 0,Date_Time_(UTC_+00:00),TIME,PLC_TIME_OFFSET(Integer8),PLC_TIME_CV(Enum2),PLC_TIME(Timedate48),EVR_VehicleNumber(Unsigned16),EVR_TIME_CV(Enum2),EVR_TIME(Timedate48),EVR_Speed(Unsigned16),EVR_Speed_CV(Enum2),...,EKE_DIOs_Car15(Bitset32),EKE_DIOs_Car16(Bitset32),EKE_DIOs_Car2(Bitset32),EKE_DIOs_Car3(Bitset32),EKE_DIOs_Car4(Bitset32),EKE_DIOs_Car5(Bitset32),EKE_DIOs_Car6(Bitset32),EKE_DIOs_Car7(Bitset32),EKE_DIOs_Car8(Bitset32),EKE_DIOs_Car9(Bitset32)
0,2020-09-16 13:02:20,0,0,1,1600261340000,15002,1,1600261340000,0,1,...,0,0,3f1c083,3f1c083,3f1c083,3f1c083,3f1c083,3f1c083,3f1c083,0
1,2020-09-16 13:02:20,512,0,1,1600261340000,15002,1,1600261341000,0,1,...,0,0,3f1c083,3f1c083,3f1c083,3f1c083,3f1c083,3f1c083,3f1c083,0
2,2020-09-16 13:02:21,1024,0,1,1600261341000,15002,1,1600261341000,0,1,...,0,0,3f1c083,3f1c083,3f1c083,3f1c083,3f1c083,3f1c083,3f1c083,0
3,2020-09-16 13:02:21,1536,0,1,1600261341000,15002,1,1600261342000,0,1,...,0,0,3f1c083,3f1c083,3f1c083,3f1c083,3f1c083,3f1c083,3f1c083,0
4,2020-09-16 13:02:22,2048,0,1,1600261342000,15002,1,1600261342000,0,1,...,0,0,3f1c083,3f1c083,3f1c083,3f1c083,3f1c083,3f1c083,3f1c083,0
