In [27]:
from datetime import datetime, timedelta
import numpy as np
import csv

In [28]:
def parse_time(date_string):
    """Split a ``YYYY-MM-DD HH:MM:SS[.fff]`` timestamp into its text components.

    The components are returned as the substrings found in ``date_string``
    (no conversion to ``int``, so zero padding is kept). Anything after the
    first ``.`` in the seconds field is discarded.

    Args:
        date_string: Timestamp text such as ``"2022-11-01 12:04:00.868"``.

    Returns:
        ``[year, month, day, hour, minute, second]`` as a list of strings.

    Raises:
        IndexError: If an expected ``-``, space or ``:`` separator is missing.
    """
    # Split once per delimiter; each piece below is picked out of these lists.
    by_dash = date_string.split("-")
    by_space = date_string.split(" ")
    by_colon = date_string.split(":")

    year = by_dash[0]
    month = by_dash[1]
    day = by_dash[2].split(" ")[0]  # third dash piece still carries the time
    hour = by_space[1].split(":")[0]
    minute = by_colon[1]
    second = by_colon[2].split(".")[0]  # drop the fractional seconds

    return [year, month, day, hour, minute, second]


def dummies_time(timestamp_start, timestamp_end):
    """Build 24 hour-of-day indicator flags for a time interval.

    Element ``h`` of the result is 1 when clock hour ``h`` lies between the
    start hour and the end hour of the interval (both inclusive), wrapping
    past midnight when the interval ends on the day after it started.
    The end hour is marked even when the end timestamp falls exactly on the
    hour boundary; an interval ending exactly at midnight therefore marks
    hour 0 as well.

    Args:
        timestamp_start: Interval start, formatted ``%Y-%m-%d %H:%M:%S.%f``.
        timestamp_end: Interval end, same format.

    Returns:
        ``numpy.ndarray`` of shape ``(24,)`` and dtype ``uint8`` holding 0/1.
        An array is returned on every path.

    Raises:
        ValueError: If either timestamp does not match the expected format.

    NOTE(review): an end earlier than the start is not validated; such input
    is sliced like a same-day interval. Confirm whether it should be rejected.
    """
    timestamp_format = "%Y-%m-%d %H:%M:%S.%f"
    dt_start = datetime.strptime(timestamp_start, timestamp_format)
    dt_end = datetime.strptime(timestamp_end, timestamp_format)

    hours = np.zeros(24, dtype=np.uint8)

    # Anything longer than 23 hours touches every clock hour.
    if dt_end > dt_start + timedelta(hours=23):
        hours[:] = 1
        return hours

    # First midnight strictly after the start timestamp.
    dt_start_midnight = dt_start.replace(
        hour=0, minute=0, second=0, microsecond=0
    ) + timedelta(days=1)

    if dt_end >= dt_start_midnight:
        # The interval reaches the next day: start hour..23, then 0..end hour.
        # ">=" (not ">") so an end exactly at midnight is handled here instead
        # of falling through every branch. The span is at most 23 hours at
        # this point, so the end is always on the day right after the start.
        hours[dt_start.hour :] = 1
        hours[: dt_end.hour + 1] = 1
    else:
        # Start and end are before the same midnight.
        hours[dt_start.hour : dt_end.hour + 1] = 1

    return hours


def process_line(old_line):
    """Expand one input CSV row into the widened output row.

    Columns 10 and 11 of ``old_line`` are read as the start and end
    timestamps. They are replaced by six start components, six end components
    (see ``parse_time``) and 24 hour flags (see ``dummies_time``); columns 0-9
    and 12 onward are copied through unchanged.

    When column 10 equals ``"mod_date_start_time"`` the row is treated as the
    header and a fixed list of output column names is returned instead.

    Example input row (text fields shown as placeholders):
        ['10002', '<operator>', '<branch>', '<city>', '<district>', '', '',
         'KEPE000045', '7', '2', '2022-11-01 12:04:00.868',
         '2022-11-01 13:26:04.431', '0 01:22:03.563', '2']

    Args:
        old_line: One row as a list of strings.

    Returns:
        The expanded row as a list, or ``None`` if any exception occurred
        (the exception and the offending row are printed).
    """
    try:
        date_start = old_line[10]
        date_end = old_line[11]

        if date_start == "mod_date_start_time":
            leading_columns = [
                "charger_id",
                "operator",
                "snm",
                "_col5",
                "_col6",
                "skind_name",
                "kinddtl_name",
                "cid",
                "power",
                "type_id",
            ]
            time_fields = ("year", "month", "day", "hour", "minute", "second")
            start_columns = [f"{field}_s" for field in time_fields]
            end_columns = [f"{field}_e" for field in time_fields]
            # "00" .. "23", one column per clock hour.
            hour_columns = [f"{hour:02d}" for hour in range(24)]
            trailing_columns = ["_col14", "_col15"]
            return (
                leading_columns
                + start_columns
                + end_columns
                + hour_columns
                + trailing_columns
            )

        hours = dummies_time(date_start, date_end)

        leading_values = old_line[:10]
        start_parts = parse_time(date_start)
        end_parts = parse_time(date_end)
        return (
            leading_values
            + start_parts
            + end_parts
            + hours.tolist()
            + old_line[12:]
        )

    except Exception as e:
        # Best effort: report the problem and signal failure with None.
        print(e)
        print(old_line)
        return None

In [29]:
# Stream the raw CSV through process_line() and write the widened rows out.
# NOTE: the paths below use Windows-style separators.
input_file_path = "data\\220809_EV_CHARGING_DATA.csv"
output_file_path = "output\\220809_EV_CHARGING_processed.csv"


# The csv module documentation asks for newline="" on files handed to both
# csv.reader and csv.writer; without it on the input side, line breaks inside
# quoted fields are not interpreted correctly.
with open(input_file_path, "r", encoding="utf-8", newline="") as infile, open(
    output_file_path,
    "w",
    encoding="utf-8",
    newline="",  # https://stackoverflow.com/questions/3348460/csv-file-written-with-python-has-blank-lines-between-each-row
) as outfile:
    reader = csv.reader(infile)
    writer = csv.writer(outfile)
    for i, line in enumerate(reader):
        processed_row = process_line(line)
        if processed_row is not None:
            # process_line() returns None for rows it could not convert (it
            # prints the error and the row itself); those rows are skipped
            # here explicitly instead of being passed to writerow().
            try:
                writer.writerow(processed_row)
            except Exception as e:
                # Keep the conversion best-effort: report and move on.
                print(e)

        if i % 10000 == 0:
            print(f"processed line #{i}")

print(f"Converted file written to {output_file_path}")

processed line #0
processed line #10000
processed line #20000
processed line #30000
processed line #40000
processed line #50000
processed line #60000
processed line #70000
processed line #80000
processed line #90000
processed line #100000
processed line #110000
processed line #120000
processed line #130000
processed line #140000
processed line #150000
processed line #160000
processed line #170000
processed line #180000
processed line #190000
processed line #200000
processed line #210000
processed line #220000
processed line #230000
processed line #240000
processed line #250000
processed line #260000
processed line #270000
processed line #280000
processed line #290000
processed line #300000
processed line #310000
processed line #320000
processed line #330000
processed line #340000
processed line #350000
processed line #360000
processed line #370000
processed line #380000
processed line #390000
processed line #400000
processed line #410000
processed line #420000
processed line #430000
pr

In [None]:
# Preview: show the first few raw rows next to their processed form.
number_of_lines = 5
with open(
    "data\\220809_EV_CHARGING_DATA.csv", "r", encoding="utf-8", newline=""
) as file:
    reader = csv.reader(file)
    # zip() stops at whichever side runs out first, so a file with fewer than
    # number_of_lines rows yields a shorter preview instead of StopIteration.
    lines = [row for _, row in zip(range(number_of_lines), reader)]

for line in lines:
    print(line)
    print(process_line(line))

['charger_id', 'operator', 'snm', '_col5', '_col6', 'skind_name', 'kinddtl_name', 'cid', 'power', 'type_id', 'mod_date_start_time', 'mod_date_end_time', '_col14', '_col15']
['charger_id', 'operator', 'snm', '_col5', '_col6', 'skind_name', 'kinddtl_name', 'cid', 'power', 'type_id', 'year_s', 'month_s', 'day_s', 'hour_s', 'minute_s', 'second_s', 'year_e', 'month_e', 'day_e', 'hour_e', 'minute_e', 'second_e', '00', '01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '_col14', '_col15']
['10002', '한국전력', '마포용산지사', '서울특별시', '마포구', '', '', 'KEPE000045', '7', '2', '2022-11-01 12:04:00.868', '2022-11-01 13:26:04.431', '0 01:22:03.563', '2']
['10002', '한국전력', '마포용산지사', '서울특별시', '마포구', '', '', 'KEPE000045', '7', '2', '2022', '11', '01', '12', '04', '00', '2022', '11', '01', '13', '26', '04', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, '0 01:22:03.563', '2']
['10002', '한국전력', '마포용산지사'

In [26]:
# Preview: print the first few raw text lines of the processed file.
number_of_lines = 5
with open("output\\220809_EV_CHARGING_processed.csv", "r", encoding="utf-8") as file:
    # zip() stops at whichever side runs out first, so a file with fewer than
    # number_of_lines lines yields a shorter preview instead of StopIteration.
    lines = [raw_line.strip() for _, raw_line in zip(range(number_of_lines), file)]

for line in lines:
    print(line)

charger_id,operator,snm,_col5,_col6,skind_name,kinddtl_name,cid,power,type_id,year_s,month_s,day_s,hour_s,minute_s,second_s,year_e,month_e,day_e,hour_e,minute_e,second_e,00,01,02,03,04,05,06,07,08,09,10,11,12,13,14,15,16,17,18,19,20,21,22,23,_col14,_col15
10002,한국전력,마포용산지사,서울특별시,마포구,,,KEPE000045,7,2,2022,11,01,12,04,00,2022,11,01,13,26,04,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0 01:22:03.563,2
10002,한국전력,마포용산지사,서울특별시,마포구,,,KEPE000045,7,2,2022,11,01,14,42,02,2022,11,01,15,06,05,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0 00:24:03.184,2
10002,한국전력,마포용산지사,서울특별시,마포구,,,KEPE000045,7,2,2022,11,01,14,40,01,2022,11,01,16,01,06,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0 01:21:04.683,2
10002,한국전력,마포용산지사,서울특별시,마포구,,,KEPE000045,7,2,2022,11,01,16,59,02,2022,11,01,19,16,08,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,0,0,0 02:17:06.194,2
