In [97]:
from datetime import datetime
from typing import List


# Shape of one day's worth of input: one row per metric, one value per hour.
_METRICS_PER_DAY = 18
_HOURS_PER_DAY = 24


def _partial_transpose(output_path: str, lines: List[bytes], is_initial_turn: bool) -> None:
    """Transpose one day of metric-per-row records into hour-per-row CSV rows.

    Each input line has the form ``date,<ignored>,metric,v0,...,v23``. The
    second field is never decoded, so it may contain bytes that are not valid
    UTF-8. The 24 output rows hold one column per metric plus a ``timestamp``
    column, with columns ordered by ``sorted()`` of their names.

    Args:
        output_path: CSV file to write. It is truncated and given a header
            line when ``is_initial_turn`` is true, otherwise appended to.
        lines: Exactly 18 raw input lines that all carry the same date.
        is_initial_turn: Whether this is the first chunk written to
            ``output_path``.

    Raises:
        AssertionError: If the chunk does not have 18 lines, a line does not
            have 24 hourly values, or the lines do not share one date.
        ValueError: If a line has fewer than four comma-separated fields or
            its date does not match ``%Y/%m/%d``.
    """
    assert len(lines) == _METRICS_PER_DAY, (
        f"expected {_METRICS_PER_DAY} metric rows, got {len(lines)}"
    )

    rows: List[dict] = [{} for _ in range(_HOURS_PER_DAY)]
    day = None

    for line in lines:
        raw_date, _, raw_metric, raw_values = line.strip().split(b",", 3)

        date: str = raw_date.decode()
        metric: str = raw_metric.decode()
        values: List[str] = [raw_value.decode() for raw_value in raw_values.split(b",")]
        assert len(values) == _HOURS_PER_DAY, (
            f"expected {_HOURS_PER_DAY} hourly values for {metric!r}, got {len(values)}"
        )

        # Every row of the chunk must describe the same day; otherwise the
        # single timestamp column would mislabel some of the values.
        if day is None:
            day = date
        assert date == day, f"chunk mixes dates {day!r} and {date!r}"

        for hour, value in enumerate(values):
            # "NR" readings are recorded as zero.
            rows[hour][metric] = "0" if value == "NR" else value

    # Timestamps depend only on the day and the hour, so compute them once per
    # chunk instead of once per metric row. The parsed datetime is naive, so
    # .timestamp() interprets it in the local time zone of the running machine.
    for hour, row in enumerate(rows):
        moment = datetime.strptime(f"{day} {hour}", "%Y/%m/%d %H")
        row["timestamp"] = str(int(moment.timestamp()))

    columns: List[str] = sorted(rows[0])

    # Open the output only after the whole chunk parsed cleanly, so a malformed
    # first chunk cannot truncate an existing file.
    with open(output_path, "w" if is_initial_turn else "a", encoding="utf-8") as f:
        if is_initial_turn:
            f.write(",".join(columns) + "\n")

        for row in rows:
            f.write(",".join(row[column] for column in columns) + "\n")


def reorganize_training_data(*, input_path: str, output_path: str) -> None:
    """Rewrite the metric-per-row training CSV as an hour-per-row CSV.

    The first line of ``input_path`` is skipped as a header. The remaining
    lines are consumed in groups of 18 (one group per day) and every group is
    transposed into 24 hourly rows appended to ``output_path``. Blank lines
    are ignored.

    Args:
        input_path: Source CSV, read as bytes.
        output_path: Destination CSV; overwritten by the first group.

    Raises:
        ValueError: If the input ends with an incomplete group. All complete
            groups before it have already been written.
    """
    with open(input_path, "rb") as f:
        next(f, None)  # Skip the header line

        day_lines: List[bytes] = []
        is_initial_turn = True

        for line in f:
            if not line.strip():
                continue

            day_lines.append(line)

            if len(day_lines) == _METRICS_PER_DAY:
                _partial_transpose(output_path, day_lines, is_initial_turn)
                is_initial_turn = False
                day_lines = []

    if day_lines:
        raise ValueError(
            f"{input_path}: {len(day_lines)} trailing row(s) do not form a "
            f"complete {_METRICS_PER_DAY}-row day"
        )
                


In [98]:
# Raw input CSV; passed to reorganize_training_data as input_path.
original_training_data_path: str = "./train.csv"
# Destination of the transposed CSV; passed to reorganize_training_data as output_path.
reorganized_training_data_path: str = "/tmp/train.csv"

In [101]:
reorganize_training_data(
    input_path=original_training_data_path,
    output_path=reorganized_training_data_path
)

!cat /tmp/train.csv | head -n 25

AMB_TEMP,CH4,CO,NMHC,NO,NO2,NOx,O3,PM10,PM2.5,RAINFALL,RH,SO2,THC,WD_HR,WIND_DIREC,WIND_SPEED,WS_HR,timestamp
14,1.8,0.51,0.2,0.9,16,17,16,56,26,0,77,1.8,2,37,35,1.4,0.5,1388505600
14,1.8,0.41,0.15,0.6,9.2,9.8,30,50,39,0,68,2,2,80,79,1.8,0.9,1388509200
14,1.8,0.39,0.13,0.5,8.2,8.7,27,48,36,0,67,1.7,2,57,2.4,1,0.6,1388512800
13,1.8,0.37,0.12,1.7,6.9,8.6,23,35,35,0,74,1.6,1.9,76,55,0.6,0.3,1388516400
12,1.8,0.35,0.11,1.8,6.8,8.5,24,25,31,0,72,1.9,1.9,110,94,1.7,0.6,1388520000
12,1.8,0.3,0.06,1.5,3.8,5.3,28,12,28,0,73,1.4,1.8,106,116,2.5,1.9,1388523600
12,1.8,0.37,0.1,1.9,6.9,8.8,24,4,25,0,74,1.5,1.9,101,106,2.5,2,1388527200
12,1.8,0.47,0.13,2.2,7.8,9.9,22,2,20,0,73,1.6,1.9,104,94,2,2,1388530800
15,1.8,0.78,0.26,6.6,15,22,21,11,19,0,66,5.1,2.1,124,232,0.6,0.5,1388534400
17,1.8,0.74,0.23,7.9,21,29,29,38,30,0,56,15,2,46,153,0.8,0.3,1388538000
20,1.8,0.59,0.2,4.2,14,18,44,56,41,0,45,4.5,2,241,283,1.6,0.8,1388541600
22,1.8,0.52,0.18,2.9,11,14,58,64,44,0,37,2.7,2,280,269,1.9,1.2,1388545200
22,

# Test

In [3]:
!cat ./train.csv | head -n 20

���,����,����,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23
2014/1/1,�׭�,AMB_TEMP,14,14,14,13,12,12,12,12,15,17,20,22,22,22,22,22,21,19,17,16,15,15,15,15
2014/1/1,�׭�,CH4,1.8,1.8,1.8,1.8,1.8,1.8,1.8,1.8,1.8,1.8,1.8,1.8,1.8,1.8,1.8,1.8,1.8,1.8,1.8,1.8,1.8,1.8,1.8,1.8
2014/1/1,�׭�,CO,0.51,0.41,0.39,0.37,0.35,0.3,0.37,0.47,0.78,0.74,0.59,0.52,0.41,0.4,0.37,0.37,0.47,0.69,0.56,0.45,0.38,0.35,0.36,0.32
2014/1/1,�׭�,NMHC,0.2,0.15,0.13,0.12,0.11,0.06,0.1,0.13,0.26,0.23,0.2,0.18,0.12,0.11,0.1,0.13,0.14,0.23,0.18,0.12,0.1,0.09,0.1,0.08
2014/1/1,�׭�,NO,0.9,0.6,0.5,1.7,1.8,1.5,1.9,2.2,6.6,7.9,4.2,2.9,3.4,3,2.5,2.2,2.5,2.3,2.1,1.9,1.5,1.6,1.8,1.5
2014/1/1,�׭�,NO2,16,9.2,8.2,6.9,6.8,3.8,6.9,7.8,15,21,14,11,14,12,11,11,22,28,19,12,8.1,7,6.9,6
2014/1/1,�׭�,NOx,17,9.8,8.7,8.6,8.5,5.3,8.8,9.9,22,29,18,14,17,15,14,13,25,30,21,13,9.7,8.6,8.7,7.5
2014/1/1,�׭�,O3,16,30,27,23,24,28,24,22,21,29,44,58,50,57,65,64,51,34,33,34,37,38,38,36
2014/1/1,�׭�,PM10,56,50,48,35,25,12,4,2,11,38,56,64,56,57

In [13]:
# Scratch check: print the odd numbers from 1 through 9, one per line.
for a in (1, 3, 5, 7, 9):
    print(a)

1
3
5
7
9


In [92]:
# Scratch check: sorting a dict yields its keys in ascending order.
a = dict(a=123, b=234)
sorted(a)

['a', 'b']

In [54]:
# `d` is not bound by any visible cell, so this cell would raise NameError on
# a fresh kernel. Bind it explicitly to the value shown in the recorded output.
d = datetime(2020, 2, 1, 1)
d

datetime.datetime(2020, 2, 1, 1, 0)