# このファイルについて
トラカンデータの期間を分割・結合する

In [1]:
import pandas as pd

期間のとり方
- 2021/04/01 〜 2022/03/31
- 2022/04/01 〜 2023/03/31
- 2023/04/01 〜 2023/09/30
- 2023/10/01 〜 2024/03/31（推論補助用）
- 2024/04/01 〜 2024/05/06（最終評価用）

In [4]:
# Road whose files are processed. Alternative value used in this notebook: 'touhoku'.
TARGET_ROAD = 'kannetsu'

# Prefix that is prepended to TARGET_ROAD when building the input file names.
# Alternative value used in this notebook: '' (no prefix).
PREFIX = 'merged_1h_'

In [7]:
# Date ranges (YYYYMMDD-YYYYMMDD) that appear in the names of the two input files.
period1, period2 = '20231001-20240331', '20240401-20240506'

## データを読み込む

In [8]:
# Read the start/end code columns as pandas categoricals.
dtype = dict(start_code='category', end_code='category')

# Locations of the two input files, one per period.
input_path1 = f'./traffic/{PREFIX + TARGET_ROAD}_{period1}_2KP.csv'
input_path2 = f'./traffic/{PREFIX + TARGET_ROAD}_{period2}_2KP.csv'

df1 = pd.read_csv(input_path1, dtype=dtype)
df2 = pd.read_csv(input_path2, dtype=dtype)

In [10]:
# Parse the `datetime` column of both frames into pandas datetimes.
# The column is replaced on the existing frames, exactly as before.
for frame in (df1, df2):
    frame['datetime'] = pd.to_datetime(frame['datetime'])

In [11]:
# Preview the first rows of the first input frame.
# NOTE(review): the recorded output shows direction='down' for these rows, while
# the tail of df1 and all previewed rows of df2 show '下り' — confirm whether the
# direction labels need to be normalized before the data is merged.
df1.head(3)

Unnamed: 0,datetime,start_name,end_name,start_code,end_code,start_pref_code,end_pref_code,start_lat,end_lat,start_lng,...,KP,start_KP,end_KP,direction,limit_speed,OCC,allCars,speed,search_specified,search_unspecified
0,2023-10-01 00:00:00,大泉ＪＣＴ,所沢,1110210,1800006,13,11,35.75582,35.80615,139.601514,...,2.48,0.8,9.4,down,100.0,2.0,633.0,96.380126,77,6469.0
1,2023-10-01 01:00:00,大泉ＪＣＴ,所沢,1110210,1800006,13,11,35.75582,35.80615,139.601514,...,2.48,0.8,9.4,down,100.0,1.25,442.0,95.986456,12,6469.0
2,2023-10-01 02:00:00,大泉ＪＣＴ,所沢,1110210,1800006,13,11,35.75582,35.80615,139.601514,...,2.48,0.8,9.4,down,100.0,1.0,332.0,96.402402,19,6469.0


In [12]:
# Preview the last rows of the first input frame (check where its period ends).
df1.tail(3)

Unnamed: 0,datetime,start_name,end_name,start_code,end_code,start_pref_code,end_pref_code,start_lat,end_lat,start_lng,...,KP,start_KP,end_KP,direction,limit_speed,OCC,allCars,speed,search_specified,search_unspecified
606093,2024-03-31 21:00:00,昭和,沼田(関越道),1800106,1800111,10,10,36.61077,36.6519,139.070694,...,125.37,120.7,125.9,下り,80.0,,0.0,80.0,17,3148.0
606094,2024-03-31 22:00:00,昭和,沼田(関越道),1800106,1800111,10,10,36.61077,36.6519,139.070694,...,125.37,120.7,125.9,下り,80.0,,0.0,80.0,8,3148.0
606095,2024-03-31 23:00:00,昭和,沼田(関越道),1800106,1800111,10,10,36.61077,36.6519,139.070694,...,125.37,120.7,125.9,下り,80.0,,0.0,80.0,2,3148.0


In [13]:
# Preview the first rows of the second input frame (check where its period starts).
df2.head(3)

Unnamed: 0,datetime,start_name,end_name,start_code,end_code,start_pref_code,end_pref_code,start_lat,end_lat,start_lng,...,KP,start_KP,end_KP,direction,limit_speed,OCC,allCars,speed,search_specified,search_unspecified
0,2024-04-01 00:00:00,大泉ＪＣＴ,所沢,1110210,1800006,13,11,35.75582,35.80615,139.601514,...,2.48,0.8,9.4,下り,100.0,2.0,655.0,96.769817,77,4723.0
1,2024-04-01 01:00:00,大泉ＪＣＴ,所沢,1110210,1800006,13,11,35.75582,35.80615,139.601514,...,2.48,0.8,9.4,下り,100.0,1.333333,459.0,97.373913,37,4723.0
2,2024-04-01 02:00:00,大泉ＪＣＴ,所沢,1110210,1800006,13,11,35.75582,35.80615,139.601514,...,2.48,0.8,9.4,下り,100.0,1.083333,359.0,94.75,17,4723.0


In [14]:
# Preview the last rows of the second input frame (check where its period ends).
df2.tail(3)

Unnamed: 0,datetime,start_name,end_name,start_code,end_code,start_pref_code,end_pref_code,start_lat,end_lat,start_lng,...,KP,start_KP,end_KP,direction,limit_speed,OCC,allCars,speed,search_specified,search_unspecified
119229,2024-05-06 21:00:00,昭和,沼田(関越道),1800106,1800111,10,10,36.61077,36.6519,139.070694,...,125.37,120.7,125.9,下り,80.0,,0.0,80.0,19,4021.0
119230,2024-05-06 22:00:00,昭和,沼田(関越道),1800106,1800111,10,10,36.61077,36.6519,139.070694,...,125.37,120.7,125.9,下り,80.0,,0.0,80.0,6,4021.0
119231,2024-05-06 23:00:00,昭和,沼田(関越道),1800106,1800111,10,10,36.61077,36.6519,139.070694,...,125.37,120.7,125.9,下り,80.0,,0.0,80.0,10,4021.0


## データ期間の分割・統合

In [78]:
# Boundary between the two output periods: rows strictly before it go to the
# first period, rows at or after it go to the second.
boundary = pd.Timestamp('2024/4/1')

is_before = df2.datetime < boundary
is_on_or_after = df2.datetime >= boundary

# Rows with a missing datetime match neither mask and end up in neither frame.
df_former = df2.loc[is_before]
df_latter = df2.loc[is_on_or_after]

In [79]:
# First period: the first input frame plus the early part split off the second one.
# Row-wise stacking is the default for pd.concat.
df1_new = pd.concat([df1, df_former])

# Second period: only the late part of the second input frame; nothing is appended.
df2_new = pd.concat([df_latter])

In [80]:
# Sort both output frames by start code, end code, KP and datetime,
# and renumber the rows 0..n-1 afterwards.
sort_keys = ['start_code', 'end_code', 'KP', 'datetime']

df1_new = df1_new.sort_values(by=sort_keys, ignore_index=True)
df2_new = df2_new.sort_values(by=sort_keys, ignore_index=True)

In [81]:
# Verify that the total number of rows is unchanged by the split/merge.
rows_in = len(df1) + len(df2)
rows_out = len(df1_new) + len(df2_new)
assert rows_in == rows_out

In [82]:
# Output paths for the two re-partitioned periods.
# NOTE(review): these names do not include PREFIX. With PREFIX = '' and the
# current period1/period2 values they are identical to the input paths, so the
# inputs would be overwritten — confirm this is intended.
file1, file2 = (
    f'./traffic/{TARGET_ROAD}_20231001-20240331_2KP.csv',
    f'./traffic/{TARGET_ROAD}_20240401-20240506_2KP.csv',
)

In [87]:
# Write the first-period frame to file1 without the index column.
# %time reports how long the write takes (about 2 min 45 s in the recorded run).
%time df1_new.to_csv(file1, index=False)

CPU times: user 2min 44s, sys: 1.31 s, total: 2min 45s
Wall time: 2min 45s


In [88]:
# Write the second-period frame to file2 without the index column.
# %time reports how long the write takes (about 32 s in the recorded run).
%time df2_new.to_csv(file2, index=False)

CPU times: user 32.2 s, sys: 255 ms, total: 32.4 s
Wall time: 32.5 s
