In [3]:
import os
import numpy as np
import torch

# Keep printed tensors compact: show 2 items at each edge of a dimension,
# summarize tensors with more than 50 elements, wrap lines at 75 characters.
torch.set_printoptions(edgeitems=2, threshold=50, linewidth=75)

# Relative path to the hourly bike-sharing CSV, two directories up.
bikes_path = os.path.join(
  os.path.pardir,
  os.path.pardir,
  "_00_data",
  "e_time-series-bike-sharing-dataset",
  "hour-fixed.csv",
)


def _day_of_month(date_field):
  """Return characters 8-9 of a date field as a float (2011-01-07 -> 7.0)."""
  return float(date_field[8:10])


# Parse the CSV as float32, skipping the header row. Column 1 holds a date
# string, so it is reduced to its day-of-month to keep the array numeric.
bikes_numpy = np.loadtxt(
  bikes_path,
  dtype=np.float32,
  delimiter=",",
  skiprows=1,
  converters={1: _day_of_month},
)

# Wrap the NumPy array as a tensor.
bikes = torch.from_numpy(bikes_numpy)
print(bikes.shape)

# Group the hourly rows into days: [hours, columns] -> [days, 24, columns].
hours_per_day = 24
column_count = bikes.shape[1]
daily_bikes = bikes.view(-1, hours_per_day, column_count)
print(daily_bikes.shape)  # >>> torch.Size([730, 24, 17])

# Every column except the last is input data; the last column is the target,
# kept with a trailing singleton dimension.
daily_bikes_data = daily_bikes[..., :-1]
daily_bikes_target = daily_bikes[..., -1:]

print(daily_bikes_data.shape)
print(daily_bikes_target.shape)

print("#" * 50, 1)

# Walk through the encoding on the first day only.
first_day_data = daily_bikes_data[0]
print(first_day_data.shape)

# Column 9 holds the weather situation code:
#   1: clear, 2: mist, 3: light rain/snow, 4: heavy rain/snow
first_day_weather = first_day_data[:, 9].long()
print(first_day_weather)

# Row i of a 4x4 identity matrix is the one-hot vector for weather code i + 1.
eye_matrix = torch.eye(4)
print(eye_matrix)

# Shift the 1-based codes to 0-based row indices and look up the one-hot rows.
weather_onehot = eye_matrix[first_day_weather - 1]
print(weather_onehot.shape)
print(weather_onehot)

# Append the one-hot columns to the first day's original columns.
first_day_data_torch = torch.cat((first_day_data, weather_onehot), dim=1)
print(first_day_data_torch.shape)
print(first_day_data_torch)

print("#" * 50, 2)

# Apply the same one-hot weather encoding to every day.
day_data_torch_list = []
for day in daily_bikes_data:  # day.shape: [24, 16]
  # Column 9 is the 1-based weather code; shift it to a 0-based row index.
  weather_onehot = eye_matrix[day[:, 9].long() - 1]
  # Original columns followed by the 4 one-hot columns -> [24, 20].
  day_data_torch_list.append(torch.cat((day, weather_onehot), dim=1))

print(len(day_data_torch_list))

# Stack the per-day tensors back into a single tensor along a new first dimension.
daily_bikes_data = torch.stack(day_data_torch_list)
print(daily_bikes_data.shape)

print("#" * 50, 3)

# NOTE: the shapes printed here use the slice [:9], which still includes
# column 0; the concatenation below uses [1:9], so it keeps one column fewer
# than the first printed shape suggests.
print(daily_bikes_data[:, :, :9].shape, daily_bikes_data[:, :, 10:].shape)

# Drop the 'instant' (column 0) and 'weathersit' (column 9) columns by joining
# the column ranges on either side of the weather code.
before_weather = daily_bikes_data[:, :, 1:9]
after_weather = daily_bikes_data[:, :, 10:]
daily_bikes_data = torch.cat((before_weather, after_weather), dim=2)
print(daily_bikes_data.shape)

# Standardize the temperature feature (index 8 after the drop) by subtracting
# its mean and dividing by its standard deviation, writing the result in place.
temperatures = daily_bikes_data[:, :, 8]
temperature_mean = temperatures.mean()
temperature_std = temperatures.std()
daily_bikes_data[:, :, 8] = (temperatures - temperature_mean) / temperature_std


torch.Size([17520, 17])
torch.Size([730, 24, 17])
torch.Size([730, 24, 16])
torch.Size([730, 24, 1])
################################################## 1
torch.Size([24, 16])
tensor([1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 2, 2,
        2, 2])
tensor([[1., 0., 0., 0.],
        [0., 1., 0., 0.],
        [0., 0., 1., 0.],
        [0., 0., 0., 1.]])
torch.Size([24, 4])
tensor([[1., 0., 0., 0.],
        [1., 0., 0., 0.],
        ...,
        [0., 1., 0., 0.],
        [0., 1., 0., 0.]])
torch.Size([24, 20])
tensor([[ 1.,  1.,  ...,  0.,  0.],
        [ 2.,  1.,  ...,  0.,  0.],
        ...,
        [23.,  1.,  ...,  0.,  0.],
        [24.,  1.,  ...,  0.,  0.]])
################################################## 2
730
torch.Size([730, 24, 20])
################################################## 3
torch.Size([730, 24, 9]) torch.Size([730, 24, 10])
torch.Size([730, 24, 18])
