In [35]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf

from freeman.tensor import MakeWindow
from freeman.tensor import make_window_from_array, dataset_spec

In [36]:
# Location of the climate CSV, resolved under the current user's home directory.
_PATH_DATA = os.path.join(
    os.path.expanduser('~'),
    'projects',
    'ai-projects',
    '61-WIND-TURBIN',
    'data',
    'climate_test.csv',
)

# Load the whole file into a DataFrame using pandas' default parsing options.
df_data = pd.read_csv(_PATH_DATA)

In [37]:
# Show per-column dtypes and non-null counts to spot columns with missing values.
df_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7013 entries, 0 to 7012
Data columns (total 19 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   p (mbar)         6940 non-null   float64
 1   T (degC)         6940 non-null   float64
 2   Tpot (K)         6940 non-null   float64
 3   Tdew (degC)      6940 non-null   float64
 4   rh (%)           6940 non-null   float64
 5   VPmax (mbar)     6940 non-null   float64
 6   VPact (mbar)     6940 non-null   float64
 7   VPdef (mbar)     6940 non-null   float64
 8   sh (g/kg)        6940 non-null   float64
 9   H2OC (mmol/mol)  6940 non-null   float64
 10  rho (g/m**3)     6940 non-null   float64
 11  Vwv_X            6940 non-null   float64
 12  Vwv_Y            6940 non-null   float64
 13  Vmwv_X           6940 non-null   float64
 14  Vmwv_Y           6940 non-null   float64
 15  day_sin          7013 non-null   float64
 16  day_cos          7013 non-null   float64
 17  year_sin      

In [38]:
# Replace every missing value with 0.0, modifying df_data itself.
df_data.fillna(0.0, inplace=True)

# Map each column name to its positional index in the frame.
column_indices = dict(zip(df_data.columns, range(len(df_data.columns))))

# Name and positional index of the target (label) column.
target_col = 'T (degC)'
target_idx = column_indices[target_col]

In [39]:
# Count the remaining missing values per column (all zeros expected after the fill).
df_data.isna().sum()

p (mbar)           0
T (degC)           0
Tpot (K)           0
Tdew (degC)        0
rh (%)             0
VPmax (mbar)       0
VPact (mbar)       0
VPdef (mbar)       0
sh (g/kg)          0
H2OC (mmol/mol)    0
rho (g/m**3)       0
Vwv_X              0
Vwv_Y              0
Vmwv_X             0
Vmwv_Y             0
day_sin            0
day_cos            0
year_sin           0
year_cos           0
dtype: int64

In [40]:
# Windowing parameters handed to the MakeWindow calls and to the checks below.
WINDOW_SIZE = 24  # consecutive rows per window
BATCH_SIZE = 32  # windows per batch

In [41]:
# NumPy array of the frame's values; used as index-based MakeWindow input and
# as the reference source array in the window checks.
nd_data = df_data.values

In [42]:
# Build windows straight from the DataFrame; the label column is chosen by name.
# NOTE(review): on failure only a message is printed, so df_win_data stays
# undefined and later cells that use it will fail.
try:
    df_win_data = MakeWindow(df_data, WINDOW_SIZE, BATCH_SIZE, label_col=target_col)
except ValueError as value_err:
    print(f'Program Error:\n{value_err}')
except Exception as other_err:
    print(f'System Error:\n{other_err}')

Batch dataset shape: features(32, 24, 19), label(32,), Last batch dataset shape: features(13, 24, 19), label(13,)
Batch dataset size: 219
Total dataset row size: 6989 = 32 * (219 - 1) + 13


In [43]:
# Build windows from the raw NumPy array; the label column is chosen by index.
# drop_col_label=True: the printed feature width is 18 instead of 19, so the
# label column is presumably removed from the features (confirm in MakeWindow).
# NOTE(review): on failure only a message is printed, so nd_win_data stays
# undefined and later cells that use it will fail.
try:
    nd_win_data = MakeWindow(
        nd_data, WINDOW_SIZE, BATCH_SIZE, label_idx=target_idx, drop_col_label=True
    )
except ValueError as value_err:
    print(f'Program Error:\n{value_err}')
except Exception as other_err:
    print(f'System Error:\n{other_err}')

Batch dataset shape: features(32, 24, 18), label(32,), Last batch dataset shape: features(13, 24, 18), label(13,)
Batch dataset size: 219
Total dataset row size: 6989 = 32 * (219 - 1) + 13


In [44]:
# Work on a copy so that removing the label column later leaves df_data intact.
df_X = df_data.copy()

In [45]:
# Remove the target column from df_X in place and keep it as the label series.
df_y = df_X.pop(target_col)

In [46]:
# Build windows from a separate (features, labels) pair.
# last_idx_label=True: presumably each label comes from the last row of its
# window (confirm in MakeWindow); the printed total is 6990 rows here versus
# 6989 for the other two datasets.
# NOTE(review): on failure only a message is printed, so tuple_win_data stays
# undefined and later cells that use it will fail.
try:
    tuple_win_data = MakeWindow(
        (df_X.values, df_y), WINDOW_SIZE, BATCH_SIZE, last_idx_label=True
    )
except ValueError as value_err:
    print(f'Program Error:\n{value_err}')
except Exception as other_err:
    print(f'System Error:\n{other_err}')

Batch dataset shape: features(32, 24, 18), label(32,), Last batch dataset shape: features(14, 24, 18), label(14,)
Batch dataset size: 219
Total dataset row size: 6990 = 32 * (219 - 1) + 14


In [47]:
def check_window_data(wd, sd, ws, bs, tidx, dcol, lidx):
    """Check that a batched window dataset reproduces the source array.

    Every window row and every label in ``wd`` is compared, with exact
    equality, against the source row it is expected to come from. The
    expected position assumes windows advance one source row at a time and
    that windows/batches are in source order: row ``r`` of window ``w`` in
    batch ``b`` must equal source row ``b * bs + w + r``.

    Each mismatch is printed. Scanning stops after the first batch that
    contains a mismatch (followed by ``Not Match``); if nothing differs,
    ``Match`` is printed. The function returns ``None``.

    Args:
        wd: Window data. Must expose ``take(count)`` yielding ``(x, y)``
            batches; iterating ``x`` must yield windows with a ``.numpy()``
            method, iterating ``y`` must yield scalar labels. ``take(-1)``
            is used, which in tf.data yields the whole dataset.
        sd: Source data as a 2-D NumPy array (rows x columns).
        ws: Window size (rows per window).
        bs: Batch size (windows per full batch).
        tidx: Index of the target column in ``sd``.
        dcol: True if the target column was dropped from the window features.
        lidx: True if each label is the target value on the window's last
            row; False if it is the value on the row right after the window.
    """
    is_match = True
    # Expected features: the source with or without the target column.
    X_data = np.delete(sd, tidx, axis=1) if dcol else sd
    # Expected labels, aligned so that entry k belongs to window k.
    y_data = sd[ws - 1 if lidx else ws:, tidx]

    for idx, (x, y) in enumerate(wd.take(-1)):
        first_window = idx * bs  # global index of this batch's first window

        for idxx1, xe in enumerate(x):
            for idxx2, xee in enumerate(xe.numpy()):
                expected_row = X_data[first_window + idxx1 + idxx2]
                # A row mismatches as soon as ANY feature differs (or the
                # widths differ). The previous np.all(a != b) test only fired
                # when every feature differed, hiding partial mismatches.
                if not np.array_equal(expected_row, xee):
                    print(f'Not match X({idx}, {idxx1}, {idxx2}): \n{expected_row}\n{xee}')
                    is_match = False

        for idxy, ye in enumerate(y):
            expected_label = y_data[first_window + idxy]
            if not np.array_equal(expected_label, ye):
                print(f'Not match y({idx}, {idxy}): {expected_label}, {ye}')
                is_match = False

        if not is_match:
            print('Not Match')
            break

    if is_match:
        print('Match')

In [48]:
# Pull the first batch from the DataFrame-built window dataset.
df_win_data_data = df_win_data.win_data
for X, y in df_win_data_data.take(1):
    # NOTE(review): only a blank line is printed; X and y remain bound to the
    # first batch afterwards — this looks like leftover debugging.
    print()




In [49]:
# Validate the DataFrame-built windows against the source array:
# dcol=False (target column kept in the features),
# lidx=False (label expected on the row right after each window).
check_window_data(
    df_win_data.win_data, nd_data, WINDOW_SIZE, BATCH_SIZE, target_idx, False, False
)

Match


In [53]:
# Validate the ndarray-built windows against the source array:
# dcol=True (target column removed from the features),
# lidx=False (label expected on the row right after each window).
check_window_data(
    nd_win_data.win_data, nd_data, WINDOW_SIZE, BATCH_SIZE, target_idx, True, False
)

Match


In [56]:
# Validate the (features, labels)-built windows against the source array:
# dcol=True (target column removed from the features),
# lidx=True (label expected on the last row of each window).
check_window_data(
    tuple_win_data.win_data, nd_data, WINDOW_SIZE, BATCH_SIZE, target_idx, True, True
)

Match
