In [None]:
import os
import glob
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torchinfo import summary
from torch.utils.data import DataLoader, TensorDataset
import warnings
warnings.filterwarnings('ignore')
import matplotlib.dates as mdates

from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
from tqdm import tqdm

In [None]:
# Use the first CUDA GPU when one is visible to PyTorch, otherwise the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

In [None]:
# Show which device was selected so a silent CPU fallback is visible in the output.
print(device)

In [None]:
class SeqEncoder(nn.Module):
    """Single-layer LSTM encoder that compresses a sequence into one latent vector.

    The LSTM's final hidden state is projected by a linear layer down to
    ``latent_size`` dimensions.
    """

    def __init__(self, input_size, hidden_size=64, latent_size=16):
        super().__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, latent_size)

    def forward(self, x):
        """Encode ``x`` ([B, T, F]) into a latent tensor ([B, latent_size])."""
        _, state = self.lstm(x)
        # state = (h_n, c_n); h_n is [1, B, H] for this single-layer LSTM,
        # so indexing layer 0 drops the leading axis.
        final_hidden = state[0][0]
        return self.fc(final_hidden)

class SeqDecoder(nn.Module):
    """Decoder that expands one latent vector back into a sequence.

    The latent is projected to ``hidden_size``, copied ``seq_len`` times along
    the time axis, and fed through an LSTM whose hidden size is ``output_size``;
    the LSTM outputs are the reconstruction.
    """

    def __init__(self, latent_size, hidden_size=64, output_size=3, seq_len=120):
        super().__init__()
        self.seq_len = seq_len
        self.fc = nn.Linear(latent_size, hidden_size)
        self.lstm = nn.LSTM(hidden_size, output_size, batch_first=True)

    def forward(self, z):
        """Decode ``z`` ([B, latent_size]) into a sequence ([B, T, output_size])."""
        projected = self.fc(z)[None]                      # [1, B, H]
        per_step = projected.repeat(self.seq_len, 1, 1)   # [T, B, H]
        lstm_input = per_step.transpose(0, 1)             # [B, T, H]
        reconstruction, _ = self.lstm(lstm_input)
        return reconstruction

class TranAD(nn.Module):
    """LSTM encoder/decoder pair that also re-encodes its own reconstruction.

    ``forward`` returns the reconstruction together with the latent of the
    input and the latent of the reconstruction, both tiled along the time axis.
    """

    def __init__(self, input_size=3, hidden_size=64, latent_size=16, seq_len=120):
        super().__init__()
        self.encoder = SeqEncoder(input_size, hidden_size, latent_size)
        self.decoder = SeqDecoder(latent_size, hidden_size, input_size, seq_len)
        self.seq_len = seq_len

    def _tile_over_time(self, latent):
        """Repeat a [B, latent_size] tensor to [B, seq_len, latent_size]."""
        return latent.unsqueeze(1).repeat(1, self.seq_len, 1)

    def forward(self, x):
        """Return (x_hat, tiled z, tiled z_hat) for an input ``x`` of shape [B, T, F]."""
        z = self.encoder(x)           # [B, latent_size]
        x_hat = self.decoder(z)       # [B, T, F]
        z_hat = self.encoder(x_hat)   # [B, latent_size]
        return x_hat, self._tile_over_time(z), self._tile_over_time(z_hat)


In [None]:
def data_norm(df, mode='train', scaler=''):
    """
    Min-max normalize every column of ``df`` except the first one.

    df (DataFrame): input frame. Column 0 is left untouched; all remaining
        columns are scaled.
    mode (str): 'train' fits a fresh MinMaxScaler on ``df`` (any ``scaler``
        argument is ignored); 'test' applies the already-fitted ``scaler``.
    scaler: fitted scaler exposing ``transform``; required when mode='test'.
    return (DataFrame, scaler): normalized copy of ``df`` and the scaler used.
    raises ValueError: on an unknown ``mode``, or when mode='test' is requested
        without a scaler.
    """
    columns = df.columns[1:]
    tmp = df.copy()  # never mutate the caller's frame

    if mode == 'train':
        scaler = MinMaxScaler()
        scaled = scaler.fit_transform(tmp[columns])
    elif mode == 'test':
        # '' is the signature's "no scaler" placeholder; reject it (and None)
        # up front instead of failing later with an AttributeError.
        if scaler is None or isinstance(scaler, str):
            raise ValueError("mode='test' requires a fitted scaler")
        scaled = scaler.transform(tmp[columns])
    else:
        raise ValueError(f"mode must be 'train' or 'test', got {mode!r}")

    # Write the normalized values back over the original columns.
    tmp[columns] = scaled

    return tmp, scaler

In [None]:
def inverse_norm(tensor, scaler):
    """
    Undo the scaler's normalization on an array whose last axis is the feature axis.

    tensor: torch.Tensor or array-like, e.g. [N, T, C]. Tensors are detached
        and moved to the CPU before conversion, so tensors that require grad
        or live on a GPU are accepted.
    scaler: fitted scaler exposing ``inverse_transform`` on a 2-D [rows, C] array.
    return: numpy array with the same shape as the input.
    """
    if isinstance(tensor, torch.Tensor):
        values = tensor.detach().cpu().numpy()
    else:
        values = np.asarray(tensor)

    shape = values.shape
    # The scaler works on 2-D data, so flatten every leading axis into rows.
    flat = values.reshape(-1, shape[-1])
    inv = scaler.inverse_transform(flat)
    return inv.reshape(shape)

In [None]:
def create_sequences(df, seq_len=10):
    """
    Build stride-1 sliding-window sequences together with their timestamps.

    df: DataFrame with a 'dates' column; every column after the first one is
        used as a feature (the notebook passes ['dates', 'rn', 'vl', 'wl']).
    seq_len: window length in rows; must be >= 1.
    return: (data_seq, date_seq)
        - data_seq: torch.Tensor [N, seq_len, F] (float32),
          with N = len(df) - seq_len + 1
        - date_seq: list of N arrays, each holding the seq_len 'dates' values
          of one window
      When df has fewer than seq_len rows there are no windows: an empty
      [0, seq_len, F] tensor and an empty list are returned.
    raises ValueError: if seq_len < 1.
    """
    if seq_len < 1:
        raise ValueError(f"seq_len must be >= 1, got {seq_len}")

    cols = df.columns[1:]  # every column except the first
    values = df[cols].values.astype(np.float32)
    dates = df['dates'].values

    n_windows = len(df) - seq_len + 1
    if n_windows <= 0:
        # np.stack([]) would raise; return a correctly shaped empty result so
        # callers can still concatenate it with other files' windows.
        return torch.empty((0, seq_len, values.shape[1]), dtype=torch.float32), []

    data_sequences = [values[i:i + seq_len] for i in range(n_windows)]
    date_sequences = [dates[i:i + seq_len] for i in range(n_windows)]

    return torch.tensor(np.stack(data_sequences)), date_sequences

In [None]:
def create_combined_sequences(train_dfs, test_dfs, seq_len=120):
    """
    Window every frame with create_sequences and concatenate the results.

    train_dfs (List[DataFrame]): training frames.
    test_dfs (List[DataFrame]): test/validation frames.
    seq_len (int): window length forwarded to create_sequences.
    return: (train_seq, test_seq, train_dates, test_dates)
        - train_seq / test_seq: tensors with all windows of all frames stacked
          along dim 0, in list order
        - train_dates / test_dates: flat lists with one date array per window,
          in the same order as the tensors

    The two lists are processed independently, so lists of different lengths
    are fully consumed (zipping them would silently drop the surplus frames).
    """
    def _window_all(frames):
        # Collect the windows and matching dates of every frame in order.
        seqs, dates = [], []
        for frame in frames:
            seq, date = create_sequences(frame, seq_len=seq_len)
            seqs.append(seq)
            dates.extend(date)
        return torch.cat(seqs, dim=0), dates

    train_seq, train_dates = _window_all(train_dfs)
    test_seq, test_dates = _window_all(test_dfs)

    return train_seq, test_seq, train_dates, test_dates

In [None]:
def create_dataloaders(train_seq, val_seq, batch_size=64, shuffle=True):
    """
    Wrap the sequence tensors in DataLoaders for autoencoder training.

    train_seq (tensor): training windows.
    val_seq (tensor): validation/test windows.
    batch_size (int): batch size used by both loaders.
    shuffle (bool): whether the training loader shuffles; the validation
        loader never shuffles, so its output order matches ``val_seq``.
    return train_loader, val_loader: each batch is (X, y) with y identical to X.
    """
    # Autoencoder setup: the target of every sample is the sample itself.
    train_dataset = TensorDataset(train_seq, train_seq)  # X, y
    # The target must be val_seq itself; pairing it with train_seq gives wrong
    # targets and fails TensorDataset's size check when the lengths differ.
    val_dataset = TensorDataset(val_seq, val_seq)  # X, y

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=shuffle)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

    return train_loader, val_loader

In [None]:
def train_tranad(model, train_loader, num_epochs=50, alpha=0.5, lr=1e-3, device='cuda', sav_path=None):
    """
    Train the model with a weighted reconstruction + latent-consistency loss.

    Per batch the loss is ``alpha * MSE(x_hat, x) + (1 - alpha) * MSE(z_hat, z)``,
    where (x_hat, z, z_hat) is the model's forward output. The value tracked
    per epoch is the SUM of the batch losses over the training loader.

    model: module whose forward returns (x_hat, z, z_hat).
    train_loader: iterable of (x, _) batches; only x is used.
    num_epochs (int): number of passes over train_loader.
    alpha (float): weight of the reconstruction term.
    lr (float): Adam learning rate.
    device: device the model and the batches are moved to.
    sav_path (str | None): folder for 'TranAD_best.pth' (weights of the epoch
        with the lowest training loss) and the loss plot. When None, nothing
        is written to disk.
    """
    model = model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = nn.MSELoss()

    save_path = sav_path
    if save_path is not None:
        os.makedirs(save_path, exist_ok=True)

    # The plot file name carries the window length. Read it from the model so
    # the function does not depend on a notebook-global `seq_len`.
    window_len = getattr(model, 'seq_len', 'unknown')

    best_loss = float('inf')  # lowest epoch loss seen so far
    losses = []
    for epoch in range(1, num_epochs + 1):
        model.train()
        total_loss = 0

        for x, _ in tqdm(train_loader, desc=f"Epoch {epoch}/{num_epochs}"):
            x = x.to(device)

            optimizer.zero_grad()
            x_hat, z, z_hat = model(x)

            loss1 = criterion(x_hat, x)   # reconstruction error
            loss2 = criterion(z_hat, z)   # latent of reconstruction vs latent of input
            loss = alpha * loss1 + (1 - alpha) * loss2

            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        # `epoch` already starts at 1, so it is printed as-is.
        print(f"Epoch {epoch}/{num_epochs} - Loss: {total_loss:.4f}")
        losses.append(total_loss)

        # Keep only the weights of the best epoch so far.
        if total_loss < best_loss:
            best_loss = total_loss
            if save_path is not None:
                torch.save(model.state_dict(), os.path.join(save_path, 'TranAD_best.pth'))
                # This is the training loss; no validation pass is run here.
                print(f"[Epoch {epoch}] 모델 저장됨: train_loss = {total_loss:.6f}")

    if save_path is None:
        return

    fig, ax = plt.subplots(figsize=(8, 5))
    ax.plot(losses, label='Loss')
    ax.set_xlabel('Epoch')
    ax.set_ylabel('Loss')
    ax.set_title('Training Loss per Epoch')
    ax.legend()
    ax.grid(True)
    fig.tight_layout()
    plot_path = os.path.join(save_path, f'TranAD_{window_len}_train_loss_plot.png')
    fig.savefig(plot_path)
    plt.close(fig)
    print(f"[Saved] Training loss plot saved at {plot_path}")

In [None]:
def test_tranad(model, test_loader, device='cuda'):
    model.eval()
    model.to(device)

    recon_errors = []
    all_x = []
    all_xhat = []

    criterion = nn.MSELoss(reduction='none')  # feature별 loss 확인 위해

    with torch.no_grad():
        for x, _ in test_loader:
            x = x.to(device)

            x_hat, _, _ = model(x)  # z, z_hat은 여기선 사용하지 않음

            # reconstruction error 계산
            loss = (x_hat - x) ** 2  # [B, T, F]
            loss = loss.mean(dim=2)  # feature 평균 → [B, T]
            recon_errors.append(loss.cpu().numpy())

            # 시계열 저장
            all_x.append(x.cpu())
            all_xhat.append(x_hat.cpu())

    # concat
    all_x = torch.cat(all_x, dim=0)         # [N, T, F]
    all_xhat = torch.cat(all_xhat, dim=0)   # [N, T, F]
    recon_errors = np.concatenate(recon_errors, axis=0)  # [N, T]

    return all_x, all_xhat, recon_errors


In [None]:
def compute_anomaly_scores(x_seq, w2_seq, mode='vl'):
    """
    Squared error between two tensors plus its mean over dim 1.

    x_seq, w2_seq: torch tensors of matching shape, e.g. [N, T].
    mode: accepted for interface compatibility; the current implementation
        does not read it and always uses every element passed in.
    return: (errors, scores) as numpy arrays. ``errors`` has the input shape;
        ``scores`` is ``errors`` averaged over dim 1 ([N] for [N, T] inputs).
    """
    squared_diff = (x_seq - w2_seq).pow(2)
    per_sequence = squared_diff.mean(dim=1)
    return squared_diff.numpy(), per_sequence.numpy()

In [None]:
def determine_threshold(scores, method='iqr', k=3):
    """
    Derive an anomaly threshold from a vector of scores.

    scores: numpy array of anomaly scores.
    method: 'iqr' -> Q3 + 1.5 * (Q3 - Q1);
            'mean_std' -> mean + k * std;
            'percentile' -> the 95th percentile.
    k: multiplier of the standard deviation, used by 'mean_std' only.
    raises ValueError: for any other ``method``.
    """
    if method == 'percentile':
        return np.percentile(scores, 95)

    if method == 'mean_std':
        return np.mean(scores) + k * np.std(scores)

    if method == 'iqr':
        lower_q = np.percentile(scores, 25)
        upper_q = np.percentile(scores, 75)
        return upper_q + 1.5 * (upper_q - lower_q)

    raise ValueError("지원하지 않는 방식입니다.")

In [None]:
def plot_corrected_vs_original(original_seq, corrected_seq, input_seq, sav_path = '', sample_idx=0):
    """
    Plot one sample's original, corrected and input sequences on one figure.

    original_seq, corrected_seq, input_seq: indexable collections; element
        ``sample_idx`` of each is drawn as one line.
    sav_path (str): folder for '<sample_idx>.png'. With the default '' the
        figure is only shown, not saved.
    sample_idx (int): index of the sample to draw.
    """
    orig = original_seq[sample_idx]
    corr = corrected_seq[sample_idx]
    inputs = input_seq[sample_idx]

    fig, ax = plt.subplots(figsize=(12, 4))
    ax.plot(orig, label='Original (vl)', marker='o')
    ax.plot(corr, label='Corrected (w2)', marker='x')
    ax.plot(inputs, label='Input', marker='+')
    ax.set_title(f'Sample {sample_idx}: Sewer Level Original  vs Corrected')
    ax.set_xlabel('Time index (minute)')
    ax.set_ylabel('Sewer Level')
    ax.legend()
    ax.grid(True)
    fig.tight_layout()

    # Save BEFORE show(): once the figure has been shown and released,
    # a later savefig writes a blank image.
    if sav_path:
        os.makedirs(sav_path, exist_ok=True)
        fig.savefig(f'{sav_path}/{sample_idx}.png')
    plt.show()
    plt.close(fig)

In [None]:
def plot_anomalies_on_timeseries(original_seq, anomaly_flags, sav_path='', sample_idx=0):
    """
    Plot one sample's series and mark its detected anomalies in red.

    original_seq: indexable collection; ``original_seq[sample_idx]`` is the
        series to draw (expected shape [T]).
    anomaly_flags: indexable collection; ``anomaly_flags[sample_idx]`` is a
        boolean array of shape [T] marking the anomalous time steps.
    sav_path (str): folder for '<sample_idx>_anomaly.png'. With the default ''
        the figure is only shown, not saved.
    sample_idx (int): index of the sample to draw.
    """
    vl = original_seq[sample_idx]        # shape: [T]
    flags = anomaly_flags[sample_idx]    # shape: [T], bool

    fig, ax = plt.subplots(figsize=(12, 4))
    ax.plot(vl, label='Sewer Level (vl)', color='blue')

    # Overlay the flagged points on top of the line.
    if flags.any():
        ax.scatter(np.where(flags)[0], vl[flags], color='red', label='Detected Anomaly', zorder=3)

    ax.set_title(f'Sample {sample_idx} - Detected {flags.sum()} Anomalies')
    ax.set_xlabel('Time index (minute)')
    ax.set_ylabel('Sewer Level')
    ax.legend()
    ax.grid(True)
    fig.tight_layout()

    # os.makedirs('') raises, so saving is skipped when no folder is given.
    if sav_path:
        os.makedirs(sav_path, exist_ok=True)
        fig.savefig(f'{sav_path}/{sample_idx}_anomaly.png')
    plt.show()
    plt.close(fig)

In [None]:
def summarize_pointwise_anomaly_statistics(errors, anomaly_flags):
    """
    Print summary statistics of point-wise anomaly detection.

    errors: array-like of point-wise errors (e.g. squared error per time step).
    anomaly_flags: array-like of the same shape marking anomalous points.
        Any truthy value counts as an anomaly, so 0/1 integer flags work the
        same as booleans.

    Prints:
        - total number of points
        - number and percentage of anomalous points
        - mean error of the anomalous points
        - mean error of the normal points
    Means of empty groups and the ratio of an empty input are reported as 0.
    """
    errors = np.asarray(errors)
    # Force a boolean mask: integer flags would otherwise be used as fancy
    # indices, and `~` would be a bitwise NOT instead of a logical one.
    anomaly_flags = np.asarray(anomaly_flags).astype(bool)

    total = errors.size
    num_anomalies = int(anomaly_flags.sum())
    num_normals = total - num_anomalies

    mean_anomaly_error = np.mean(errors[anomaly_flags]) if num_anomalies > 0 else 0.0
    mean_normal_error = np.mean(errors[~anomaly_flags]) if num_normals > 0 else 0.0
    anomaly_ratio = 100.0 * num_anomalies / total if total > 0 else 0.0

    print("📊 시점 단위 이상치 탐지 통계 요약")
    print(f"- 총 시점 수: {total}")
    print(f"- 이상치 시점 수: {num_anomalies}")
    print(f"- 이상치 비율: {anomaly_ratio:.2f}%")
    print(f"- 이상치 평균 오차: {mean_anomaly_error:.4f}")
    print(f"- 정상값 평균 오차: {mean_normal_error:.4f}")


In [None]:
def reconstruct_series_from_sequences(seq_array):
    """
    Merge stride-1 overlapping windows back into one series by averaging.

    Window i covers time steps i .. i+T-1; every time step of the result is
    the mean of all window values that cover it.

    seq_array: [N, T] numpy array, torch tensor or nested list.
    return: numpy array of length N + T - 1 (empty when N or T is 0).
    raises ValueError: if the input is not 2-D.
    """
    if hasattr(seq_array, 'detach'):
        # torch tensor: drop autograd tracking and move to host memory first
        seq_array = seq_array.detach().cpu().numpy()
    windows = np.asarray(seq_array, dtype=np.float64)
    if windows.ndim != 2:
        raise ValueError(f"seq_array must be 2-D [N, T], got shape {windows.shape}")

    N, T = windows.shape
    if N == 0 or T == 0:
        return np.zeros(0)

    summed = np.zeros(N + T - 1)
    count = np.zeros(N + T - 1)

    # Column t of every window lands on time steps t .. t+N-1, so adding whole
    # columns needs T vectorized steps instead of N Python-level iterations
    # (typically N >> T here).
    for t in range(T):
        summed[t:t + N] += windows[:, t]
        count[t:t + N] += 1

    return summed / count

## Load dataset (Several dataframes)

In [None]:
region = {0 : 'gwangjoo', 1 : 'changwon', 2 : 'pohang'}
fname = {0 : '2920010001045020', 1 : '4812110001018020', 2 : ''}
r_cd = 0

# Model / training configuration
batch_size = 256
seq_len = 240
input_dim=3
hidden_dim=24
latent_dim=16

epochs = 100

data_dir = './trainset/sewer/'
# Sort the paths so the train and test files pair up in a stable order.
train_paths = sorted(glob.glob(os.path.join(data_dir, 'train*.csv')))
test_paths = sorted(glob.glob(os.path.join(data_dir, 'org*.csv')))

# Fail here with a clear message instead of an IndexError in a later cell.
if not train_paths or not test_paths:
    raise FileNotFoundError(
        f"Expected 'train*.csv' and 'org*.csv' files under {data_dir!r}; "
        f"found {len(train_paths)} train and {len(test_paths)} test file(s)."
    )

# Load only the needed columns (this leaves out the 'flag' column).
usecols = ['dates', 'rn', 'vl', 'wl']

# read_csv ignores the ORDER of `usecols` and keeps the file's column order.
# Later cells index features by position (column 0 = dates, feature 1 = vl),
# so re-select with [usecols] to pin the order.
train_dfs = [pd.read_csv(path, encoding='cp949', usecols=usecols)[usecols] for path in train_paths]
test_dfs = [pd.read_csv(path, encoding='cp949', usecols=usecols)[usecols] for path in test_paths]


## Check NaNs

In [None]:
# 각 컬럼별 NaN 개수 누적용 (Series 초기화)
# Running per-column NaN counts, started at 0 for every column.
train_null = pd.Series(0, index=train_dfs[0].columns)
test_null = pd.Series(0, index=test_dfs[0].columns)

# Accumulate the per-column NaN counts of every train/test file pair.
for train, test in zip(train_dfs, test_dfs):
    train_null += train.isnull().sum()
    test_null += test.isnull().sum()

print('Train set NaN 개수 (컬럼별):\n', train_null)
print('Test set NaN 개수 (컬럼별):\n', test_null)

# Grand totals over all columns.
print(f"\nTrain set 전체 NaN 수: {train_null.sum()}")
print(f"Test set 전체 NaN 수: {test_null.sum()}")

## Data normalization (rain value, sewer level(vl), river level)

In [None]:
train_norm_dfs = []
test_norm_dfs = []
# Fit ONE scaler on all training files together; it is reused for every frame
# below and for the inverse transform of the reconstructions later on.
_, train_scaler = data_norm(pd.concat(train_dfs))

for train, test in zip(train_dfs, test_dfs):
    # In place on purpose: later cells read test_dfs[i]['vl'] as the reference
    # series and need the NaNs to be filled there as well.
    test.fillna(0.0, inplace=True)

    # mode='test' means "apply the given scaler". With the default
    # mode='train', data_norm refits a new scaler per file and ignores
    # train_scaler, so the training data would be scaled differently from
    # the test data.
    train_norm, _ = data_norm(train, mode='test', scaler=train_scaler)
    test_norm, _ = data_norm(test, mode='test', scaler=train_scaler)

    # Values below the training minimum scale to < 0; clamp the sewer level at 0.
    test_norm['vl'] = test_norm['vl'].clip(lower=0.0)

    train_norm_dfs.append(train_norm)
    test_norm_dfs.append(test_norm)

In [None]:
# Sanity check: compare the min/max of each variable before and after
# normalization, for the train files and then for the test files.
print('▶ Train set')
tmp_orig = pd.concat(train_dfs)
tmp_norm = pd.concat(train_norm_dfs)

print('Rain (rn) - Original min/max:', tmp_orig['rn'].min(), tmp_orig['rn'].max())
print('Rain (rn) - Normalized min/max:', tmp_norm['rn'].min(), tmp_norm['rn'].max())

print('Sewer level (vl) - Original min/max:', tmp_orig['vl'].min(), tmp_orig['vl'].max())
print('Sewer level (vl) - Normalized min/max:', tmp_norm['vl'].min(), tmp_norm['vl'].max())

print('River level (wl) - Original min/max:', tmp_orig['wl'].min(), tmp_orig['wl'].max())
print('River level (wl) - Normalized min/max:', tmp_norm['wl'].min(), tmp_norm['wl'].max())

print('\n▶ Test set')
tmp_test_orig = pd.concat(test_dfs)
tmp_test_norm = pd.concat(test_norm_dfs)

# The test frames were NaN-filled in place in the normalization cell,
# so this count reflects the filled data.
print('Test set Nan count:\n', tmp_test_orig.isnull().sum())

print('Rain (rn) - Original min/max:', tmp_test_orig['rn'].min(), tmp_test_orig['rn'].max())
print('Rain (rn) - Normalized min/max:', tmp_test_norm['rn'].min(), tmp_test_norm['rn'].max())

print('Sewer level (vl) - Original min/max:', tmp_test_orig['vl'].min(), tmp_test_orig['vl'].max())
print('Sewer level (vl) - Normalized min/max:', tmp_test_norm['vl'].min(), tmp_test_norm['vl'].max())

print('River level (wl) - Original min/max:', tmp_test_orig['wl'].min(), tmp_test_orig['wl'].max())
print('River level (wl) - Normalized min/max:', tmp_test_norm['wl'].min(), tmp_test_norm['wl'].max())


## Make dataset for the sequence data

In [None]:
# Window every normalized frame and stack the windows of all files.
train_seq, val_seq, train_date, test_date = create_combined_sequences(train_norm_dfs, test_norm_dfs, seq_len=seq_len)

print("Train shape:", train_seq.shape)  # (N_train, seq_len, 3)
print("Val shape:", val_seq.shape)      # (N_val, seq_len, 3)

In [None]:
# Turn the per-window date lists into 2-D arrays (one row per window).
train_date = np.stack(train_date)
test_date = np.stack(test_date)

## Initialize the model

In [None]:
model = TranAD(input_size=input_dim, hidden_size=hidden_dim, latent_size=latent_dim, seq_len=seq_len)
# Smoke test with random data. The forward pass returns
# (reconstruction, tiled latent of the input, tiled latent of the reconstruction).
example = torch.randn((batch_size,seq_len, input_dim)) 
w1, w2, z = model(example)

print(w1.shape)  # reconstruction: [batch_size, seq_len, input_dim]
print(w2.shape)  # tiled input latent: [batch_size, seq_len, latent_dim]

In [None]:
# Layer-by-layer summary (torchinfo) for the example input shape.
summary(model, input_size=(example.shape)) 

## Create dataloader

In [None]:
# Create the data loader to load the dataset
train_loader, val_loader = create_dataloaders(train_seq, val_seq, batch_size=batch_size)

# Check the dataset shape
for x, y in train_loader:
    print(x.shape)  
    print(y.shape)  
    break


## Training

In [None]:
# Start training. The best weights ('TranAD_best.pth') and the loss plot are
# written to the folder given as sav_path.
train_tranad(model, train_loader, device=device, num_epochs=epochs,sav_path=f'./sav/TrainAD_{seq_len}_rn_vl_wl_gc/')


## Load the model weights

In [None]:
# Load the checkpoint written by the training cell: train_tranad saves
# 'TranAD_best.pth' into './sav/TrainAD_{seq_len}_rn_vl_wl_gc/'. The previous
# path pointed at a USAD checkpoint that this notebook never creates.
# map_location lets weights saved on a GPU load on a CPU-only machine.
model.load_state_dict(torch.load(f'./sav/TrainAD_{seq_len}_rn_vl_wl_gc/TranAD_best.pth', map_location=device))

## Predict and get the predicted sewer level

In [None]:
# Run the model over the validation loader: returns the inputs, the
# reconstructions and the per-time-step reconstruction errors.
original_seq, corrected_seq, recon_errors = test_tranad(model, val_loader, device=device)


In [None]:
# Map inputs and reconstructions back to the original units with the training scaler.
original_seq = inverse_norm(original_seq, train_scaler)
corrected_seq = inverse_norm(corrected_seq, train_scaler)

In [None]:
# Raw rainfall series of the two test files (index 0 and 1), kept for plotting.
rain_value_g = test_dfs[0]['rn']
rain_value_c = test_dfs[1]['rn']

In [None]:
# Keep only feature index 1 (used as the sewer level 'vl' below) of each sequence.
original_vl =  original_seq[:, :, 1]  # [N, seq_len]
corrected_vl = corrected_seq[:, :, 1]  # [N, seq_len]

In [None]:
# Expected to be (N, seq_len): one error per window and time step.
recon_errors.shape

## Merge the overlapping sequences into one full time series ([N, T] -> [N+T-1])

In [None]:
# Index where the first test file's windows end and the second file's begin.
# A file with L rows yields L - seq_len + 1 windows, and the windows are
# concatenated in file order. Halving len(original_seq) is only correct when
# both files have exactly the same length.
md = max(len(test_dfs[0]) - seq_len + 1, 0)

In [None]:
# Rebuild the full time series per region: windows before `md` belong to the
# first test file (suffix _g), the rest to the second (suffix _c). The raw 'vl'
# column of each test file is the reference series.
original_series_g = reconstruct_series_from_sequences(original_vl[:md])
corrected_series_g = reconstruct_series_from_sequences(corrected_vl[:md])
val_series_g = np.stack(test_dfs[0]['vl'].to_list())
original_series_c = reconstruct_series_from_sequences(original_vl[md:])
corrected_series_c = reconstruct_series_from_sequences(corrected_vl[md:])
val_series_c = np.stack(test_dfs[1]['vl'].to_list())

In [None]:
# Point-wise squared error between the reconstructed and the raw sewer level.
point_errors_g = (corrected_series_g - val_series_g) ** 2  # [N + T - 1]
point_errors_c = (corrected_series_c - val_series_c) ** 2  # [N + T - 1]

In [None]:
# One timestamp per minute from 2024-03-01 00:01 through 2024-11-30 23:59
# (the end bound is excluded and [1:] drops the leading 00:00 sample).
# 'min' replaces the deprecated 'T' alias for minute frequency.
tt = pd.date_range('20240301','20241201',freq='min',inclusive='left')[1:]

In [None]:
def determine_threshold(scores, method='percentile', percentile=98, k=3):
    """
    Derive an anomaly threshold from a vector of scores.

    This definition replaces any earlier ``determine_threshold`` in the
    notebook, so it supports the same methods instead of ignoring ``method``.

    scores: numpy array of anomaly scores.
    method: 'percentile' -> the ``percentile``-th percentile (default);
            'iqr' -> Q3 + 1.5 * (Q3 - Q1);
            'mean_std' -> mean + k * std.
    percentile: percentile used by method='percentile'.
    k: multiplier of the standard deviation, used by method='mean_std'.
    raises ValueError: for any other ``method``.
    """
    if method == 'percentile':
        return np.percentile(scores, percentile)
    if method == 'iqr':
        q1 = np.percentile(scores, 25)
        q3 = np.percentile(scores, 75)
        return q3 + 1.5 * (q3 - q1)
    if method == 'mean_std':
        return np.mean(scores) + k * np.std(scores)
    raise ValueError(f"Unsupported method: {method!r}")

In [None]:
# threshold_g = determine_threshold(point_errors_g, method='iqr')  # or 'mean_std'
# threshold_c = determine_threshold(point_errors_c, method='iqr')  # or 'mean_std'
threshold_g = determine_threshold(point_errors_g)
threshold_c = determine_threshold(point_errors_c)
# Floor the threshold at 4.0, then round to the nearest integer. np.round is
# used because max() returns the plain Python float 4.0 whenever the floor
# wins, and a Python float has no .round() method.
threshold_g = np.round(max(threshold_g, 4.0))
threshold_c = np.round(max(threshold_c, 4.0))
anomaly_flags_g = point_errors_g > threshold_g  # boolean, shape [N + T - 1]
anomaly_flags_c = point_errors_c > threshold_c  # boolean, shape [N + T - 1]

In [None]:
# Report the threshold and the number of flagged points for Gwangju, then Changwon.
print("광주 임계값:", threshold_g)
print("광주 탐지된 이상치 수:", np.sum(anomaly_flags_g))
print("창원 임계값:", threshold_c)
print("창원 탐지된 이상치 수:", np.sum(anomaly_flags_c))

In [None]:
# NOTE(review): this cell prints the same four values as the cell directly
# above it; one of the two looks redundant.
print("광주 임계값:", threshold_g)
print("광주 탐지된 이상치 수:", np.sum(anomaly_flags_g))
print("창원 임계값:", threshold_c)
print("창원 탐지된 이상치 수:", np.sum(anomaly_flags_c))

In [None]:
# Round every series to whole numbers for plotting. Despite the `_norm`
# suffix these hold de-normalized values; .round() is the only transformation.
original_norm_g = original_series_g.round()
corrected_norm_g = corrected_series_g.round()
input_seq_g = val_series_g.round()
original_norm_c = original_series_c.round()
corrected_norm_c = corrected_series_c.round()
input_seq_c = val_series_c.round()

In [None]:
def plot_decadal_timeseries_with_anomalies(
    tt, original, predicted, input_data=None, anomaly_flags=None,
    save_dir=None, interval_days=10, rain=None
):
    """
    Plot a time series in consecutive windows of ``interval_days`` days.

    Each window becomes one figure showing the original and reconstructed
    series (plus the optional input series), anomalies as red points, and the
    optional rainfall as a gray line on a right-hand y-axis. The final, possibly
    shorter, window up to the last timestamp is plotted as well.

    Parameters:
    - tt: DatetimeIndex with one timestamp per sample
    - original: np.array [N]
    - predicted: np.array [N]
    - input_data: np.array [N] or None
    - anomaly_flags: np.array(bool) [N] or None (None means no anomalies)
    - save_dir: output folder; when falsy, figures are shown instead of saved
    - interval_days: window length in days (default 10)
    - rain: rainfall values [N] or None
    """
    import matplotlib.pyplot as plt
    import pandas as pd
    import os

    df = pd.DataFrame({
        'date': tt,
        'original': original,
        'predicted': predicted
    })
    if input_data is not None:
        df['input'] = input_data
    if anomaly_flags is not None:
        df['anomaly'] = anomaly_flags
    else:
        df['anomaly'] = False
    if rain is not None:
        df['rain'] = rain

    df = df.set_index('date')
    if df.empty:
        return  # nothing to plot

    # Window boundaries every `interval_days` days, starting at the first sample.
    start_date = df.index.min()
    end_date = df.index.max()
    edges = list(pd.date_range(start=start_date, end=end_date, freq=f'{interval_days}D'))
    # date_range stops at the last full interval; add the final timestamp as a
    # closing edge so the trailing partial window is plotted too.
    if edges[-1] < end_date:
        edges.append(end_date)

    if save_dir:
        os.makedirs(save_dir, exist_ok=True)

    last_window = len(edges) - 2
    for i in range(len(edges) - 1):
        t0, t1 = edges[i], edges[i + 1]
        # Windows are half-open [t0, t1) so no sample is drawn twice; only the
        # last one is closed on the right to include the final sample.
        upper = (df.index <= t1) if i == last_window else (df.index < t1)
        window = df[(df.index >= t0) & upper]

        if window.empty:
            continue

        fig, ax1 = plt.subplots(figsize=(14, 4))

        ax1.plot(window.index, window['original'], label='Original', color='black')
        ax1.plot(window.index, window['predicted'], label='Reconstructed', color='green', alpha=0.7)
        if 'input' in window.columns:
            ax1.plot(window.index, window['input'], label='Input', color='blue', alpha=0.5)

        # Anomalies as red points on top of the original series.
        anomaly_points = window[window['anomaly']]
        if not anomaly_points.empty:
            ax1.scatter(
                anomaly_points.index,
                anomaly_points['original'],
                color='red',
                label='Anomaly',
                zorder=3
            )

        handles, labels = ax1.get_legend_handles_labels()

        # Right-hand y-axis: rainfall as a gray line.
        if 'rain' in window.columns:
            ax2 = ax1.twinx()
            ax2.plot(window.index, window['rain'], label='Rainfall', color='gray', linestyle='-', alpha=0.6)
            ax2.set_ylabel('Rainfall (mm)', color='gray')
            ax2.tick_params(axis='y', labelcolor='gray')
            # ax1.legend() only knows ax1's artists; merge in the rainfall line.
            rain_handles, rain_labels = ax2.get_legend_handles_labels()
            handles += rain_handles
            labels += rain_labels

        ax1.set_title(f'{t0.date()} ~ {t1.date()} Sewer Level Reconstruction')
        ax1.set_xlabel('Date')
        ax1.set_ylabel('Sewer Level')
        ax1.grid(True)
        ax1.legend(handles, labels, loc='upper left')
        fig.tight_layout()

        if save_dir:
            fname = f"{t0.strftime('%Y%m%d')}_{t1.strftime('%Y%m%d')}.png"
            fig.savefig(os.path.join(save_dir, fname), dpi=300)
            print(f"✅ Saved: {fname}")
        else:
            plt.show()
        # Close by handle so figures do not pile up across many windows.
        plt.close(fig)


In [None]:
import matplotlib.dates as mdates
# Gwangju: plot 5-day windows with anomalies and rainfall; figures are saved under save_dir.
plot_decadal_timeseries_with_anomalies(
    tt=tt,
    original=original_norm_g, 
    predicted=corrected_norm_g, 
    input_data=input_seq_g,
    anomaly_flags=anomaly_flags_g,  
    save_dir=f'./imgs/TranAD_{seq_len}_gc/gwangjoo/',
    interval_days=5,
    rain = rain_value_g
)

In [None]:
import matplotlib.dates as mdates
# Changwon: plot 5-day windows with anomalies; figures are saved under save_dir.
# rain_value_c is extracted from the second test file in an earlier cell but
# was never passed here; pass it so these plots show rainfall like Gwangju's.
plot_decadal_timeseries_with_anomalies(
    tt=tt,
    original=original_norm_c,
    predicted=corrected_norm_c,
    input_data=input_seq_c,
    anomaly_flags=anomaly_flags_c,
    save_dir=f'./imgs/TranAD_{seq_len}_gc/changwon/',
    interval_days=5,
    rain=rain_value_c
)

## Gwangjoo anomaly statistics

In [None]:

# Print the point-wise anomaly statistics for the Gwangju series.
summarize_pointwise_anomaly_statistics(point_errors_g, anomaly_flags_g)

## Changwon anomaly statistics

In [None]:

# Print the point-wise anomaly statistics for the Changwon series.
summarize_pointwise_anomaly_statistics(point_errors_c, anomaly_flags_c)