## Hyplus File Analysis

All utility functions were written by Akira37 from Hyperplasma. Note that all relative paths are resolved relative to the location of this file.

In [9]:
import os
import pandas as pd
import numpy as np

### Analyze Dataset

In [None]:
def analyze_dataset(data_path, nrows=None):
    """
    Print a summary report for a dataset stored as a CSV file.

    The report contains the shape, column names, dtypes, missing-value count
    per column, descriptive statistics, unique-value count per column (NaN is
    counted as a value), and the first and last five rows.

    The file is decoded as UTF-8 first and as GBK if UTF-8 decoding fails.
    A missing file or a read failure is reported on stdout and the function
    returns early instead of raising.

    :param data_path: Path to the CSV file.
    :param nrows: Optional cap on the number of data rows to load, useful to
        keep very large files from stalling the session. ``None`` (the
        default) loads the whole file.
    :return: None
    """
    if not os.path.exists(data_path):
        print(f"[Error] Dataset file not found: {data_path}")
        return
    print(f"\nDataset path: {data_path}")

    # Try each candidate encoding in turn. Only a decoding failure moves on to
    # the next encoding; any other error aborts immediately. Every failure is
    # reported, including one raised by the last fallback.
    df = None
    read_error = None
    for encoding in ("utf-8", "gbk"):
        try:
            df = pd.read_csv(data_path, encoding=encoding, nrows=nrows)
        except UnicodeDecodeError as e:
            read_error = e
            continue
        except Exception as e:
            read_error = e
        break
    if df is None:
        print(f"[Error] Failed to read CSV: {read_error}")
        return

    print(f"\nShape: {df.shape}")
    print(f"\nColumns: {list(df.columns)}")
    print("\nDtypes:")
    print(df.dtypes)
    print("\nMissing values per column:")
    print(df.isnull().sum())
    print("\nDescriptive statistics:")
    print(df.describe(include='all').T)

    print("\nUnique value count per column:")
    for col in df.columns:
        # dropna=False so that NaN counts as its own distinct value.
        nunique = df[col].nunique(dropna=False)
        print(f"  {col}: {nunique}")

    print("\nFirst 5 rows:")
    print(df.head())
    print("\nLast 5 rows:")
    print(df.tail())


# Summarize the weather dataset.
data_path = "../dataset/weather.csv"
analyze_dataset(data_path)

### Analyze Npy File



In [None]:
def analyze_npy_file(npy_path):
    """
    Print a summary of the array stored in a ``.npy`` file.

    The report contains the shape, dtype, min, max, mean, std, and the first
    and last five entries along the first axis (for multi-dimensional arrays
    each entry is a full slice of the remaining axes).

    A missing file is reported on stdout instead of raising. An empty array
    gets its shape and dtype printed and is otherwise skipped, and a 0-d
    array is printed as a single value.

    :param npy_path: Path to the ``.npy`` file.
    :return: None
    """
    if not os.path.exists(npy_path):
        print(f"[Error] npy文件不存在: {npy_path}")
        return
    arr = np.load(npy_path)
    print(f"\nFile: {npy_path}")
    print(f"Shape: {arr.shape}")
    print(f"Dtype: {arr.dtype}")

    # min()/max() raise on zero-size arrays, so stop before the statistics.
    if arr.size == 0:
        print("[Warning] Array is empty; statistics and samples skipped.")
        return

    print(f"Min: {arr.min():.4f}, Max: {arr.max():.4f}")
    print(f"Mean: {arr.mean():.4f}, Std: {arr.std():.4f}")

    # Slicing on the first axis works for any rank >= 1; a 0-d array cannot
    # be sliced, so it is shown as-is.
    if arr.ndim == 0:
        head = tail = arr
    else:
        head, tail = arr[:5], arr[-5:]

    print("\nFirst 5 samples:")
    print(head)
    print("\nLast 5 samples:")
    print(tail)


# Inspect the saved `trues` array of the PatchTST weather run; swap in the
# commented-out path below to inspect the `preds` array instead.
# npy_path = "../outputs/PatchTST/weather/preds_weather_inv.npy"
npy_path = "../outputs/PatchTST/weather/trues_weather_inv.npy"
analyze_npy_file(npy_path)

### Print File Tree

In [15]:
def print_file_tree(root_path, max_files_per_level=30, prefix=""):
    """
    Print the directory tree under ``root_path``.

    Entries are listed in sorted order. Hidden entries (names starting with
    ".") are omitted. At most ``max_files_per_level`` entries are shown per
    directory; if there are more, a single "..." line stands in for the rest.
    Symbolic links to directories are listed but not descended into, so a
    link that points back at an ancestor cannot cause runaway recursion.

    :param root_path: Path of the directory to print.
    :param max_files_per_level: Maximum number of files/folders shown per
        directory level.
    :param prefix: Indentation prefix for the current level (used by the
        recursive calls).
    :return: None
    """
    try:
        items = sorted(f for f in os.listdir(root_path) if not f.startswith('.'))
    except OSError:
        # Unreadable or vanished directory: mark it and keep going.
        print(prefix + "[无法访问]")
        return

    last_index = len(items) - 1
    for i, name in enumerate(items):
        if i >= max_files_per_level:
            print(prefix + "...")
            break
        is_last = i == last_index
        print(prefix + ("└── " if is_last else "├── ") + name)
        path = os.path.join(root_path, name)
        if os.path.isdir(path) and not os.path.islink(path):
            # Keep the vertical rule only while siblings remain below.
            child_prefix = prefix + ("    " if is_last else "│   ")
            print_file_tree(path, max_files_per_level, child_prefix)


# Print the tree of the parent directory, showing at most 15 entries per level.
root_path = ".."
print_file_tree(root_path, max_files_per_level=15)

├── LICENSE
├── README.md
├── dataset
│   ├── ETTh1.csv
│   ├── ETTh2.csv
│   ├── ETTm1.csv
│   ├── ETTm2.csv
│   ├── Electric_Production.csv
│   ├── electricity.csv
│   ├── exchange_rate.csv
│   ├── national_illness.csv
│   ├── traffic.csv
│   └── weather.csv
├── docs
│   ├── Autoformer.md
│   ├── PatchTST.md
│   └── Transformer.md
├── outputs
│   ├── LSTM
│   │   └── weather
│   │       ├── best_weather.pth
│   │       ├── checkpoint_weather.pth
│   │       └── train_log_weather.txt
│   ├── PatchTST
│   │   └── weather
│   │       ├── best_weather.pth
│   │       ├── checkpoint_weather.pth
│   │       ├── figures
│   │       │   └── var0.png
│   │       ├── preds_weather_inv.npy
│   │       ├── test_result_weather.txt
│   │       ├── train_log_weather.txt
│   │       └── trues_weather_inv.npy
│   └── Transformer
│       └── weather
├── requirements.txt
├── test
│   ├── PatchTST_README.md
│   └── test.ipynb
├── todo.txt
├── tsf
│   ├── common
│   │   ├── __pycache__
│   │   │   ├── co