## Hyplus File Analysis

All utility functions were written by Akira37 of Hyperplasma. Note that all relative paths are resolved relative to the location of this file.

In [2]:
import os
import pandas as pd
import numpy as np

### Analyze Dataset

In [None]:
def analyze_dataset(data_path, nrows=None):
    """
    Print a summary of a CSV dataset.

    The report contains the shape, column names, dtypes, per-column missing
    value counts, descriptive statistics, per-column unique value counts
    (NaN counted as a value), and the first and last five rows.

    The file is decoded as UTF-8 first and as GBK if that fails. Problems are
    reported with an ``[Error]`` line on stdout instead of being raised.

    :param data_path: path of the CSV file to analyze
    :param nrows: maximum number of data rows to read; ``None`` (the default)
        reads the whole file. Pass a limit (e.g. 1000) to keep very large
        files from stalling the analysis.
    :return: None; everything is printed
    """
    if not os.path.exists(data_path):
        print(f"[Error] Dataset file not found: {data_path}")
        return
    print(f"\nDataset path: {data_path}")

    # Try each candidate encoding in turn. Running the fallback inside the
    # loop (rather than inside an ``except`` handler) means a failure of the
    # GBK attempt is reported like any other read error instead of escaping.
    df = None
    decode_error = None
    for encoding in ("utf-8", "gbk"):
        try:
            df = pd.read_csv(data_path, encoding=encoding, nrows=nrows)
        except UnicodeDecodeError as e:
            # Wrong encoding guess: remember the error and try the next one.
            decode_error = e
            continue
        except Exception as e:
            print(f"[Error] Failed to read CSV: {e}")
            return
        break
    if df is None:
        # Every candidate encoding failed to decode the file.
        print(f"[Error] Failed to read CSV: {decode_error}")
        return

    print(f"\nShape: {df.shape}")
    print(f"\nColumns: {list(df.columns)}")
    print("\nDtypes:")
    print(df.dtypes)
    print("\nMissing values per column:")
    print(df.isnull().sum())
    print("\nDescriptive statistics:")
    print(df.describe(include='all').T)

    print("\nUnique value count per column:")
    for col in df.columns:
        # dropna=False so that NaN is counted as its own distinct value.
        nunique = df[col].nunique(dropna=False)
        print(f"  {col}: {nunique}")

    print("\nFirst 5 rows:")
    print(df.head())
    print("\nLast 5 rows:")
    print(df.tail())


# Summarize the weather dataset; the path is relative, not absolute.
data_path = "../dataset/weather.csv"
analyze_dataset(data_path)

### Analyze Npy File



In [3]:
def analyze_npy_file(npy_path):
    """
    Print a summary of an array stored in a .npy file.

    The report contains the shape, dtype, min, max, mean and std, followed by
    the first and last five samples along the first axis. Empty arrays get
    only the shape and dtype plus a warning, because min/max are undefined
    for them. A 0-d (scalar) array is shown as a single sample.

    :param npy_path: path of the .npy file to analyze
    :return: None; everything is printed
    """
    if not os.path.exists(npy_path):
        print(f"[Error] npy文件不存在: {npy_path}")
        return
    arr = np.load(npy_path)
    print(f"\nFile: {npy_path}")
    print(f"Shape: {arr.shape}")
    print(f"Dtype: {arr.dtype}")

    if arr.size == 0:
        # min()/max() raise ValueError on a zero-size array, so stop here.
        print("[Warning] Array is empty; statistics and samples skipped.")
        return

    print(f"Min: {arr.min():.4f}, Max: {arr.max():.4f}")
    print(f"Mean: {arr.mean():.4f}, Std: {arr.std():.4f}")

    # Slicing along the first axis is the same for every rank >= 1; a 0-d
    # array cannot be sliced, so it is lifted to one dimension first.
    samples = np.atleast_1d(arr)
    print("\nFirst 5 samples:")
    print(samples[:5])
    print("\nLast 5 samples:")
    print(samples[-5:])


# Choose which saved array to inspect; swap in the commented-out path below
# to look at the other file.
# npy_path="../outputs/PatchTST/weather/preds_weather_inv.npy"
npy_path = "../outputs/PatchTST/weather/trues_weather_inv.npy"
analyze_npy_file(npy_path)


File: ../outputs/PatchTST/weather/trues_weather_inv.npy
Shape: (10445, 96)
Dtype: float64
Min: -3.0600, Max: 20.6500
Mean: 5.9129, Std: 5.0027

First 5 samples:
[[ 7.17000011  7.04000005  7.05000081  6.96999964  6.80999975  6.81999928
   6.90000045  6.56999991  6.09000025  5.84000088  5.67000023  5.09999985
   4.75999978  5.09000032  4.97999932  4.85000049  4.15999958  3.80999998
   4.09999993  4.06000057  4.04999981  3.84999995  3.67000038  3.77000001
   3.75000033  3.50999988  3.59999997  3.75000033  3.59999997  3.21999992
   2.73000011  2.77000008  2.6599997   2.6599997   2.89        3.04000036
   2.91000029  2.57999975  2.6599997   2.62999987  2.07999979  2.10000008
   2.10000008  2.10000008  2.23000015  2.27000012  2.07999979  1.73999972
   2.14000006  2.86999971  2.79999991  2.14000006  2.27000012  2.60000005
   2.73999964  2.83999988  2.78999976  2.99000024  3.15000013  3.20000025
   3.10000001  2.99000024  3.06000004  3.28999972  3.59999997  3.70000021
   3.77000001  3.5799996

### Print File Tree

In [None]:
def print_file_tree(root_path, max_files_per_level=30, prefix=""):
    """
    Print the directory tree below ``root_path`` using box-drawing connectors.

    Entries are listed in sorted order and names starting with "." are
    skipped. At each level at most ``max_files_per_level`` entries are shown;
    if there are more, a single "..." line stands in for the rest. A
    directory that cannot be listed is reported with a placeholder line.

    :param root_path: directory whose contents are printed
    :param max_files_per_level: maximum number of entries shown per level
    :param prefix: indentation carried down the recursion; callers normally
        leave it at the default
    """
    try:
        visible = sorted(
            entry for entry in os.listdir(root_path) if not entry.startswith('.')
        )
    except Exception:
        print(prefix + "[无法访问]")
        return

    last_index = len(visible) - 1
    for index, name in enumerate(visible):
        if index >= max_files_per_level:
            # Level is full: mark the omitted remainder and stop.
            print(prefix + "...")
            break

        # The final entry closes the branch; earlier ones keep the rail open.
        if index == last_index:
            branch, extension = "└── ", "    "
        else:
            branch, extension = "├── ", "│   "
        print(prefix + branch + name)

        child = os.path.join(root_path, name)
        if os.path.isdir(child):
            print_file_tree(child, max_files_per_level, prefix + extension)


# Print the tree of the parent directory, capped at 15 entries per level.
root_path = ".."
print_file_tree(root_path, max_files_per_level=15)