In [1]:
import os
import config
import pandas as pd
import numpy as np
import os
import torch
import torch.nn as nn
from torch.autograd import Function
from tqdm import tqdm
from utility_uad_svm import load_data, make_sequences, create_dataloaders, SeqDataset

#BASE_DIR = os.path.dirname(os.path.abspath(__file__))
base_dir = os.getcwd()
#print("当前工作目录:", base_dir)
parent_dir = os.path.dirname(os.path.dirname(base_dir))
csv_path = {0: os.path.join(parent_dir, "Transfer Learning\data_with_sat_type", "observation_data_case5_with_sat_type.csv"),
            1: os.path.join(parent_dir, "Transfer Learning\data_with_sat_type", "Case1_Urban_10Hz_with_sat_type.csv"),
            2: os.path.join(parent_dir, "Transfer Learning\data_with_sat_type", "Case1_Suburban_10Hz_with_sat_type.csv"),
            3: os.path.join(parent_dir, "Transfer Learning\data_with_sat_type", "Case2_Urban_10Hz_with_sat_type.csv"),
            4: os.path.join(parent_dir, "Transfer Learning\data_with_sat_type", "Case2_Suburban_10Hz_with_sat_type.csv"),
            5: os.path.join(parent_dir, "Transfer Learning\data_with_sat_type", "Case3_Urban_10Hz_with_sat_type.csv"),
            6: os.path.join(parent_dir, "Transfer Learning\data_with_sat_type", "Case3_Suburban_10Hz_with_sat_type.csv"),
            7: os.path.join(parent_dir, "Transfer Learning\data_with_sat_type", "Case4_Urban_10Hz_with_sat_type.csv"),
            8: os.path.join(parent_dir, "Transfer Learning\data_with_sat_type", "Case4_Suburban_10Hz_with_sat_type.csv"),
            }
for key, path in csv_path.items():
    print(f"Key: {key}, Path: {path}")

--- [Config] 正在使用的设备: cpu ---
Key: 0, Path: c:\Users\yangj\Desktop\4JYY\4JYY\Transfer Learning\data_with_sat_type\observation_data_case5_with_sat_type.csv
Key: 1, Path: c:\Users\yangj\Desktop\4JYY\4JYY\Transfer Learning\data_with_sat_type\Case1_Urban_10Hz_with_sat_type.csv
Key: 2, Path: c:\Users\yangj\Desktop\4JYY\4JYY\Transfer Learning\data_with_sat_type\Case1_Suburban_10Hz_with_sat_type.csv
Key: 3, Path: c:\Users\yangj\Desktop\4JYY\4JYY\Transfer Learning\data_with_sat_type\Case2_Urban_10Hz_with_sat_type.csv
Key: 4, Path: c:\Users\yangj\Desktop\4JYY\4JYY\Transfer Learning\data_with_sat_type\Case2_Suburban_10Hz_with_sat_type.csv
Key: 5, Path: c:\Users\yangj\Desktop\4JYY\4JYY\Transfer Learning\data_with_sat_type\Case3_Urban_10Hz_with_sat_type.csv
Key: 6, Path: c:\Users\yangj\Desktop\4JYY\4JYY\Transfer Learning\data_with_sat_type\Case3_Suburban_10Hz_with_sat_type.csv
Key: 7, Path: c:\Users\yangj\Desktop\4JYY\4JYY\Transfer Learning\data_with_sat_type\Case4_Urban_10Hz_with_sat_type.csv
Key

In [2]:
class Config:
    SEQ_LEN = 10        # 序列长度 (例如 10 个历元)
    STEP = 1            # 滑动窗口步长
    BATCH_SIZE = 64     # 批次大小
    
    # 请根据你的 CSV 实际列名修改这里！
    # 假设 CSV 中有这些列:
    FEATURE_COLS = ["cn0", "elevation", "pseudorange_corrected_cb", "doppler_shift","sat_type"] 
    TARGET_COL = 'multipath'     # 标签列 (LOS/NLOS)
    GROUP_COL = 'sv_id'      # 卫星 ID 列
    TIME_COL = 'gps_time'    # 时间戳列

config = Config()

In [3]:
def get_dataloaders(csv_path_dict):
    # ---------------------------------------------------------
    # A. 读取数据 (Load Data)
    # ---------------------------------------------------------
    print("\n[1/4] 正在读取 CSV 文件...")
    sim_dfs = []
    real_df = None

    for key, path in csv_path_dict.items():
        if not os.path.exists(path):
            print(f"  [警告] 文件不存在: {path}")
            continue
            
        df = pd.read_csv(path)
        
        # 简单清洗：去除特征列为空的行
        df = df.dropna(subset=config.FEATURE_COLS)

        if key == 0: # Key 0 是真实数据 (Target Domain)
            real_df = df
            print(f"  -> 已加载真实数据 (Target): {len(df)} 行")
        else:        # 其他 Key 是仿真数据 (Source Domain)
            sim_dfs.append(df)
    
    if not sim_dfs or real_df is None:
        raise ValueError("数据加载失败，请检查路径配置。")

    # 合并所有仿真数据
    sim_df_all = pd.concat(sim_dfs, ignore_index=True)
    print(f"  -> 已合并仿真数据 (Source): {len(sim_df_all)} 行")

    # ---------------------------------------------------------
    # B. 归一化 (Normalization)
    # ---------------------------------------------------------
    # 注意：必须在生成序列之前，对 DataFrame 的列进行归一化。
    # 由于真实数据的 Pseudorange Residual 极其巨大 (10^7)，必须分别归一化
    print("\n[2/4] 正在进行归一化处理...")

    # 1. 归一化源域 (Simulation)
    scaler_sim = MinMaxScaler()
    sim_df_all[config.FEATURE_COLS] = scaler_sim.fit_transform(sim_df_all[config.FEATURE_COLS])

    # 2. 归一化目标域 (Real World)
    # 单独 fit 目标域，以将巨大的残差映射到 [0, 1] 范围，方便 LSTM 提取相对特征
    scaler_real = MinMaxScaler()
    real_df[config.FEATURE_COLS] = scaler_real.fit_transform(real_df[config.FEATURE_COLS])

    print("  -> 归一化完成 (源域和目标域独立缩放)")

    # ---------------------------------------------------------
    # C. 创建序列 (Make Sequences) - 调用你的函数
    # ---------------------------------------------------------
    print("\n[3/4] 正在调用 make_sequences 生成时序数据...")

    # 1. 源域序列 (带标签)
    print("  -> 处理源域数据:")
    X_sim, y_sim = make_sequences(
        df=sim_df_all,
        features=config.FEATURE_COLS,
        target=config.TARGET_COL,
        seq_len=config.SEQ_LEN,
        step=config.STEP,
        group_col=config.GROUP_COL,
        time_col=config.TIME_COL
    )

    # 2. 目标域序列 (无标签 / 不使用标签)
    # DANN 中目标域通常被视为无标签，或者我们不传入 target 让函数只返回 X
    print("  -> 处理目标域数据:")
    X_real = make_sequences(
        df=real_df,
        features=config.FEATURE_COLS,
        target=None, # 目标域不提取标签用于训练 (或者你可以提取用于测试验证)
        seq_len=config.SEQ_LEN,
        step=config.STEP,
        group_col=config.GROUP_COL,
        time_col=config.TIME_COL
    )

    # ---------------------------------------------------------
    # D. 创建 Dataset 和 DataLoader - 使用你的类
    # ---------------------------------------------------------
    print("\n[4/4] 创建 PyTorch DataLoaders...")

    # 1. 源域 Dataset
    source_dataset = SeqDataset(X_sim, y_sim)
    
    # 2. 目标域 Dataset (没有标签，y会自动填0)
    target_dataset = SeqDataset(X_real, y=None)

    # 3. DataLoader
    # drop_last=True 非常关键！因为 DANN 训练时如果不丢弃最后一个不完整的 batch，
    # 可能导致源域和目标域 batch 尺寸不匹配报错。
    source_loader = DataLoader(
        source_dataset, 
        batch_size=config.BATCH_SIZE, 
        shuffle=True, 
        drop_last=True
    )
    
    target_loader = DataLoader(
        target_dataset, 
        batch_size=config.BATCH_SIZE, 
        shuffle=True, 
        drop_last=True
    )

    print(f"\n=== 完成 ===")
    print(f"源域 DataLoader: {len(source_loader)} batches")
    print(f"目标域 DataLoader: {len(target_loader)} batches")
    
    return source_loader, target_loader

# ---------------------------------------------------------
# 执行
# ---------------------------------------------------------
# 确保这一步之前已经运行了你的 import os ... csv_path 代码
source_loader, target_loader = get_dataloaders(csv_path)


[1/4] 正在读取 CSV 文件...
  -> 已加载真实数据 (Target): 11243 行
  -> 已合并仿真数据 (Source): 710063 行

[2/4] 正在进行归一化处理...


NameError: name 'MinMaxScaler' is not defined