In [1]:
# Reading/Writing Data
import os
import glob
import random
import numpy as np
import math

# Pytorch
import torch
from torch.utils.data import Subset, DataLoader

# Self-Defined Package
from SMAPDataset import SMAPDataset
from SMCNN import SMCNN
from SDLoss import physics_loss
from Preprocessing import random_spatial_sequence, collate_fn

ModuleNotFoundError: No module named 'SMCNN'

# Some Utility Functions

In [None]:
def same_seed(seed):
    '''Fixes random number generator seeds for reproducibility.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch
    (plus every CUDA device when CUDA is available), and switches cuDNN to
    deterministic mode with benchmarking turned off.

    Args:
        seed: Integer seed shared by all of the generators.
    '''
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    # The stdlib generator is independent of NumPy's and PyTorch's, so it has
    # to be seeded separately.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)

# Configurations
`config` contains the random seed, the test-split ratio, and the root path of the dataset.

In [None]:
# Use the GPU when PyTorch reports CUDA as available, otherwise the CPU.
device = 'cpu'
if torch.cuda.is_available():
    device = 'cuda'

# Notebook-wide settings, looked up by key in the cells below.
config = dict(
    # Seed passed to same_seed().
    seed=5201314,
    # Presumably the share of each spatial sequence held out for testing
    # (1 - test_ratio is what gets passed to random_spatial_sequence) - TODO confirm.
    test_ratio=0,
    # Dataset root; it ends with a path separator because sub-paths are
    # appended to it by plain string concatenation.
    root='D:\\1GRADUATED\\paper\\downscaling_data\\Soil_moisture_downscale_czt\\DATASET\\',
)

# Dataloader
Read data from files and set up training, validation, and testing sets.

In [None]:
# Seed the random number generators with the configured value so that a
# re-run of the notebook draws the same numbers.
same_seed(seed=config['seed'])

In [None]:
# Train/test spatial sequences, keyed by sub-directory name.
train_sequence, test_sequence = {}, {}
s2s_dir = config['root'] + 'LABEL\\SMAPID2INSITUID'
subdir_list = sorted(os.listdir(s2s_dir))

# Walk every entry found under s2s_dir.
for subdir in subdir_list:
    subdir_path = os.path.join(s2s_dir, subdir)
    # Only directories contribute a sequence; plain files are skipped.
    if not os.path.isdir(subdir_path):
        continue

    # os.listdir gives no ordering guarantee, so take the integer stem of
    # every .npy file name and sort the result numerically.
    full_spatial_sequence_smap = [
        int(npy_name.split('.')[0])
        for npy_name in os.listdir(subdir_path)
        if npy_name.endswith('.npy')
    ]
    full_spatial_sequence_smap.sort()
    print(len(full_spatial_sequence_smap), 'of Full Spatial Sequence for', subdir, ':')
    print(full_spatial_sequence_smap)

    # Split the sequence into its train and test parts along the spatial dimension.
    train_part, test_part = random_spatial_sequence(1-config['test_ratio'], full_spatial_sequence_smap)
    train_sequence[subdir] = train_part
    test_sequence[subdir] = test_part

In [None]:
# Build the training dataset from the dataset root directory and the
# per-sub-directory training sequences stored in train_sequence.
train_dataset = SMAPDataset(config['root'], train_sequence)

In [None]:
# Number of samples in the training dataset (shown as the cell output).
len(train_dataset)

In [None]:
# Walk the dataset once and split every sample into its three parts, so each
# sample is fetched a single time instead of once per field.
process_data, label_data, meta_data = [], [], []
for sample in train_dataset:
    process_data.append(sample['processed_data'])
    label_data.append(sample['label_data'])
    meta_data.append(sample['meta_data'])

# Dataset Checking Manually

In [None]:
# Spread the per-sample metadata dicts into three parallel lists.
date, smapid, insituid = [], [], []
for meta in meta_data:
    date.append(meta['date'])
    smapid.append(meta['smapid'])
    insituid.append(meta['insituid'])

In [None]:
import random

# Inclusive range of sample indices to draw from, and how many to draw.
start, end = 1, 409
num_samples = 1

# np.random.randint excludes its upper bound, hence end + 1.
samples = np.random.randint(low=start, high=end + 1, size=num_samples)
print(samples)

In [None]:
import datetime

# Day-of-year numbering is 1-based: January 1st is day 1.
def date2day(date, fm="%Y-%m-%d"):
    '''Return the 1-based day of the year for a date string.

    Args:
        date: Date text laid out according to ``fm``.
        fm: ``strptime`` format used to parse ``date``.

    Returns:
        The day of the year as an int, where January 1st is 1.
    '''
    parsed = datetime.datetime.strptime(date, fm)
    return parsed.timetuple().tm_yday

def day2date(year, day):
    '''Convert a year and a 1-based day of the year into a ``datetime``.

    Args:
        year: Calendar year as an int.
        day: Day of the year counted from 1 (day 1 is January 1st).

    Returns:
        A ``datetime.datetime`` at midnight of the requested day.
    '''
    # Day 1 is January 1st itself, so the offset from it is day - 1 days.
    return datetime.datetime(year, 1, 1) + datetime.timedelta(days=day - 1)

In [None]:
# Print every field of the sampled dataset items so they can be compared by
# hand with the files on disk.
for i in samples:
    # Assumes date[i] is a 4-digit year followed by the day of the year
    # (e.g. '2015285') - TODO confirm against SMAPDataset.
    # Slicing by position works for any year and for day strings that also
    # occur inside the year part (e.g. '2015015', '2015201'), where splitting
    # on '2015' and then on the day string gives a wrong or empty year.
    year, day = date[i][:4], date[i][4:]
    _date = day2date(int(year), int(day)).strftime("%Y-%m-%d")
    print('日期：{} {}'.format(_date, date[i]))
    print('SMAPID：{}'.format(smapid[i]))
    print('smap_norm：{}'.format([process_data[i][0][0][0]]))
    # Both lengths are printed so a mismatch between station ids and labels is visible.
    print('len of list：{} {}'.format(len(insituid[i]), len(label_data[i])))
    for j in range(len(insituid[i])):
        print('站点ID：{}'.format(insituid[i][j]))
        print('sm：{}'.format(label_data[i][j][0]))
        print('smap_unorm：{}'.format(label_data[i][j][1]))
        print('ati, atim, atisd：{}'.format(label_data[i][j][2]))

In [None]:
# SMAP label directory for 2015285, derived from the configured dataset root
# so the absolute path is written in one place only.
root = config['root'] + 'LABEL\\SMAP\\2015285\\'

In [None]:
# Load '15.npy' from the directory held in root and print its contents.
arr = np.load(f'{root}15.npy')
print(arr)

In [None]:
# ATI label directory for 2015285, derived from the configured dataset root
# so the absolute path is written in one place only.
root = config['root'] + 'LABEL\\ATI\\2015285\\'

In [None]:
# Load '4.npy' from the directory held in root and print its contents.
# NOTE(review): allow_pickle=True makes np.load unpickle object arrays, which
# can run arbitrary code - only use it on files from a trusted source.
arr = np.load(f'{root}4.npy', allow_pickle=True)
print(arr)

In [None]:
# Spot check of date2day on a fixed date.
may_first_day = date2day('2015-05-01')
print(may_first_day)

## 经过检验：
> - dataset读取的数据与文件夹里的相同
> - 文件夹里的数据（smap，ati）与arcmap对齐后提取的excel里的数据相同
> - 提取的excel与原始tiff相同

## 检查smap与texture是否对齐

In [None]:
# Position inside each processed-data array to inspect.
x = 2
y = 2
for i in samples:
    # Print the entries at indices 0, 1 and 2 of the third axis at (x, y),
    # using the variables above instead of repeating the literal position.
    for k in range(3):
        print(process_data[i][x, y, k])

# Correlation Analysis