In [4]:
import os
import glob
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import rasterio
import xarray as xr
import rioxarray as rxr

In [5]:
# Root folder that holds SQRT_SEA_DEM_LAT.tif and one sub-folder of dated
# GeoTIFFs per variable. Set the PM25_FEATURE_MAPS_DIR environment variable to
# run the notebook on another machine; the original local path stays the default.
root = os.environ.get(
    "PM25_FEATURE_MAPS_DIR",
    r"C:\Users\duong\Documents\UET-VNU\K68-CS1\HKII_2024-2025\PM2.5\Feature_Maps",
)

In [6]:
# Load band 1 of the static reference raster together with the nodata value
# declared in its metadata.
sqrt_path = os.path.join(root, "SQRT_SEA_DEM_LAT.tif")
with rasterio.open(sqrt_path) as src:
    nod_sqrt = src.nodata
    sqrt_arr = src.read(1)

In [7]:
# Per-pixel row / column index grids with the same shape as the reference raster.
rows, cols = np.indices(sqrt_arr.shape)
nrow, ncol = rows.shape

In [8]:
def _valid_mask(arr, nodata):
    """Return a boolean array marking the pixels of ``arr`` that are not nodata.

    ``nodata`` is the value reported by the raster: it may be ``None`` (the
    file declares no nodata value) or NaN. NaN needs ``np.isnan`` because
    ``arr != nan`` is True for every element and would keep all missing pixels.
    """
    if nodata is None:
        return np.ones(arr.shape, dtype=bool)
    if np.isnan(nodata):
        return ~np.isnan(arr)
    return arr != nodata


# The reference-raster mask does not depend on the file being read: compute it once.
sqrt_valid = _valid_mask(sqrt_arr, nod_sqrt)

records = []
for var in ["PRES2M","RH","TMP","TN","TP","TX","WDIR","WSPD"]:
    for fp in sorted(glob.glob(os.path.join(root, var, f"{var}_*.tif"))):
        # Parse the date from the trailing "_YYYYMMDD" part of the file name.
        # splitext also strips an upper-case ".TIF" extension.
        stem = os.path.splitext(os.path.basename(fp))[0]
        date_str = stem.split("_")[-1]
        time = pd.to_datetime(date_str, format="%Y%m%d")
        with rasterio.open(fp) as src:
            arr = src.read(1)
            nod = src.nodata

        # The boolean indexing below requires identical grids; fail early and
        # name the offending file instead of raising a cryptic indexing error.
        if arr.shape != sqrt_arr.shape:
            raise ValueError(
                f"{fp}: raster shape {arr.shape} does not match "
                f"SQRT_SEA_DEM_LAT shape {sqrt_arr.shape}"
            )

        # Keep only pixels that are valid in both this raster and the reference raster.
        mask = _valid_mask(arr, nod) & sqrt_valid

        # Boolean indexing already yields 1-D arrays, so no ravel() is needed.
        records.append(pd.DataFrame({
            "time": time,
            "row":  rows[mask],
            "col":  cols[mask],
            "variable": var,
            "value":    arr[mask],
            "SQRT_SEA_DEM_LAT": sqrt_arr[mask]
        }))


In [9]:
# pd.concat on an empty list fails with an uninformative "No objects to
# concatenate"; that happens when no GeoTIFF was found under `root`, so report
# the likely cause explicitly (same exception type as before).
if not records:
    raise ValueError(
        "No feature rasters were loaded: check that `root` points to the "
        "Feature_Maps folder and that it contains <VAR>/<VAR>_<YYYYMMDD>.tif files."
    )

# Stack the per-file frames into one long table: one row per pixel, date and variable.
df_long = pd.concat(records, ignore_index=True)

In [10]:
# Column order wanted for the variables in the wide table.
vars_order = ["PRES2M", "RH", "TMP", "TN", "TP", "TX", "WDIR", "WSPD"]

# Long -> wide: one row per (time, row, col, SQRT_SEA_DEM_LAT) and one column
# per variable, obtained by unstacking the "variable" level of the index.
pixel_keys = ["time", "row", "col", "SQRT_SEA_DEM_LAT"]
df_wide = (
    df_long
      .set_index(pixel_keys + ["variable"])["value"]
      .unstack("variable")
      .reset_index()
)

In [11]:
# Put the key columns first, followed by the variables in their canonical order,
# then clear the name carried by the column axis.
key_cols = ["time", "row", "col", "SQRT_SEA_DEM_LAT"]
df_wide = df_wide[key_cols + vars_order]
df_wide.columns.name = None

In [12]:
# Preview the first rows of the wide feature table as a quick sanity check.
df_wide.head()

Unnamed: 0,time,row,col,SQRT_SEA_DEM_LAT,PRES2M,RH,TMP,TN,TP,TX,WDIR,WSPD
0,2021-12-30,0,316,1.303528,86996.75,79.206245,11.043369,8.35998,0.0,15.215998,185.410431,1.623216
1,2021-12-30,0,317,1.303528,86996.75,79.206245,11.043369,8.35998,0.0,15.215998,185.410431,1.623216
2,2021-12-30,1,315,1.303528,87366.351562,81.731247,10.776867,8.35998,0.0,14.320002,181.923721,1.581845
3,2021-12-30,1,316,1.303528,87366.351562,81.731255,10.776867,8.35998,0.0,14.320002,181.923721,1.581845
4,2021-12-30,1,317,1.303528,87366.351562,81.731247,10.776867,8.35998,0.0,14.320002,181.923721,1.581845


In [14]:
# Write the wide feature table to features_map.csv (relative path, so it lands
# in the current working directory); index=False omits the DataFrame index.
df_wide.to_csv('features_map.csv', index=False)