In [21]:
import os
import glob
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import rasterio
import xarray as xr
import rioxarray as rxr

In [22]:
root = r"C:\Users\duong\Documents\UET-VNU\K68-CS1\HKII_2024-2025\PM2.5\Feature_Maps"

In [23]:
with rasterio.open(os.path.join(root, "SQRT_SEA_DEM_LAT.tif")) as src:
    sqrt_arr = src.read(1)
    nod_sqrt = src.nodata   
    transform = src.transform

In [24]:
nrow, ncol = sqrt_arr.shape
rows, cols = np.indices((nrow, ncol))

In [25]:
import rasterio.transform


records = []
for var in ["PRES2M","RH","TMP","TN","TP","TX","WDIR","WSPD"]:
    for fp in sorted(glob.glob(os.path.join(root, var, f"{var}_*.tif"))):
        # parse ngày
        date_str = os.path.basename(fp).split("_")[-1].replace(".tif","")
        time = pd.to_datetime(date_str, format="%Y%m%d")
        with rasterio.open(fp) as src:
            arr = src.read(1); nod = src.nodata
        
        # mask pixel không hợp lệ
        mask = (arr != nod) & (sqrt_arr != nod_sqrt)
        
        lat, lon = rasterio.transform.xy(transform, cols[mask], rows[mask], offset='center')
        
        records.append(pd.DataFrame({
            "time": time,
            "row":  rows[mask].ravel(),
            "col":  cols[mask].ravel(),
            "lat": np.array(lat).ravel(),  # Thêm latitude
            "lon": np.array(lon).ravel(),
            "variable": var,
            "value":    arr[mask].ravel(),
            "SQRT_SEA_DEM_LAT": sqrt_arr[mask].ravel()
        }))


In [26]:
df_long = pd.concat(records, ignore_index=True)

In [27]:
vars_order = ["PRES2M","RH","TMP","TN","TP","TX","WDIR","WSPD"]

df_wide = (
    df_long
      .pivot(index=["time","row","col","lat", "lon","SQRT_SEA_DEM_LAT"],
             columns="variable",
             values="value")
      .reset_index()
)

In [28]:
df_wide.columns.name = None
df_wide = df_wide[["time","row","col", "lat", "lon", "SQRT_SEA_DEM_LAT"] + vars_order]

In [29]:
df_wide.head(20)

Unnamed: 0,time,row,col,lat,lon,SQRT_SEA_DEM_LAT,PRES2M,RH,TMP,TN,TP,TX,WDIR,WSPD
0,2021-12-30,0,316,102.155,20.225,1.303528,86996.75,79.206245,11.043369,8.35998,0.0,15.215998,185.410431,1.623216
1,2021-12-30,0,317,102.155,20.215,1.303528,86996.75,79.206245,11.043369,8.35998,0.0,15.215998,185.410431,1.623216
2,2021-12-30,1,315,102.165,20.235,1.303528,87366.351562,81.731247,10.776867,8.35998,0.0,14.320002,181.923721,1.581845
3,2021-12-30,1,316,102.165,20.225,1.303528,87366.351562,81.731255,10.776867,8.35998,0.0,14.320002,181.923721,1.581845
4,2021-12-30,1,317,102.165,20.215,1.303528,87366.351562,81.731247,10.776867,8.35998,0.0,14.320002,181.923721,1.581845
5,2021-12-30,1,318,102.165,20.205,1.283406,87366.351562,81.731247,10.776867,8.35998,0.0,14.320002,181.923721,1.581845
6,2021-12-30,2,315,102.175,20.235,1.100273,87735.953125,84.256256,10.510365,8.35998,0.0,13.424006,178.437012,1.540474
7,2021-12-30,2,316,102.175,20.225,1.100273,87735.953125,84.256256,10.510365,8.35998,0.0,13.424006,178.437012,1.540474
8,2021-12-30,2,317,102.175,20.215,1.100273,87735.953125,84.256256,10.510365,8.35998,0.0,13.424006,178.437012,1.540474
9,2021-12-30,2,318,102.175,20.205,1.203841,87735.953125,84.256256,10.510365,8.35998,0.0,13.424006,178.437012,1.540474


In [30]:
df_wide.to_csv('features_map.csv', index=False)