## 取出GOCI2資料

以下是已filter出- 左上：Lat 47.4935º Lon123.0000º
- 右上：Lat 47.4935º Lon143.4700º
- 左下：Lat 24.0000º Lon123.0000º
- 右下：Lat 24.0000º Lon143.4700º

In [1]:
import netCDF4 as nc
import numpy as np
import pandas as pd

# 打開 NetCDF 檔案
file_path = 'GK2B_GOCI2_L2_20230918_011530_LA_Chl.nc'
dataset = nc.Dataset(file_path)

# 讀取 geophysical_data 群組中的變數
geophysical_data = dataset.groups['geophysical_data']
Chl = geophysical_data.variables['Chl'][:]
flag = geophysical_data.variables['flag'][:]

# 讀取 navigation_data 群組中的變數
navigation_data = dataset.groups['navigation_data']
latitude = navigation_data.variables['latitude'][:]
longitude = navigation_data.variables['longitude'][:]

# 設定範圍條件
lat_min, lat_max = 24.0000, 47.4935
lon_min, lon_max = 123.0000, 143.4700

# 找到符合條件的索引
lat_mask = (latitude >= lat_min) & (latitude <= lat_max)
lon_mask = (longitude >= lon_min) & (longitude <= lon_max)
combined_mask = lat_mask & lon_mask

# 取出符合條件的數據
latitude_filtered = latitude[combined_mask]
longitude_filtered = longitude[combined_mask]
Chl_filtered = Chl[combined_mask]
flag_filtered = flag[combined_mask]

# 轉換成一維數組
latitude_flat = latitude_filtered.flatten()
longitude_flat = longitude_filtered.flatten()
Chl_flat = Chl_filtered.flatten()
flag_flat = flag_filtered.flatten()

# 創建 DataFrame
data = {
    'Latitude': latitude_flat,
    'Longitude': longitude_flat,
    'Chlorophyll': Chl_flat,
    'Flag': flag_flat
}
df = pd.DataFrame(data)

# 匯出成 CSV 檔案
csv_file_path = 'GK2B_GOCI2_L2_20230918_011530_LA_Chl_filtered.csv'
df.to_csv(csv_file_path, index=False)

print(f"數據已匯出至 {csv_file_path}")



數據已匯出至 GK2B_GOCI2_L2_20230918_011530_LA_Chl_filtered.csv


In [1]:
# import netCDF4 as nc
# import numpy as np
# import pandas as pd
# 
# # 打開 NetCDF 檔案
# file_path = 'NC_H09_20230918_0110_r14_FLDK.02701_02601.nc'
# dataset = nc.Dataset(file_path)
# 
# # 讀取變數
# latitude = dataset.variables['latitude'][:]
# longitude = dataset.variables['longitude'][:]
# albedo_01 = dataset.variables['albedo_01'][:]
# albedo_02 = dataset.variables['albedo_02'][:]
# albedo_03 = dataset.variables['albedo_03'][:]
# albedo_04 = dataset.variables['albedo_04'][:]
# 
# # 將經緯度和反射率轉換成一維數組
# latitude_flat = np.tile(latitude, (longitude.shape[0], 1)).T.flatten()
# longitude_flat = np.tile(longitude, (latitude.shape[0], 1)).flatten()
# albedo_01_flat = albedo_01.flatten()
# albedo_02_flat = albedo_02.flatten()
# albedo_03_flat = albedo_03.flatten()
# albedo_04_flat = albedo_04.flatten()
# 
# # 創建 DataFrame
# data = {
#     'Latitude': latitude_flat,
#     'Longitude': longitude_flat,
#     'Albedo_01': albedo_01_flat,
#     'Albedo_02': albedo_02_flat,
#     'Albedo_03': albedo_03_flat,
#     'Albedo_04': albedo_04_flat
# }
# df = pd.DataFrame(data)
# 
# # 匯出成 CSV 檔案
# csv_file_path = 'NC_H09_20230918_0110_r14_FLDK.02701_02601_filtered.csv'
# df.to_csv(csv_file_path, index=False)
# 
# print(f"數據已匯出至 {csv_file_path}")
# 

數據已匯出至 NC_H09_20230918_0110_r14_FLDK.02701_02601_filtered.csv


In [2]:
import netCDF4 as nc
import numpy as np
import pandas as pd

# 打開 NetCDF 檔案
file_path = 'NC_H09_20230918_0110_r14_FLDK.02701_02601.nc'
dataset = nc.Dataset(file_path)

# 讀取變數
latitude = dataset.variables['latitude'][:]
longitude = dataset.variables['longitude'][:]
albedo_01 = dataset.variables['albedo_01'][:]
albedo_02 = dataset.variables['albedo_02'][:]
albedo_03 = dataset.variables['albedo_03'][:]
albedo_04 = dataset.variables['albedo_04'][:]

# 設定範圍條件
lat_min, lat_max = 24.0000, 47.4935
lon_min, lon_max = 123.0000, 143.4700

# 找到符合條件的索引
lat_mask = (latitude >= lat_min) & (latitude <= lat_max)
lon_mask = (longitude >= lon_min) & (longitude <= lon_max)

# 確保latitude和longitude的遮罩應用到albedo數據
lat_indices = np.where(lat_mask)[0]
lon_indices = np.where(lon_mask)[0]

# 使用範圍內的索引篩選數據
latitude_filtered = latitude[lat_indices]
longitude_filtered = longitude[lon_indices]
albedo_01_filtered = albedo_01[np.ix_(lat_indices, lon_indices)]
albedo_02_filtered = albedo_02[np.ix_(lat_indices, lon_indices)]
albedo_03_filtered = albedo_03[np.ix_(lat_indices, lon_indices)]
albedo_04_filtered = albedo_04[np.ix_(lat_indices, lon_indices)]

# 轉換成一維數組
latitude_flat = np.repeat(latitude_filtered, len(longitude_filtered))
longitude_flat = np.tile(longitude_filtered, len(latitude_filtered))
albedo_01_flat = albedo_01_filtered.flatten()
albedo_02_flat = albedo_02_filtered.flatten()
albedo_03_flat = albedo_03_filtered.flatten()
albedo_04_flat = albedo_04_filtered.flatten()

# 創建 DataFrame
data = {
    'Latitude': latitude_flat,
    'Longitude': longitude_flat,
    'Albedo_01': albedo_01_flat,
    'Albedo_02': albedo_02_flat,
    'Albedo_03': albedo_03_flat,
    'Albedo_04': albedo_04_flat
}
df = pd.DataFrame(data)

# 匯出成 CSV 檔案
csv_file_path = 'NC_H09_20230918_0110_r14_FLDK.02701_02601_filtered.csv'
df.to_csv(csv_file_path, index=False)

print(f"數據已匯出至 {csv_file_path}")


數據已匯出至 NC_H09_20230918_0110_r14_FLDK.02701_02601_filtered.csv


## 原本想降解析度

In [1]:
import numpy as np
import pandas as pd

# 假設已經讀取並轉換為 DataFrame 的資料
high_res_data = pd.read_csv('GK2B_GOCI2_L2_20230918_011530_LA_Chl_filtered.csv')  # 250m 解析度
low_res_data = pd.read_csv('NC_H09_20230918_0110_r14_FLDK.02701_02601_filtered.csv')   # 1km 解析度

# 重新排列高解析度數據的經緯度，以便於後續分組平均
high_res_data = high_res_data.sort_values(by=['Latitude', 'Longitude'])

# 提取高解析度數據
high_lat = high_res_data['Latitude'].values
high_lon = high_res_data['Longitude'].values
high_chlorophyll = high_res_data['Chlorophyll'].values

# 提取低解析度數據
low_lat = low_res_data['Latitude'].values
low_lon = low_res_data['Longitude'].values
low_albedo_01 = low_res_data['Albedo_01'].values

# 計算新的低解析度數據的經緯度範圍
new_latitudes = np.arange(high_lat.min(), high_lat.max(), 0.01)  # 每格1km
new_longitudes = np.arange(high_lon.min(), high_lon.max(), 0.01)  # 每格1km

# 初始化新的低解析度數據
new_low_res_data = {
    'Latitude': [],
    'Longitude': [],
    'Chlorophyll': [],
    'Albedo_01': []
}

# 遍歷新的低解析度經緯度範圍
for new_lat in new_latitudes:
    for new_lon in new_longitudes:
        # 找到對應的16個高解析度格子
        mask = ((high_lat >= new_lat) & (high_lat < new_lat + 0.01) &
                (high_lon >= new_lon) & (high_lon < new_lon + 0.01))
        if np.any(mask):
            # 對應的16個格子的平均值
            mean_chlorophyll = np.mean(high_chlorophyll[mask])
            mean_albedo_01 = np.mean(low_albedo_01[mask])

            # 存儲新的低解析度數據
            new_low_res_data['Latitude'].append(new_lat)
            new_low_res_data['Longitude'].append(new_lon)
            new_low_res_data['Chlorophyll'].append(mean_chlorophyll)
            new_low_res_data['Albedo_01'].append(mean_albedo_01)

# 創建新的低解析度 DataFrame
new_low_res_df = pd.DataFrame(new_low_res_data)

# 匯出新的低解析度數據
new_low_res_df.to_csv('new_low_res_data.csv', index=False)

print("數據已重取樣並匯出至 new_low_res_data.csv")


IndexError: boolean index did not match indexed array along dimension 0; dimension is 4812800 but corresponding boolean dimension is 52770165

## 改使用最近鄰方法來對齊經緯度

太久失敗的方法

In [2]:
import pandas as pd
from geopy.distance import great_circle

# 讀取CSV文件
goci2_data = pd.read_csv("GK2B_GOCI2_L2_20230918_011530_LA_Chl_filtered.csv")
himawari_data = pd.read_csv("NC_H09_20230918_0110_r14_FLDK.02701_02601_filtered.csv")

# 定義函數找到最接近的經緯度
def find_closest(lat, lon, df):
    min_distance = float('inf')
    closest_row = None
    for index, row in df.iterrows():
        distance = great_circle((lat, lon), (row['Latitude'], row['Longitude'])).meters
        if distance < min_distance:
            min_distance = distance
            closest_row = row
    return closest_row

# 創建新資料集，存儲匹配的數據
combined_data = []

for index, row in goci2_data.iterrows():
    if pd.isna(row['Chlorophyll']):
        continue
    closest_row = find_closest(row['Latitude'], row['Longitude'], himawari_data)
    combined_row = {
        'GOCI2_Latitude': row['Latitude'],
        'GOCI2_Longitude': row['Longitude'],
        'Chlorophyll': row['Chlorophyll'],
        'Himawari_Latitude': closest_row['Latitude'],
        'Himawari_Longitude': closest_row['Longitude'],
        'Albedo_01': closest_row['Albedo_01'],
        'Albedo_02': closest_row['Albedo_02'],
        'Albedo_03': closest_row['Albedo_03'],
        'Albedo_04': closest_row['Albedo_04']
    }
    combined_data.append(combined_row)

# 轉換為DataFrame並保存為新的CSV文件
combined_df = pd.DataFrame(combined_data)
combined_df.to_csv("combined_dataset.csv", index=False)


KeyboardInterrupt: 

換個演算法
只取前10行

In [5]:
import pandas as pd
from scipy.spatial import KDTree
import numpy as np

# 讀取CSV文件
goci2_data = pd.read_csv("GK2B_GOCI2_L2_20230918_011530_LA_Chl_filtered.csv")
himawari_data = pd.read_csv("NC_H09_20230918_0110_r14_FLDK.02701_02601_filtered.csv")

# 濾除缺失值
goci2_data = goci2_data.dropna(subset=['Chlorophyll'])

# 只取前10行
goci2_data = goci2_data.head(10)

# 建立KDTree
himawari_coords = himawari_data[['Latitude', 'Longitude']].to_numpy()
tree = KDTree(himawari_coords)

# 創建新資料集，存儲匹配的數據
combined_data = []

for index, row in goci2_data.iterrows():
    dist, idx = tree.query([row['Latitude'], row['Longitude']])
    closest_row = himawari_data.iloc[idx]
    combined_row = {
        'GOCI2_Latitude': row['Latitude'],
        'GOCI2_Longitude': row['Longitude'],
        'Chlorophyll': row['Chlorophyll'],
        'Himawari_Latitude': closest_row['Latitude'],
        'Himawari_Longitude': closest_row['Longitude'],
        'Albedo_01': closest_row['Albedo_01'],
        'Albedo_02': closest_row['Albedo_02'],
        'Albedo_03': closest_row['Albedo_03'],
        'Albedo_04': closest_row['Albedo_04']
    }
    combined_data.append(combined_row)

# 轉換為DataFrame並保存為新的CSV文件
combined_df = pd.DataFrame(combined_data)
combined_df.to_csv("combined_dataset.csv", index=False)


全部的

In [7]:
import pandas as pd
from scipy.spatial import KDTree
import numpy as np

# 讀取CSV文件
goci2_data = pd.read_csv("GK2B_GOCI2_L2_20230918_011530_LA_Chl_filtered.csv")
himawari_data = pd.read_csv("NC_H09_20230918_0110_r14_FLDK.02701_02601_filtered.csv")

# 濾除缺失值
goci2_data = goci2_data.dropna(subset=['Chlorophyll'])

# 建立KDTree
himawari_coords = himawari_data[['Latitude', 'Longitude']].to_numpy()
tree = KDTree(himawari_coords)

# 創建新資料集，存儲匹配的數據
combined_data = []

for index, row in goci2_data.iterrows():
    dist, idx = tree.query([row['Latitude'], row['Longitude']])
    closest_row = himawari_data.iloc[idx]
    combined_row = {
        'GOCI2_Latitude': row['Latitude'],
        'GOCI2_Longitude': row['Longitude'],
        'Himawari_Latitude': closest_row['Latitude'],
        'Himawari_Longitude': closest_row['Longitude'],
        'Chlorophyll': row['Chlorophyll'],
        'Albedo_01': closest_row['Albedo_01'],
        'Albedo_02': closest_row['Albedo_02'],
        'Albedo_03': closest_row['Albedo_03'],
        'Albedo_04': closest_row['Albedo_04']
    }
    combined_data.append(combined_row)

# 轉換為DataFrame並保存為新的CSV文件
combined_df = pd.DataFrame(combined_data)
combined_df.to_csv("combined_dataset_all.csv", index=False)
