In [1]:
import pandas as pd
from scipy.spatial.distance import cdist
import warnings
from pandas.errors import DtypeWarning
from scipy.spatial import cKDTree
import numpy as np
warnings.simplefilter(action='ignore', category=DtypeWarning)

In [2]:
def idw_interpolation(known_coords, known_values, unknown_coords, power=2):
    # 使用已知坐标创建cKDTree对象，以便高效查询最近的邻居
    tree = cKDTree(known_coords)
    
    # 对每个未知点查询其k个最近的邻居，我们这里使用3个邻居
    # distances存储到k个最近邻居的距离，indices存储k个最近邻居的索引
    distances, indices = tree.query(unknown_coords, k=10)
    
    # 避免除以零的情况，如果距离为0，将其替换为一个非常小的数
    distances[distances == 0] = 1e-10
    
    # 根据距离的倒数计算权重，权重与距离的倒数成比例
    # power参数用于调整权重的影响，power越大，近处的点权重越大
    weights = 1 / distances ** power
    
    # 对权重进行归一化，确保每个未知点的权重总和为1
    weights /= weights.sum(axis=1, keepdims=True)
    
    # 使用权重和已知值计算插值值，采用加权平均的方式
    # np.einsum用于执行权重与已知值的加权求和
    interpolated_values = np.einsum('ij,ij->i', weights, known_values[indices])
    
    # 返回插值值
    return interpolated_values


In [3]:
def perform_idw(group, feature_name):
    known_coords = group[['lon_x', 'lat_x']].values  # 已知坐标（站点）
    known_values = group[feature_name].values        # 已知值（例如最高温度），使用传入的特性名称
    unknown_coords = group[['lon_y', 'lat_y']].drop_duplicates().values # 未知坐标（火点）
    interpolated_values = idw_interpolation(known_coords, known_values, unknown_coords) # 执行IDW插值
    return pd.DataFrame({  # 返回插值结果
        'date': group['date'].iloc[0],
        'year': group['year_y'].iloc[0],
        'month': group['month_y'].iloc[0],
        'day': group['day_y'].iloc[0],
        'area': group['area'].iloc[0],
        'lon': unknown_coords[:, 0],
        'lat': unknown_coords[:, 1],
         feature_name: interpolated_values  # 使用特性名称作为列名
    })


火点数据集处理
---

In [47]:
fire = pd.read_excel(r'E:\dataset\fire2010-2017.xls')

In [48]:
provinces = ['辽宁省', '黑龙江省', '吉林省']
fire = fire[fire['地区'].isin(provinces)]
name = {'地区':'area',
        '图像日期':'time',
        '东经':'lon',
        '北纬':'lat'
}
#
fire.rename(columns=name, inplace=True)
fire['time'] = pd.to_datetime(fire['time'])
fire['year'] = fire['time'].dt.year
fire['month'] = fire['time'].dt.month
fire['day'] = fire['time'].dt.day
fire['hour'] = fire['time'].dt.hour
fire['date'] = fire['time'].dt.date
fire['date'] = pd.to_datetime(fire['date'])

In [49]:
fire.head()

Unnamed: 0,area,time,lon,lat,year,month,day,hour,date
2164,辽宁省,2010-03-10 12:41:00,120.1782,40.9771,2010,3,10,12,2010-03-10
2833,辽宁省,2010-03-30 10:18:00,122.3467,40.4534,2010,3,30,10,2010-03-30
2834,辽宁省,2010-03-30 11:23:00,122.3741,40.4504,2010,3,30,11,2010-03-30
2837,辽宁省,2010-04-01 13:07:00,121.39,41.4604,2010,4,1,13,2010-04-01
2838,辽宁省,2010-04-01 13:49:00,121.3935,41.457,2010,4,1,13,2010-04-01


站点数据读取
---

In [51]:
sta = pd.read_csv('dataset.csv')
sta['date'] = pd.to_datetime(sta[['year', 'month', 'day']])

时间匹配

In [54]:
merged_data = pd.merge(sta, fire, left_on='date', right_on='date', how='inner')
merged_data.to_csv('111.csv')

In [57]:
merged_data.head(100).to_csv('100.csv')

插值到火点（IDW）

In [4]:
#要插值的要素
merged_data = pd.read_csv('111.csv')
column_names_list = merged_data.columns.tolist()
print(column_names_list)
column_names = ['Alti', 'TEM_Max', 'TEM_Min', 'RHU_Min', 
                'PRE_Time_2020', 'Snow_Depth', 'WIN_S_Max', 'yth1', 'yth10', 'yth100', 
                'yth1000', 'kb', 'erc', 'sc', 'bi', 'ic', 'p', 'FFMC', 'DMC', 'DC', 'FWI', 'ISI', 'BUI', 'DSR', 
                'FFDI']
interpolated_results = merged_data.groupby('date').apply(lambda group: perform_idw(group, 'TEM_Max'))
# 重置最终结果的索引
interpolated_results.reset_index(drop=True, inplace=True)
# 显示插值结果的前几行
interpolated_results.head()

['Unnamed: 0', 'sta', 'Alti', 'year_x', 'month_x', 'day_x', 'TEM_Max', 'TEM_Min', 'RHU_Min', 'PRE_Time_2020', 'Snow_Depth', 'WIN_S_Max', 'lon_x', 'lat_x', 'yth1', 'yth10', 'yth100', 'yth1000', 'kb', 'erc', 'sc', 'bi', 'ic', 'p', 'FFMC', 'DMC', 'DC', 'FWI', 'ISI', 'BUI', 'DSR', 'FFDI', 'date', 'area', 'time', 'lon_y', 'lat_y', 'year_y', 'month_y', 'day_y', 'hour']


Unnamed: 0,date,year,month,day,area,lon,lat,TEM_Max
0,2010-03-10,2010,3,10,辽宁省,120.1782,40.9771,7.372037
1,2010-03-30,2010,3,30,辽宁省,122.3467,40.4534,15.246878
2,2010-03-30,2010,3,30,辽宁省,122.3741,40.4504,15.240758
3,2010-04-01,2010,4,1,辽宁省,121.39,41.4604,10.840293
4,2010-04-01,2010,4,1,辽宁省,121.3935,41.457,10.837965
