In [2]:
import pandas as pd
from scipy.spatial.distance import cdist
import warnings
from pandas.errors import DtypeWarning
from scipy.spatial import cKDTree
import numpy as np
warnings.simplefilter(action='ignore', category=DtypeWarning)

In [3]:
def idw_interpolation(known_coords, known_values, unknown_coords, power=2):
    # 使用已知坐标创建cKDTree对象，以便高效查询最近的邻居
    tree = cKDTree(known_coords)
    
    # 对每个未知点查询其k个最近的邻居，我们这里使用3个邻居
    # distances存储到k个最近邻居的距离，indices存储k个最近邻居的索引
    distances, indices = tree.query(unknown_coords, k=10)
    
    # 避免除以零的情况，如果距离为0，将其替换为一个非常小的数
    distances[distances == 0] = 1e-10
    
    # 根据距离的倒数计算权重，权重与距离的倒数成比例
    # power参数用于调整权重的影响，power越大，近处的点权重越大
    weights = 1 / distances ** power
    
    # 对权重进行归一化，确保每个未知点的权重总和为1
    weights /= weights.sum(axis=1, keepdims=True)
    
    # 使用权重和已知值计算插值值，采用加权平均的方式
    # np.einsum用于执行权重与已知值的加权求和
    interpolated_values = np.einsum('ij,ij->i', weights, known_values[indices])
    
    # 返回插值值
    return interpolated_values


In [4]:
def perform_idw(group, feature_name):
    known_coords = group[['lon_x', 'lat_x']].values  # 已知坐标（站点）
    known_values = group[feature_name].values        # 已知值（例如最高温度），使用传入的特性名称
    unknown_coords = group[['lon_y', 'lat_y']].drop_duplicates().values # 未知坐标（火点）
    interpolated_values = idw_interpolation(known_coords, known_values, unknown_coords) # 执行IDW插值
    return pd.DataFrame({  # 返回插值结果
        'date': group['date'].iloc[0],
        'year': group['year_y'].iloc[0],
        'month': group['month_y'].iloc[0],
        'day': group['day_y'].iloc[0],
        'area': group['area'].iloc[0],
        'lon': unknown_coords[:, 0],
        'lat': unknown_coords[:, 1],
         feature_name: interpolated_values  # 使用特性名称作为列名
    })


火点数据集处理
---

In [5]:
fire = pd.read_excel(r'/Users/yanyuchen/数据存放/森林火险/fire2010-2017.xls')

In [6]:
provinces = ['辽宁省', '黑龙江省', '吉林省','云南省','四川省','贵州省']
fire = fire[fire['地区'].isin(provinces)]
name = {'地区':'area',
        '图像日期':'time',
        '东经':'lon',
        '北纬':'lat'
}
#
fire.rename(columns=name, inplace=True)
fire['time'] = pd.to_datetime(fire['time'])
fire['year'] = fire['time'].dt.year
fire['month'] = fire['time'].dt.month
fire['day'] = fire['time'].dt.day
fire['hour'] = fire['time'].dt.hour
fire['date'] = fire['time'].dt.date
fire['date'] = pd.to_datetime(fire['date'])

In [7]:
fire.head()

Unnamed: 0,area,time,lon,lat,year,month,day,hour,date
4,贵州省,2010-01-03 14:13:00,107.99,28.37,2010,1,3,14,2010-01-03
5,贵州省,2010-01-03 14:13:00,106.37,26.29,2010,1,3,14,2010-01-03
6,贵州省,2010-01-03 14:13:00,106.41,27.32,2010,1,3,14,2010-01-03
7,贵州省,2010-01-03 14:13:00,105.37,26.66,2010,1,3,14,2010-01-03
10,贵州省,2010-01-03 16:37:00,107.97,28.33,2010,1,3,16,2010-01-03


站点数据读取
---

In [8]:
sta = pd.read_csv('/Users/yanyuchen/数据存放/森林火险/dataset.csv')
sta['date'] = pd.to_datetime(sta[['year', 'month', 'day']])

时间匹配

In [9]:
merged_data = pd.merge(sta, fire, left_on='date', right_on='date', how='inner')
#merged_data.to_csv('111.csv')

In [10]:
#merged_data.head(100).to_csv('100.csv')

插值到火点（IDW）

In [11]:
#要插值的要素
# merged_data = pd.read_csv('111.csv')
column_names_list = merged_data.columns.tolist()
print(column_names_list)
column_names = ['Alti', 'TEM_Max', 'TEM_Min', 'RHU_Min', 
                'PRE_Time_2020', 'Snow_Depth', 'WIN_S_Max', 'yth1', 'yth10', 'yth100', 
                'yth1000', 'kb', 'erc', 'sc', 'bi', 'ic', 'p', 'FFMC', 'DMC', 'DC', 'FWI', 'ISI', 'BUI', 'DSR', 
                'FFDI']
interpolated_results = merged_data.groupby('date').apply(lambda group: perform_idw(group, 'TEM_Max'))
# 重置最终结果的索引
interpolated_results.reset_index(drop=True, inplace=True)
# 显示插值结果的前几行
interpolated_results.head()

['sta', 'Alti', 'year_x', 'month_x', 'day_x', 'TEM_Max', 'TEM_Min', 'RHU_Min', 'PRE_Time_2020', 'Snow_Depth', 'WIN_S_Max', 'lon_x', 'lat_x', 'yth1', 'yth10', 'yth100', 'yth1000', 'kb', 'erc', 'sc', 'bi', 'ic', 'p', 'FFMC', 'DMC', 'DC', 'FWI', 'ISI', 'BUI', 'DSR', 'FFDI', 'date', 'area', 'time', 'lon_y', 'lat_y', 'year_y', 'month_y', 'day_y', 'hour']


Unnamed: 0,date,year,month,day,area,lon,lat,TEM_Max
0,2010-01-03,2010,1,3,贵州省,107.99,28.37,16.0
1,2010-01-03,2010,1,3,贵州省,106.37,26.29,15.6
2,2010-01-03,2010,1,3,贵州省,106.41,27.32,16.3
3,2010-01-03,2010,1,3,贵州省,105.37,26.66,21.5
4,2010-01-03,2010,1,3,贵州省,107.97,28.33,16.0
