更改内容：
1. 对数据进行预处理（归一化到 0-1，或者**标准化**为零均值、单位方差的 z-score；注意标准化并不会把数据变成正态分布）
2. 使插值操作只对目标点周围 a 格范围内进行




导入包和函数

In [None]:

import geopandas as gpd
from math import radians, sin, cos, asin, sqrt
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from shapely.geometry import Polygon, Point
import sklearn.metrics as metrics
import random

In [None]:
# Rescaling helpers: z-score standardisation or 0-1 normalisation.
# (Our rainfall data contains no negative values.)
def standardize(data):
    """Return the z-scores of ``data``: ``(data - mean) / std``.

    This centres the values on 0 and scales them to unit (population)
    standard deviation; it does not change the shape of the distribution.

    Args:
        data: Array-like of numbers (list, NumPy array, pandas Series, ...).
            Values are read positionally, so a Series with a non-default
            index is handled correctly.

    Returns:
        numpy.ndarray of floats with the same shape as ``data``. NaN entries
        are ignored when computing the mean/std and stay NaN in the result.
        If all non-NaN values are identical the result is 0 for those
        entries instead of the NaN produced by a 0/0 division.
    """
    values = np.asarray(data, dtype=float)
    if values.size == 0:
        return values

    # Detect constant input via min == max: this is exact, whereas the
    # computed std of a constant series can be a tiny non-zero number.
    if np.nanmin(values) == np.nanmax(values):
        return np.where(np.isnan(values), np.nan, 0.0)

    return (values - np.nanmean(values)) / np.nanstd(values)


def normalize(data):
    """Min-max scale ``data`` into the range [0, 1].

    Args:
        data: Array-like of numbers (list, NumPy array, pandas Series, ...).
            Values are read positionally, so a Series with a non-default
            index is handled correctly.

    Returns:
        numpy.ndarray of floats with the same shape as ``data``, computed as
        ``(data - min) / (max - min)``. NaN entries are ignored when finding
        the min/max and stay NaN in the result. If all non-NaN values are
        identical the result is 0 for those entries instead of the NaN
        produced by a 0/0 division.
    """
    values = np.asarray(data, dtype=float)
    if values.size == 0:
        return values

    lowest = np.nanmin(values)
    span = np.nanmax(values) - lowest
    if span == 0:
        # Constant input: there is no range to scale by.
        return np.where(np.isnan(values), np.nan, 0.0)

    return (values - lowest) / span


# Hand-written haversine distance. Not yet replaced by geopy, because
# computing distances with geopy kept hanging.
def caldis(lon1, lat1, lon2, lat2):
    """Return the great-circle (haversine) distance between two points in km.

    Args:
        lon1: Longitude of the first point, in degrees.
        lat1: Latitude of the first point, in degrees.
        lon2: Longitude of the second point, in degrees.
        lat2: Latitude of the second point, in degrees.

    Returns:
        Distance in kilometres on a sphere of radius 6378.137 km. The result
        is never smaller than 1e-6, so callers can safely divide by it (for
        example when building inverse-distance weights).
    """
    earth_radius_km = 6378.137
    min_distance_km = 1e-6

    half_dlat = radians(lat1 - lat2) / 2
    half_dlon = radians(lon1 - lon2) / 2
    lat1, lat2 = radians(lat1), radians(lat2)
    t = sin(half_dlat) ** 2 + cos(lat1) * cos(lat2) * sin(half_dlon) ** 2

    # Floating-point rounding can push t marginally outside [0, 1] (e.g. for
    # near-antipodal points), which would make sqrt/asin raise ValueError.
    t = min(1.0, max(0.0, t))
    d = 2 * asin(sqrt(t)) * earth_radius_km

    # Coincident points would give a distance of 0; substitute a small
    # non-zero value to avoid division by zero downstream.
    return max(d, min_distance_km)


def idw(lon, lat, pm, x, y):
    """Interpolate values at target points by inverse-distance weighting.

    Each estimate is the weighted mean of all known values, with weight
    ``1 / d**2`` where ``d`` is the distance returned by ``caldis`` between
    the known point and the target point.

    Args:
        lon: Longitudes of the known points.
        lat: Latitudes of the known points (same length as ``lon``).
        pm: Observed values at the known points (same length as ``lon``).
        x: Longitudes of the target points.
        y: Latitudes of the target points (same length as ``x``).

    Returns:
        A list with one interpolated value per target point, in the order
        the targets were given.

    Raises:
        ValueError: If the known-point inputs, or the target inputs, have
            inconsistent lengths.
    """
    if not (len(lon) == len(lat) == len(pm)):
        raise ValueError("lon, lat and pm must have the same length")
    if len(x) != len(y):
        raise ValueError("x and y must have the same length")

    # Materialise the inputs once and iterate positionally, so pandas
    # Series with a non-default index work the same as plain lists.
    known_points = list(zip(lon, lat))
    known_values = np.asarray(pm, dtype=float)

    estimates = []
    for target_lon, target_lat in zip(x, y):
        distances = np.array(
            [caldis(k_lon, k_lat, target_lon, target_lat) for k_lon, k_lat in known_points],
            dtype=float,
        )
        # caldis never returns less than 1e-6, so this cannot divide by zero.
        weights = 1.0 / np.power(distances, 2)
        estimates.append(np.sum(known_values * weights) / np.sum(weights))

    return estimates


# Generate a random point inside a region (used to jitter points within their buffer).
def random_point_within_polygon(polygon):
    """Return a uniformly random shapely ``Point`` strictly inside a region.

    Args:
        polygon: Either a ``(min_x, min_y, max_x, max_y)`` bounds sequence,
            in which case the region is that axis-aligned rectangle, or a
            shapely geometry (any object exposing ``bounds`` and
            ``contains``), in which case the point lies inside the geometry
            itself.

    Returns:
        A ``shapely.geometry.Point`` for which ``region.contains(point)`` is
        true.

    Raises:
        ValueError: If the region is empty or has zero area; rejection
            sampling could never succeed and the loop would not terminate.
    """
    if hasattr(polygon, "bounds") and hasattr(polygon, "contains"):
        region = polygon
        if region.is_empty:
            raise ValueError("cannot sample a point inside an empty geometry")
        min_x, min_y, max_x, max_y = region.bounds
    else:
        min_x, min_y, max_x, max_y = polygon
        # Build the rectangle from its four corner points.
        region = Polygon([(min_x, min_y), (max_x, min_y), (max_x, max_y), (min_x, max_y)])

    if region.area == 0:
        raise ValueError("cannot sample a point inside a zero-area region")

    # Rejection sampling: draw from the bounding box until the candidate
    # falls inside the region (points exactly on the boundary are rejected).
    while True:
        candidate = Point(random.uniform(min_x, max_x), random.uniform(min_y, max_y))
        if region.contains(candidate):
            return candidate


修改：
为每个已知点生成半径为 0.5（坐标单位，即度）的 buffer，并在该 buffer 的外接矩形（bounds）内随机生成一个点

In [None]:

# Load the data.
# The values can be rescaled with standardize()/normalize(); our rainfall
# data contains no negative values.
# Latitude should preferably not be normalised: it has a fixed valid range,
# and rescaling it would place points at the poles.
# /content/drive/MyDrive/geo-master/random_data.csv
workbook = pd.read_csv("/content/drive/MyDrive/geo-master/random_data.csv", nrows=100)
lat, lon, rainfall = workbook.lat, workbook.lon, workbook['2016']
# rainfall = standardize(rainfall)


# Containers for the known points, the random points and the predictions.
prediction_result = []
known_geometries = []
random_geometries = []

# For every row of the CSV: build the known point, create a buffer around
# it, and draw one random point for that buffer.
for point_lon, point_lat in zip(lon, lat):
    known_point = Point(point_lon, point_lat)
    # Keep the known point; it is stored in a GeoDataFrame for later use.
    known_geometries.append(known_point)

    # Create the buffer and generate a random point for it.
    # NOTE: only the buffer's bounding box is passed on, so the random point
    # is drawn from the enclosing square and may fall outside the circle.
    circle_buffer = known_point.buffer(0.5)
    random_point = random_point_within_polygon(circle_buffer.bounds)

    # The coordinates need not be stored separately; they are available
    # through the geometry column of the GeoDataFrame.
    random_geometries.append(random_point)

# Build each GeoDataFrame once instead of calling pd.concat per row, which
# copies the whole frame on every iteration.
points = gpd.GeoDataFrame(geometry=known_geometries)
random_points = gpd.GeoDataFrame(geometry=random_geometries)


# Interpolate with IDW at the random points.
prediction_result = idw(lon.tolist(), lat.tolist(), rainfall, random_points.geometry.x, random_points.geometry.y)
print(prediction_result)

[860.707402872474, 508.45309191566025, 926.7762966429731, 248.6938500798622, 502.75710200373214, 294.7710833701856, 92.06020012779908, 117.25932863645757, 296.60379092484413, 956.9320885048439, 579.790207787103, 603.2716675064406, 2521.027245405381, 980.9851145945207, 1358.9778103422386, 1434.4452981073662, 341.0770697153727, 346.68904075807586, 639.2538110196782, 159.23447379308058, 79.84001369088055, 1065.7592184715093, 358.0390674450011, 729.0773769512022, 1089.636952497897, 2116.441299660827, 2220.2165895704984, 193.33854756219554, 174.5489120772206, 481.95252092569046, 548.0265814598145, 286.32686810476554, 240.24481838125516, 1704.3968074445324, 97.71605218818216, 119.02104294762078, 1103.9610644023926, 1470.5928176147904, 1557.5155833858903, 1346.1890737491315, 46.52056960418647, 948.946869762387, 2171.3487323764252, 678.1700634050073, 682.1769931743323, 657.9764146485244, 178.62121055932195, 77.36020284986476, 1529.7526629175443, 67.36753743541617, 81.04791840957833, 1974.46156

In [None]:
# Compute the accuracy of the interpolation against the observed rainfall.
#   MAE / MSE: 0 means a perfect prediction; larger values mean larger errors.
#   R^2: 1 means a perfect prediction, 0 means the model is no better than
#        always predicting the mean, and negative values mean it is worse.
mae, mse, r2 = (
    score(rainfall, prediction_result)
    for score in (
        metrics.mean_absolute_error,
        metrics.mean_squared_error,
        metrics.r2_score,
    )
)
rmse = np.sqrt(mse)  # RMSE is the square root of the MSE.

print("The accuracy of mae, mse, rmse, r2:", (mae, mse, rmse, r2))


The accuracy of mae, mse, rmse, r2: (27.338484269687324, 1935.6797233264533, 43.9963603418107, 0.994790499455605)
