In [25]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
import h3
from sklearn.metrics import mean_squared_error, mean_absolute_error

In [26]:
# Load the per-hexagon table; the 'id' column holds each H3 cell index.
h3_l7_df = pd.read_csv('h3_l7_df_new.csv')

# Spatial hold-out split: cells whose longitude (second value returned by
# h3.h3_to_geo) is greater than -76.05, i.e. the eastern side, are used for
# training; every other cell goes to the test set.
train_index = []
test_index = []
for row_pos, cell_id in enumerate(h3_l7_df['id']):
    lat, lng = h3.h3_to_geo(cell_id)
    if lng > -76.05:
        train_index.append(row_pos)
    else:
        test_index.append(row_pos)

# Row subsets of the original frame (still including 'id') for each split.
train_h3_l7_df = h3_l7_df.iloc[train_index]
test_h3_l7_df = h3_l7_df.iloc[test_index]

# Drop the identifier column and cast the remaining columns to a float64 matrix.
# NOTE(review): the original comment mentioned normalisation, but no scaling
# is applied anywhere in this cell.
h3_spatial_data = h3_l7_df.drop(columns='id')
spatial_data = h3_spatial_data.to_numpy().astype(np.float64)

train_spatial_data = spatial_data[train_index]
test_spatial_data = spatial_data[test_index]

# Report the split sizes: train first, then test.
print(len(train_index), len(test_index), sep='\n')


83
94


In [27]:
# Assumes the target variable 'ohca' is the last column of the matrix
# (NOTE(review): confirm against the CSV column order).
X_train = train_spatial_data[:, :-1]  # training features
y_train = train_spatial_data[:, -1]   # training target

X_test = test_spatial_data[:, :-1]    # test features
y_test = test_spatial_data[:, -1]     # test target

# Always prepend the intercept column. With the default has_constant='skip',
# add_constant returns the input unchanged when it already contains a non-zero
# constant column; after a spatial split that can be true for one split only,
# which would leave X_train and X_test with different column layouts.
X_train = sm.add_constant(X_train, has_constant='add')
X_test = sm.add_constant(X_test, has_constant='add')

In [28]:
# Fit a Poisson GLM of the target on the design matrix (intercept included
# in X_train) using statsmodels' default fitting routine.
poisson_family = sm.families.Poisson()
poisson_model = sm.GLM(endog=y_train, exog=X_train, family=poisson_family)
poisson_results = poisson_model.fit()

In [29]:
# Show the fitted model's summary table (coefficients and fit statistics).
model_summary = poisson_results.summary()
print(model_summary)

                 Generalized Linear Model Regression Results                  
Dep. Variable:                      y   No. Observations:                   83
Model:                            GLM   Df Residuals:                       27
Model Family:                 Poisson   Df Model:                           55
Link Function:                    Log   Scale:                          1.0000
Method:                          IRLS   Log-Likelihood:                -92.153
Date:                Thu, 06 Mar 2025   Deviance:                       22.361
Time:                        16:49:57   Pearson chi2:                     40.6
No. Iterations:                    23   Pseudo R-squ. (CS):              1.000
Covariance Type:            nonrobust                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const         -2.0908      0.611     -3.421      0.0

In [39]:
# Display the observed target values of the test split.
y_test

array([ 1.,  1.,  2.,  1.,  0.,  0.,  0.,  0.,  0.,  1.,  2.,  0.,  0.,
        0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0., 15.,  7., 16.,
        3.,  9.,  0., 32., 20., 37.,  8., 41., 29., 45.,  0.,  0.,  0.,
        0.,  9.,  1., 20.,  7., 17., 32., 33.,  0.,  0.,  0., 19.,  3.,
        2., 40.,  2., 10., 22., 45., 47., 45.,  0., 29.,  0., 20.,  1.,
       20., 31., 27., 55.,  6.,  9.,  1., 37.,  3., 11., 53., 32., 56.,
       52.,  9.,  3., 13., 47., 53., 31., 30., 78., 49., 54., 25.,  2.,
       20.,  2., 20.])

In [40]:
# Compute the test-set predictions in this cell so that displaying them does
# not depend on a cell further down having been run first; on a fresh
# top-to-bottom run the bare `y_pred` would otherwise raise NameError here.
y_pred = poisson_results.predict(X_test)
y_pred

array([1.29528837e-01, 1.23582821e-01, 1.23582821e-01, 1.52415412e-01,
       1.14068080e-01, 1.30023120e-01, 2.55755048e-01, 5.38992275e-02,
       1.29528837e-01, 3.20192364e-01, 4.79307785e-01, 2.89882314e-01,
       5.48260728e-01, 5.24855134e-01, 1.53910664e-01, 4.30059577e-05,
       6.84326697e-01, 3.00762002e-01, 9.63617833e-02, 1.39568065e-01,
       1.21014850e-01, 1.37891680e-01, 1.27290318e-01, 9.62168891e-37,
       6.60954942e+00, 3.37632572e-01, 1.24453163e-01, 3.54961691e-49,
       8.48421971e+12, 4.47082274e+15, 2.63198834e+12, 1.99991349e-11,
       1.93703290e+01, 1.36838857e+12, 3.90802350e-01, 7.23569986e-02,
       1.38404399e-01, 1.22342092e-01, 1.01658612e-01, 1.40844964e-01,
       9.25656597e-11, 6.56108704e-02, 1.27672302e-05, 3.13314670e-04,
       3.69411213e-09, 5.20979883e-08, 2.37516830e-04, 1.51055666e-01,
       1.41592732e-01, 1.09228587e-01, 3.94174227e-12, 2.63190232e-06,
       1.25022061e-01, 2.18946693e+07, 1.96171394e-01, 3.32642560e+00,
      

In [38]:
# Predict the expected target value for every row of the test design matrix.
y_pred = poisson_results.predict(X_test)

# Score the predictions against the observed test targets.
mse, mae = mean_squared_error(y_test, y_pred), mean_absolute_error(y_test, y_pred)

print(
    f"Mean Squared Error: {mse}",
    f"Mean Absolute Error: {mae}",
    sep="\n",
)

Mean Squared Error: 2.2946294324956166e+78
Mean Absolute Error: 1.5688409994794067e+38


In [None]:
# NOTE(review): this cell repeats the load / split / fit steps that already
# appear in earlier cells of this notebook; consider keeping only one copy.

# Load the per-hexagon table; the 'id' column holds each H3 cell index.
h3_l7_df = pd.read_csv('h3_l7_df_new.csv')

# Spatial hold-out split: cells whose longitude (second value returned by
# h3.h3_to_geo) is greater than -76.05, i.e. the eastern side, are used for
# training; every other cell goes to the test set.
train_index = []
test_index = []
for row_pos, cell_id in enumerate(h3_l7_df['id']):
    lat, lng = h3.h3_to_geo(cell_id)
    if lng > -76.05:
        train_index.append(row_pos)
    else:
        test_index.append(row_pos)

# Row subsets of the original frame (still including 'id') for each split.
train_h3_l7_df = h3_l7_df.iloc[train_index]
test_h3_l7_df = h3_l7_df.iloc[test_index]

# Drop the identifier column and cast the remaining columns to a float64 matrix.
# NOTE(review): the original comment mentioned normalisation, but no scaling
# is applied anywhere in this cell.
h3_spatial_data = h3_l7_df.drop(columns='id')
spatial_data = h3_spatial_data.to_numpy().astype(np.float64)

train_spatial_data = spatial_data[train_index]
test_spatial_data = spatial_data[test_index]

# Report the split sizes: train first, then test.
print(len(train_index), len(test_index), sep='\n')

# Assumes the target variable 'ohca' is the last column of the matrix
# (NOTE(review): confirm against the CSV column order).
X_train = train_spatial_data[:, :-1]  # training features
y_train = train_spatial_data[:, -1]   # training target

X_test = test_spatial_data[:, :-1]    # test features
y_test = test_spatial_data[:, -1]     # test target

# Always prepend the intercept column. With the default has_constant='skip',
# add_constant returns the input unchanged when it already contains a non-zero
# constant column; after a spatial split that can be true for one split only,
# which would leave X_train and X_test with different column layouts.
X_train = sm.add_constant(X_train, has_constant='add')
X_test = sm.add_constant(X_test, has_constant='add')

# Fit a Poisson GLM of the target on the design matrix (intercept included
# in X_train) using statsmodels' default fitting routine.
poisson_family = sm.families.Poisson()
poisson_model = sm.GLM(endog=y_train, exog=X_train, family=poisson_family)
poisson_results = poisson_model.fit()