In [7]:
import pandas as pd
import numpy as np
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules

# 讀取數據
df = pd.read_csv('/home/yuchi/Bike_Rental_Prediction/SeoulBikeData.csv')

# 數據預處理
# 將連續型變量離散化
def discretize_column(df, column, bins):
    labels = [f'{column}_{i}' for i in range(len(bins)-1)]
    df[f'{column}_cat'] = pd.cut(df[column], bins=bins, labels=labels)
    return df

# 對主要特徵進行離散化
df = discretize_column(df, 'Temperature(C)', bins=[-20, 0, 10, 20, 30, 40])
df = discretize_column(df, 'Humidity(%)', bins=[0, 20, 40, 60, 80, 100])
df = discretize_column(df, 'Wind speed (m/s)', bins=[0, 2, 4, 6, 8, 10])
df = discretize_column(df, 'Rented Bike Count', bins=[0, 500, 1000, 1500, 2000, 3000])

# 創建one-hot編碼
columns_to_encode = ['Temperature(C)_cat', 'Humidity(%)_cat', 'Wind speed (m/s)_cat', 'Rented Bike Count_cat']
df_encoded = pd.get_dummies(df[columns_to_encode])

# 使用Apriori算法找出頻繁項集
frequent_itemsets = apriori(df_encoded, min_support=0.1, use_colnames=True)

# 生成關聯規則
rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.5, num_itemsets=len(frequent_itemsets))

# 定義函數用於預測租借數量
def predict_bike_count(temp, humidity, wind_speed):
    # 將輸入值轉換為對應的類別
    temp_cat = pd.cut([temp], bins=[-20, 0, 10, 20, 30, 40], labels=['Temperature(C)_0', 'Temperature(C)_1', 'Temperature(C)_2', 'Temperature(C)_3', 'Temperature(C)_4'])[0]
    humidity_cat = pd.cut([humidity], bins=[0, 20, 40, 60, 80, 100], labels=['Humidity(%)_0', 'Humidity(%)_1', 'Humidity(%)_2', 'Humidity(%)_3', 'Humidity(%)_4'])[0]
    wind_cat = pd.cut([wind_speed], bins=[0, 2, 4, 6, 8, 10], labels=['Wind speed (m/s)_0', 'Wind speed (m/s)_1', 'Wind speed (m/s)_2', 'Wind speed (m/s)_3', 'Wind speed (m/s)_4'])[0]
    print(f"溫度類別: {temp_cat}" + f"\n濕度類別: {humidity_cat}" + f"\n風速類別: {wind_cat}")

    # 查找相關規則
    relevant_rules = rules[rules['antecedents'].apply(lambda x: temp_cat in x or humidity_cat in x or wind_cat in x)]
    print(relevant_rules)
    relevant_rules = relevant_rules[relevant_rules['consequents'].apply(lambda x: 'Rented Bike Count' in str(x))]
    print(relevant_rules)
    ''''''
    if len(relevant_rules) > 0:
        # 根據置信度最高的規則進行預測
        best_rule = relevant_rules.sort_values('confidence', ascending=False).iloc[0]
        predicted_range = list(best_rule['consequents'])[0]
        return f"預測的租借數量範圍: {predicted_range}"
    else:
        return "無法找到足夠的關聯規則進行預測"
    ''''''

# 測試預測函數
print(predict_bike_count(15, 50, 3))

# 顯示一些重要的關聯規則
print("\n重要關聯規則:")
important_rules = rules[rules['lift'] > 1.5].sort_values('confidence', ascending=False)
print(important_rules[['antecedents', 'consequents', 'confidence', 'lift']].head())


溫度類別: Temperature(C)_2
濕度類別: Humidity(%)_2
風速類別: Wind speed (m/s)_1
Empty DataFrame
Columns: [antecedents, consequents, antecedent support, consequent support, support, confidence, lift, representativity, leverage, conviction, zhangs_metric, jaccard, certainty, kulczynski]
Index: []
Empty DataFrame
Columns: []
Index: []
無法找到足夠的關聯規則進行預測

重要關聯規則:
                                          antecedents  \
0               (Temperature(C)_cat_Temperature(C)_0)   
13  (Wind speed (m/s)_cat_Wind speed (m/s)_0, Humi...   
8                     (Humidity(%)_cat_Humidity(%)_4)   
15                    (Humidity(%)_cat_Humidity(%)_4)   

                                          consequents  confidence      lift  
0         (Rented Bike Count_cat_Rented Bike Count_0)    0.954608  2.055645  
13        (Rented Bike Count_cat_Rented Bike Count_0)    0.747107  1.608815  
8         (Rented Bike Count_cat_Rented Bike Count_0)    0.740106  1.593738  
15  (Wind speed (m/s)_cat_Wind speed (m/s)_0, Rent...  