# 6.根據顧客年齡的差異(分成老、中、青)，比較其使用公司服務的關聯規則的異同。

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import missingno as msno

from sklearn.cluster import KMeans ,DBSCAN
from sklearn import cluster,metrics
from mlxtend.frequent_patterns import apriori ,association_rules

# Notebook-wide plotting defaults: seaborn white-grid theme, then make
# matplotlib draw the minus sign correctly (set after the theme is applied).
sns.set(style="whitegrid")
plt.rcParams['axes.unicode_minus'] = False

# pandas display options: never truncate columns, and use East Asian
# character widths when laying out printed tables.
for option_name, option_value in (
    ('display.max_columns', None),
    ('display.unicode.ambiguous_as_wide', True),
    ('display.unicode.east_asian_width', True),
):
    pd.set_option(option_name, option_value)

In [None]:
# Colab 進行matplotlib繪圖時顯示繁體中文
# 下載台北思源黑體並命名taipei_sans_tc_beta.ttf，移至指定路徑
!wget -O TaipeiSansTCBeta-Regular.ttf https://drive.google.com/uc?id=1eGAsTN1HBpJAkeVM57_C7ccp7hbgSz3_&export=download

import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib.font_manager import fontManager

# 改style要在改font之前
# plt.style.use('seaborn')

fontManager.addfont('TaipeiSansTCBeta-Regular.ttf')
mpl.rc('font', family='Taipei Sans TC Beta')

# EDA

In [None]:
# Load the customer table from the mounted Google Drive.
csv_path = '/content/drive/MyDrive/大三/上學期/大數據決策/期末報告/customer_data_handled.csv'
df = pd.read_csv(csv_path)

# Bare expression so the notebook renders the DataFrame as the cell output.
df

# Processing Data

根據題目說明，先以 KMeans 將顧客年齡分成老、中、青 3 群。

In [None]:
# Cluster customers into k groups using the age column only.
k = 3
features = ['年齡']
x = df[features]

# Fixed random_state so the cluster assignment is reproducible between runs.
kmeans = KMeans(
    n_clusters=k,
    init='k-means++',
    n_init=10,
    max_iter=300,
    random_state=42,
)
clusters = kmeans.fit_predict(x)

# Attach each customer's cluster label to the original table.
df['Age_Cluster'] = clusters

# Youngest and oldest age found in every cluster.
age_range = df.groupby('Age_Cluster')['年齡'].agg(['min', 'max'])
print(age_range)

青年族群 19-37

中年族群 38-57

老年族群 58-80

In [None]:
# Split customers into the three age segments:
#   young        19-37
#   middle-aged  38-57
#   senior       58-80
# The young filter used to be `< 37`, which dropped every 37-year-old from
# all three segments. Half-open bounds are used so the segments are mutually
# exclusive and leave no gap between neighbouring groups.
young = df[df['年齡'] < 38]
middle = df[(df['年齡'] >= 38) & (df['年齡'] < 58)]
old = df[df['年齡'] >= 58]

In [None]:
# Company service columns that take part in the association-rule mining.
features = [
    '網路服務',
    '線上安全服務',
    '線上備份服務',
    '設備保護計劃',
    '技術支援計劃',
    '電視節目',
    '電影節目',
    '音樂節目',
    '無限資料下載',
]

# Association rule

In [None]:
# One-hot encode the service columns of each age segment
# (same call for every segment, order: young, middle, old).
onehot_young, onehot_middle, onehot_old = (
    pd.get_dummies(segment[features]) for segment in (young, middle, old)
)

In [None]:
# Apriori + association rules, one segment at a time.
# Every segment uses the same settings: itemsets need support >= 0.01 and
# rules are kept when their lift is >= 1.

# Young segment
frequent_itemsets_young = apriori(
    onehot_young, min_support=0.01, use_colnames=True
)
rules_young = association_rules(
    frequent_itemsets_young, metric="lift", min_threshold=1
)

# Middle-aged segment
frequent_itemsets_middle = apriori(
    onehot_middle, min_support=0.01, use_colnames=True
)
rules_middle = association_rules(
    frequent_itemsets_middle, metric="lift", min_threshold=1
)

# Senior segment
frequent_itemsets_old = apriori(
    onehot_old, min_support=0.01, use_colnames=True
)
rules_old = association_rules(
    frequent_itemsets_old, metric="lift", min_threshold=1
)

In [None]:
# Heading, first five rules of the young segment, then a separator line.
print("年輕群組的關聯規則:", rules_young.head(), "=" * 70, sep="\n")

In [None]:
# Heading, first five rules of the middle-aged segment, then a separator line.
print("\n中年群組的關聯規則:", rules_middle.head(), "=" * 70, sep="\n")

In [None]:
# Heading followed by the first five rules of the senior segment.
print("\n老年群組的關聯規則:", rules_old.head(), sep="\n")

In [None]:
# Write each segment's rules to its own CSV file in the working directory.
# utf_8_sig adds a BOM (presumably so spreadsheet tools detect UTF-8 and
# show the Chinese column values correctly).
for segment_rules, csv_name in (
    (rules_young, 'young_customers.csv'),
    (rules_middle, 'middle_aged_customers.csv'),
    (rules_old, 'old_customers.csv'),
):
    segment_rules.to_csv(csv_name, index=False, encoding='utf_8_sig')