In [None]:
import pandas as pd
import numpy as np
import torch
import imblearn
from imblearn.over_sampling import RandomOverSampler
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
from keras.models import load_model
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Load the weekly feature table
data = pd.read_csv('./data/feature_week.csv')

# Binary flag: 1 when sub_days is at least 90, otherwise 0
data['sub_days_group'] = np.where(data['sub_days'] >= 90, 1, 0)

# Work on a copy so `data` itself is not modified by the encoding below
data_copy = data.copy()

# One-hot encode 'connect', 'platform' and 'plan_type'
df_onehot = pd.get_dummies(data_copy, columns=['connect', 'platform', 'plan_type'])

# Drop identifier, date and raw per-week columns.
# Note: 'gender', 'age' and 'twm_number' are NOT in this list and stay in the frame.
df_onehot = df_onehot.drop(columns=['id', 'sub_start', 'sub_end', 'sub_start_new', 'unique_id', 'game', 'unsub_after_holiday', 'unsub_during_holiday', 'cover_vacation', 'week0', 'week1', 'week2', 'week3', 'week4', 'week5', 'week6', 'week7', 'week8', 'week9', 'week10', 'week11', 'week12'])

# Multiplying by 1 turns boolean values (e.g. the dummy columns) into integers
df_onehot = df_onehot * 1

# Replace the literal string "none" with 0
df_onehot = df_onehot.replace(to_replace='none', value=0)

# Convert remaining object columns to numeric; unparseable values become NaN
object_columns = df_onehot.select_dtypes(include=['object']).columns
for column in object_columns:
    df_onehot[column] = pd.to_numeric(df_onehot[column], errors='coerce')

# Fill missing values LAST: errors='coerce' above can introduce new NaNs,
# so filling before the conversion would leave them in the feature table.
df_onehot = df_onehot.fillna(0)

In [None]:
# Base (non-weekly) columns that are replicated into one copy per week
base_columns = [
    'sub_before_vacation',
    'sub_during_vacation',
    'plan_type_年訂',
    'plan_type_季訂',
    'plan_type_月訂',
    'sub_days',
    'sub_days_group',
    'casual',
    'free',
    'adventure',
    'action',
    'multiplayer_tactical_competition',
    'massive_multiplayer_online',
    'platformer',
    'fighting',
    'simulation',
    'indie',
    'racing',
    'first_person_shooter',
    'strategy',
    'arcade',
    'role_playing',
    'puzzle',
    'sports',
    'family',
    'connect_ethernet',
    'connect_mobile',
    'connect_none',
    'connect_wifi',
    'platform_android',
    'platform_ios',
    'platform_ipados',
    'platform_linux',
    'platform_macos',
    'platform_none',
    'platform_other',
    'platform_web',
    'platform_windows',
]

# Map each base column to its per-week name template ('week{}_<column>').
# Templates are derived from the column name so none can be mistyped: a template
# without a '{}' placeholder would make all 12 weeks write to one and the same column.
columns_to_create = {name: 'week{}_' + name for name in base_columns}

# Report every absent base column at once instead of failing on the first lookup
missing_columns = [name for name in base_columns if name not in df_onehot.columns]
if missing_columns:
    raise KeyError(f"df_onehot is missing expected columns: {missing_columns}")

# Create (or overwrite) the weekly copy of every base column for weeks 1 through 12
for week in range(1, 13):
    for key, value in columns_to_create.items():
        df_onehot[value.format(week)] = df_onehot[key]

In [None]:
# Columns to normalise: for each of weeks 1-12, the avg_play_minute,
# avg_row_count_by_day and sub_days columns, in week-major order.
columns_to_scale = [
    f'week{week_no}_{metric}'
    for week_no in range(1, 13)
    for metric in ('avg_play_minute', 'avg_row_count_by_day', 'sub_days')
]

# Min-max scale the listed columns and write the scaled values back into df_onehot.
# NOTE(review): the scaler is fitted on the data being scored here — confirm this
# matches how the features were scaled when the model was trained.
scaler = MinMaxScaler()
scaled_values = scaler.fit_transform(df_onehot[columns_to_scale])
df_onehot[columns_to_scale] = scaled_values


In [None]:
# Feature table used by the following cells; X is a second name for the same df_onehot object (no copy is made)
X = df_onehot

In [None]:
def generate_columns(variables, start_week, end_week):
    """Build per-week column names of the form 'week<N>_<variable>'.

    Args:
        variables: Iterable of variable-name strings (the suffix after 'week<N>_').
        start_week: First week number to include.
        end_week: Last week number to include (inclusive).

    Returns:
        A list of column names in week-major order: every variable for
        start_week first, then every variable for the next week, and so on.
        The list is empty when end_week < start_week.
    """
    return [
        f'week{week_no}_' + name
        for week_no in range(start_week, end_week + 1)
        for name in variables
    ]

# Variables fed to the model (13 names); each is expanded to one column per week
variables = [
    'plan_type_年訂',
    'plan_type_季訂',
    'plan_type_月訂',
    'sub_days',
    'sub_days_group',
    'sub_before_vacation',
    'sub_during_vacation',
    'free',
    'adventure',
    'massive_multiplayer_online',
    'first_person_shooter',
    'avg_play_minute',
    'avg_row_count_by_day',
]

# Column names for these variables for weeks 1 through 12, week-major order
selected_columns = generate_columns(variables, start_week=1, end_week=12)
selected_columns

In [None]:
# Model input layout: 12 weeks x 13 features per row
# Take a copy holding only the selected per-week columns
df_selected_x_test = X[selected_columns].copy()

# One sample per row of df_onehot; each row's values are split into a (12, 13) block
n_samples = df_onehot.shape[0]
X_test = np.array(df_selected_x_test).reshape((n_samples, 12, 13))

# Convert to a float32 PyTorch tensor
X_tensor_test = torch.tensor(X_test, dtype=torch.float32)

In [None]:
# Load the saved model from disk
model = load_model('./data/model/lstm_model_G3.h5')

# Predict probabilities, then threshold at 0.5 to get 0/1 labels
y_pred_prob = model.predict(X_tensor_test)
y_pred = (y_pred_prob > 0.5).astype(int)

# Show the predictions, their shape, and the shape of the input table
print(y_pred)
print(y_pred.shape)
print(data.shape)

# Attach the predictions to `data` only when there is exactly one per row;
# otherwise report the mismatch and leave `data` unchanged
n_predictions, n_rows = len(y_pred), len(data)
if n_predictions != n_rows:
    print(f"Error: Length of predictions ({n_predictions}) does not match length of data ({n_rows})")
else:
    data['y_pred'] = y_pred

# Print the resulting DataFrame
print(data)