In [4]:
import glob
import os
import re
import chardet
import numpy as np
import pandas as pd
import streamlit as st
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf

In [23]:
# Build sliding windows of `step` consecutive observations, each paired with the observation that follows it
def sequences(data, step):
    """Turn a series into supervised (window, next value) pairs.

    Args:
        data: Indexable, sliceable sequence (list or NumPy array) of observations.
        step: Number of consecutive observations in each input window.

    Returns:
        A tuple ``(X, Y)`` of NumPy arrays. ``X[k]`` is ``data[k:k + step]`` and
        ``Y[k]`` is ``data[k + step]``. There are ``len(data) - step`` pairs;
        when that number is not positive both arrays are empty.
    """
    n_windows = len(data) - step
    windows = [data[start:start + step] for start in range(n_windows)]
    targets = [data[start + step] for start in range(n_windows)]
    return np.array(windows), np.array(targets)

In [8]:
# Read every Zaim export matching the pattern. glob returns paths in arbitrary
# order, so sort them to make the load order deterministic.
files = sorted(glob.glob("Zaim*.csv"))
if not files:
    # Fail here with a clear message instead of a NameError on df_org further down.
    raise FileNotFoundError("No file matching 'Zaim*.csv' was found in the working directory")

frames = []
for file_path in files:
    # Detect the text encoding from the raw bytes before parsing the CSV
    with open(file_path, "rb") as f:
        encoding = chardet.detect(f.read())["encoding"]
    frames.append(pd.read_csv(file_path, encoding=encoding))

# Keep the rows of *all* matched files (previously each file overwrote the
# previous one, so only the last file was used).
# NOTE(review): if several exports cover overlapping periods their rows are
# counted more than once -- confirm the exports are disjoint.
df_org = pd.concat(frames, ignore_index=True)

# Truncate the date string to its first 7 characters ("YYYY-MM")
df_org["日付"] = df_org["日付"].str[:7]

# Rename the columns that are used and drop everything else
df_org = df_org.rename(columns={
    "日付": "year-month",
    "カテゴリ": "category",
    "カテゴリの内訳": "sub_category",
    "支出": "expenses"
})[["year-month", "category", "sub_category", "expenses"]]

# Keep only rows with a positive expense amount
df_org = df_org[df_org["expenses"] > 0]

# List of year-month values present in the data
date_list = df_org["year-month"].unique()

# Total expenses per (year-month, category). groupby sorts its keys, so rows
# come out ordered by month and then by category; "YYYY-MM" strings sort
# chronologically, which the later `.iloc[-1]` / `.iloc[-12:]` lookups rely on.
df_all_cate = (
    df_org
    .groupby(["year-month", "category"], as_index=False)["expenses"]
    .sum()
    .rename(columns={"expenses": "expen_cate"})
)

df_all_cate

Unnamed: 0,year-month,category,expen_cate
0,2022-04,その他,44444
1,2022-04,エンタメ,4256
2,2022-04,クルマ,6100
3,2022-04,交際費,2013
4,2022-04,住まい,110488
...,...,...,...
298,2024-11,住まい,96532
299,2024-11,日用雑貨,1254
300,2024-11,水道・光熱,7460
301,2024-11,美容・衣服,15815


In [47]:
# Latest month present in the aggregated table. "YYYY-MM" strings sort
# chronologically, so max() gives the latest month regardless of row order.
last_month = df_all_cate["year-month"].max()
year, month = int(last_month[:4]), int(last_month[5:7])

# Roll over to the following calendar month (December wraps to January of the next year)
if month == 12:
    month = 1
    year += 1
else:
    month += 1
next_month = f"{year}-{month:02d}"

print(next_month)


df_pred = pd.DataFrame(columns=["category", "prediction"])
cate_list = df_all_cate["category"].unique()

# Print the 12 most recent monthly totals of every category that has at least 12 of them
for target_cate in cate_list:
    # Exact comparison: str.contains() interprets the name as a regular
    # expression and also matches categories that merely contain it.
    data_cate = df_all_cate[df_all_cate["category"] == target_cate].iloc[-12:].copy()
    expen_cate = data_cate["expen_cate"].values.reshape(-1, 1)

    # Categories with fewer than 12 monthly totals are skipped
    if len(expen_cate) < 12:
        continue

    print(target_cate)
    print(data_cate)

2024-12
その他
    year-month category  expen_cate
193    2023-12      その他      263400
204    2024-01      その他       80655
214    2024-02      その他       85168
223    2024-03      その他      106703
232    2024-04      その他       69574
241    2024-05      その他       95268
251    2024-06      その他       67512
260    2024-07      その他      122024
269    2024-08      その他      117150
277    2024-09      その他       72940
286    2024-10      その他      115720
294    2024-11      その他      116702
エンタメ
    year-month category  expen_cate
194    2023-12     エンタメ       10992
205    2024-01     エンタメ       33800
215    2024-02     エンタメ        1195
224    2024-03     エンタメ        9042
233    2024-04     エンタメ        2659
242    2024-05     エンタメ        9982
252    2024-06     エンタメ       17776
261    2024-07     エンタメ        7062
270    2024-08     エンタメ       10550
278    2024-09     エンタメ        2937
287    2024-10     エンタメ        6377
295    2024-11     エンタメ        7822
クルマ
    year-month category  expen_cate
35     

In [43]:
# Display the rows currently held in `data_cate`, which is assigned inside the per-category loops of this notebook
data_cate

Unnamed: 0,year-month,category,expen_cate
155,2023-07,通信,4900


In [48]:
# h5: stores the model architecture, weights and training configuration in one file
MODEL_PATH = "model.h5"

# Number of past months in each input window
step = 3

cate_list = df_all_cate["category"].unique()

# The model is loaded (or built) once, when the first eligible category is
# reached, and then trained further on every category in turn. This matches
# the previous save-then-reload on each iteration without the disk round trip.
model = None

# Predictions are collected here and appended to df_all_cate once after the
# loop, so the table being filtered is never modified while it is iterated.
# NOTE: re-running this cell without re-running the aggregation cell appends
# the predicted rows again.
pred_records = []

for target_cate in cate_list:
    # Exact comparison: str.contains() interprets the name as a regular
    # expression and also matches categories that merely contain it.
    data_cate = df_all_cate[df_all_cate["category"] == target_cate].iloc[-12:].copy()
    expen_cate = data_cate["expen_cate"].values.reshape(-1, 1)

    # Categories with fewer than 12 monthly totals are skipped
    if len(expen_cate) < 12:
        continue

    # Scale to [0, 1]; the scaler is fitted per category and reused below to
    # map the prediction back to the original units.
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_data = scaler.fit_transform(expen_cate)

    X, Y = sequences(scaled_data, step)

    if model is None:
        if os.path.exists(MODEL_PATH):
            # Continue training the model saved by an earlier run
            model = tf.keras.models.load_model(MODEL_PATH)
        else:
            # No saved model yet: build a new one
            model = tf.keras.Sequential()
            model.add(tf.keras.layers.LSTM(50, activation="relu", input_shape=(X.shape[1], X.shape[2]), return_sequences=True))
            model.add(tf.keras.layers.LSTM(100, activation="relu", return_sequences=False))
            model.add(tf.keras.layers.Dense(1, activation="linear"))

            model.compile(optimizer="adam", loss="mean_squared_error")

    # Training
    model.fit(X, Y, epochs=50, batch_size=32, verbose=0)

    # Predict the value that follows the last `step` observations
    X_test = scaled_data[-step:].reshape(1, step, 1)
    Y_pred = model.predict(X_test, verbose=0)
    Y_pred_ = scaler.inverse_transform(Y_pred)

    prediction = int(Y_pred_[0][0])

    pred_records.append({
        "year-month": next_month,
        "category": target_cate,
        "expen_cate": prediction
    })

# Save the model once, and only if at least one category was trained
if model is not None:
    model.save(MODEL_PATH)

# All predictions for next_month, one row per category
df_pred = pd.DataFrame(pred_records, columns=["year-month", "category", "expen_cate"])

if not df_pred.empty:
    df_all_cate = pd.concat([df_all_cate, df_pred], ignore_index=True)



2024-12-21 09:56:33.194611: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp_14.




2024-12-21 09:56:36.428521: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp_14.




2024-12-21 09:56:39.498842: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp_14.




2024-12-21 09:56:42.594238: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp_14.




2024-12-21 09:56:45.685521: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp_14.




2024-12-21 09:56:48.802317: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp_14.




2024-12-21 09:56:51.888037: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp_14.




2024-12-21 09:56:57.457329: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp_14.




2024-12-21 09:57:00.631762: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp_14.




2024-12-21 09:57:03.768482: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp_14.




2024-12-21 09:57:06.791047: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp_14.




In [53]:
# Write df_all_cate to "df_all_pred.csv"; index=False leaves the positional row index out of the file
df_all_cate.to_csv("df_all_pred.csv", index=False)