In [8]:
import numpy as np
import gym
from gym import spaces

class FinancialPlanningEnv(gym.Env):
    """Daily budgeting environment using the classic 4-tuple gym step API.

    Every step represents one day. A random income and a random expense are
    drawn uniformly from the configured bounds, then the chosen action is
    applied:

    * ``0`` (save): the day's net amount (income - expenses) is added to
      savings and returned as the reward.
    * ``1`` (spend): savings are untouched; the reward is minus the day's
      expenses.
    * ``2`` (buy bag): if savings cover ``bag_price`` it is deducted and the
      reward is ``-bag_price``; otherwise the reward is a flat ``-2000``.

    Any other action value leaves every counter untouched and yields reward 0.

    The observation is ``[savings, total_income, total_expenses,
    savings / goal]`` as a float32 array. The episode is done once savings
    reach ``goal``.
    """

    def __init__(self, goal=10000, bag_price=2000):
        super().__init__()

        # Targets supplied by the caller.
        self.goal = goal
        self.bag_price = bag_price

        # Bounds of the uniform distributions sampled on every step.
        self.min_daily_income, self.max_daily_income = 136, 159
        self.min_daily_expenses, self.max_daily_expenses = 12, 35

        # Running totals; reset() puts them back to zero.
        self.savings = 0
        self.total_income = 0
        self.total_expenses = 0
        self.state = None

        self.action_space = spaces.Discrete(3)  # 0: save, 1: spend, 2: buy bag
        self.observation_space = spaces.Box(low=0, high=np.inf, shape=(4,), dtype=np.float32)

    def reset(self):
        """Zero all running totals and return the initial observation."""
        self.savings = self.total_income = self.total_expenses = 0
        self.state = [self.savings, self.total_income, self.total_expenses, 0]
        return np.array(self.state, dtype=np.float32)

    def step(self, action):
        """Simulate one day and return ``(observation, reward, done, info)``."""
        # Income is sampled before expenses, on every call, whatever the action.
        income_today = np.random.uniform(self.min_daily_income, self.max_daily_income)
        expenses_today = np.random.uniform(self.min_daily_expenses, self.max_daily_expenses)

        reward = 0
        recognised = True

        if action == 0:  # save
            net_today = income_today - expenses_today
            self.savings += net_today
            reward = net_today
        elif action == 1:  # spend
            reward = -expenses_today
        elif action == 2:  # buy bag
            can_afford = self.savings >= self.bag_price
            if can_afford:
                self.savings -= self.bag_price
            reward = -self.bag_price if can_afford else -2000
        else:
            recognised = False

        # The day's income and expenses are only booked for the three known actions.
        if recognised:
            self.total_income += income_today
            self.total_expenses += expenses_today

        progress = self.savings / self.goal
        self.state = [self.savings, self.total_income, self.total_expenses, progress]
        done = self.savings >= self.goal

        return np.array(self.state, dtype=np.float32), reward, done, {}

    def render(self, mode='human'):
        """Print (``'human'``) or return (``'ansi'``) a one-line summary.

        Any other mode is forwarded to the base class implementation.
        """
        output = f"Savings: {self.savings}, Income: {self.total_income}, Expenses: {self.total_expenses}, Goal Progress: {self.savings / self.goal:.2%}"
        if mode == 'ansi':
            return output
        if mode == 'human':
            print(output)
            return None
        super().render(mode=mode)


## Sample Model Training and Evaluation

In [25]:
import numpy as np
import gymnasium as gym
from gymnasium import spaces
from stable_baselines3 import DQN
from stable_baselines3.common.env_checker import check_env
from gymnasium.utils import seeding
import time

class FinancialPlanningEnv(gym.Env):
    """Daily budgeting environment following the gymnasium 5-tuple step API.

    Every step represents one day. A random income and a random expense are
    drawn uniformly from the configured bounds using the environment's own
    seeded generator (``self.np_random``), then the chosen action is applied:

    * ``0`` (save): the day's net amount (income - expenses) is added to
      savings; reward is 10% of that amount.
    * ``1`` (spend): savings are untouched; reward is -10% of the day's
      expenses.
    * ``2`` (buy bag): if savings cover ``bag_price`` it is deducted and the
      reward is ``+1000``; otherwise the reward is ``-2000``.

    On top of that, a bonus of ``+10`` is granted on every step where savings
    are at least half of ``goal``.

    Observation: ``[savings, total_income, total_expenses, savings / goal]``
    as float32. The progress ratio is not capped and can exceed 1 on the step
    that crosses the goal, so every component is unbounded above.

    The episode terminates once ``savings >= goal`` and is truncated after
    ``max_steps`` steps.

    Args:
        goal: Savings target; must be positive.
        bag_price: Price deducted from savings when the bag is bought.

    Raises:
        ValueError: If ``goal`` is not positive.
    """

    metadata = {"render_modes": ["human", "ansi"]}

    def __init__(self, goal=1000, bag_price=2000):
        super(FinancialPlanningEnv, self).__init__()
        if goal <= 0:
            # step() divides by goal to compute progress.
            raise ValueError(f"goal must be positive, got {goal!r}")

        self.state = None
        self.savings = 0
        self.total_income = 0
        self.total_expenses = 0
        self.goal = goal
        self.bag_price = bag_price
        self.max_daily_income = 159
        self.min_daily_income = 136
        self.max_daily_expenses = 35
        self.min_daily_expenses = 12
        self.action_space = spaces.Discrete(3)  # 0: save, 1: spend, 2: buy bag
        # savings / goal can overshoot 1 on the terminating step, so the
        # progress component must not be capped at 1.
        self.observation_space = spaces.Box(low=0, high=np.inf, shape=(4,), dtype=np.float32)
        self.max_steps = 365  # Maximum steps per episode, e.g., 1 year
        self.current_step = 0

    def reset(self, seed=None, options=None):
        """Start a new episode.

        Args:
            seed: Optional seed; forwarded to ``gym.Env.reset`` which (re)seeds
                ``self.np_random``.
            options: Accepted for API compatibility; unused.

        Returns:
            ``(observation, info)`` with all totals at zero and an empty info dict.
        """
        super().reset(seed=seed)
        self.savings = 0
        self.total_income = 0
        self.total_expenses = 0
        self.current_step = 0
        self.state = [self.savings, self.total_income, self.total_expenses, 0]
        return np.array(self.state, dtype=np.float32), {}

    def step(self, action):
        """Simulate one day.

        Args:
            action: ``0`` (save), ``1`` (spend) or ``2`` (buy bag).

        Returns:
            ``(observation, reward, terminated, truncated, info)``.

        Raises:
            ValueError: If ``action`` is not one of the three known actions.
        """
        # Use the environment's generator so reset(seed=...) makes runs repeatable.
        daily_income = float(self.np_random.uniform(self.min_daily_income, self.max_daily_income))
        daily_expenses = float(self.np_random.uniform(self.min_daily_expenses, self.max_daily_expenses))

        if action == 0:  # save
            savings_amount = daily_income - daily_expenses
            self.savings += savings_amount
            reward = savings_amount * 0.1  # Smaller incremental reward for saving
        elif action == 1:  # spend
            reward = -daily_expenses * 0.1  # Smaller penalty for spending
        elif action == 2:  # buy bag
            if self.savings >= self.bag_price:
                self.savings -= self.bag_price
                reward = 1000  # Reward for buying the bag
            else:
                reward = -2000  # Penalty for attempting to buy without enough savings
        else:
            raise ValueError(f"Invalid action {action!r}; expected 0, 1 or 2")

        # Income and expenses are booked every day, whatever the action.
        self.total_income += daily_income
        self.total_expenses += daily_expenses

        # Continuous reward for maintaining at least 50% of the goal
        if self.savings >= 0.5 * self.goal:
            reward += 10

        self.state = [self.savings, self.total_income, self.total_expenses, self.savings / self.goal]
        terminated = bool(self.savings >= self.goal)
        self.current_step += 1
        truncated = bool(self.current_step >= self.max_steps)

        return np.array(self.state, dtype=np.float32), float(reward), terminated, truncated, {}

    def render(self, mode='human'):
        """Print (``'human'``) or return (``'ansi'``) a one-line summary.

        Raises:
            NotImplementedError: For any other mode.
        """
        output = f"Savings: {self.savings}, Income: {self.total_income}, Expenses: {self.total_expenses}, Goal Progress: {self.savings / self.goal:.2%}"
        if mode == 'human':
            print(output)
        elif mode == 'ansi':
            return output
        else:
            # The base gymnasium render() accepts no ``mode`` argument, so
            # delegating to it would fail with a TypeError instead.
            raise NotImplementedError(f"Unsupported render mode: {mode!r}")

# --- Configuration ---
SEED = 42                              # seed passed to the model and to the evaluation reset
TOTAL_TIMESTEPS = 2000                 # training budget
MODEL_PATH = "financial_planning_dqn"  # where the trained model is saved / loaded
MILESTONE_STEP_PCT = 10                # report progress every 10% of the goal

# Initialize the environment
env = FinancialPlanningEnv(goal=1000, bag_price=2000)

# Check the environment
check_env(env)

# Create the RL model
model = DQN('MlpPolicy', env, verbose=1, seed=SEED)

# Train the model
model.learn(total_timesteps=TOTAL_TIMESTEPS)

# Save the model
model.save(MODEL_PATH)

# Load the trained model
model = DQN.load(MODEL_PATH)

# Evaluate the trained model
state, _ = env.reset(seed=SEED)
done = False
truncated = False
total_reward = 0
# Milestones are tracked as whole percentages so repeated additions cannot
# accumulate floating-point error in the printed value.
next_milestone_pct = MILESTONE_STEP_PCT
start_time = time.time()

steps_taken = 0  # To keep track of the number of steps taken

# Stop on truncation as well as termination; otherwise a policy that never
# reaches the goal would keep this loop running forever.
while not (done or truncated):
    # Greedy actions: evaluate the learned policy without exploration noise.
    action, _states = model.predict(state, deterministic=True)
    state, reward, done, truncated, _ = env.step(action)
    total_reward += reward
    steps_taken += 1  # Increment the step counter

    # Check if we've reached the next milestone
    progress_pct = float(state[3]) * 100
    if progress_pct >= next_milestone_pct:
        # One day can cross more than one milestone; report the highest one crossed.
        reached_pct = int(progress_pct // MILESTONE_STEP_PCT) * MILESTONE_STEP_PCT
        print(f"Reached {reached_pct}% of the goal:")
        print(f"Savings: {state[0]:.2f}, Total Income: {state[1]:.2f}, Total Expenses: {state[2]:.2f}, Goal Progress: {state[3] * 100:.2f}%")
        print(f"Total Reward: {total_reward:.2f}\n")

        # Render only when a milestone is reached
        env.render()

        # Update the next milestone
        next_milestone_pct = reached_pct + MILESTONE_STEP_PCT

end_time = time.time()

if truncated and not done:
    print("Episode was truncated before the savings goal was reached.")

# Final output as financial advice
final_savings = state[0]
final_income = state[1]
final_expenses = state[2]
goal_progress = state[3] * 100

# Calculate the period in days
evaluation_period_days = steps_taken

print(f"Final Total Reward: {total_reward:.2f}")
print(f"Total Evaluation Time: {end_time - start_time:.2f} seconds")
print(f"Evaluation Period: {evaluation_period_days} days")
print("\nFinancial Advice:")
print(f"1. You have saved {final_savings:.2f} which is {goal_progress:.2f}% of your savings goal.")
print(f"2. Your total income over the period was RM{final_income:.2f}.")
print(f"3. Your total expenses over the period were RM{final_expenses:.2f}.")
print(f"4. The evaluation period was {evaluation_period_days} days.")
if final_savings >= env.bag_price:
    print(f"5. You have enough savings to buy the bag priced at RM{env.bag_price}. Consider making the purchase if it aligns with your goals.")
else:
    print(f"5. You do not yet have enough savings to buy the bag priced at RM{env.bag_price}. Continue saving to reach your goal.")
if goal_progress >= 100:
    print("6. Congratulations! You have achieved your savings goal. Consider setting a new goal to continue building your financial security.")
else:
    print("6. You are making progress towards your savings goal. Keep up the good work and continue to monitor your spending and saving habits.")


Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 25.5      |
|    ep_rew_mean      | -1.78e+04 |
|    exploration_rate | 0.515     |
| time/               |           |
|    episodes         | 4         |
|    fps              | 24691     |
|    time_elapsed     | 0         |
|    total_timesteps  | 102       |
-----------------------------------
-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 18.2      |
|    ep_rew_mean      | -1.08e+04 |
|    exploration_rate | 0.307     |
| time/               |           |
|    episodes         | 8         |
|    fps              | 3435      |
|    time_elapsed     | 0         |
|    total_timesteps  | 146       |
| train/              |           |
|    learning_rate    | 0.0001    |
|    loss             | 673       |
|    n_updates        | 11        |
------