In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Action space: the three moves available to the agent at every time step
actions = [
    'charge',
    'idle',
    'discharge',
]

# State constructor; the battery level is part of the state
def get_state(price, demand, timestamp, battery_capacity):
    """Pack the four observations into a tuple.

    Returns:
        ``(price, demand, timestamp, battery_capacity)`` in that order. The
        tuple is what the notebook uses as a key into the Q-table.
    """
    state = price, demand, timestamp, battery_capacity
    return state

In [None]:
# Define battery parameters
battery_capacity = 100  # in watt hours
charge_discharge_rate = 20  # in watts
initial_battery_state = battery_capacity / 2  # start with the battery half-charged

# Seeded generator so the noisy profiles below are identical on every
# Restart Kernel -> Run All (the global, unseeded np.random state is not used).
RANDOM_SEED = 42
rng = np.random.default_rng(RANDOM_SEED)

STEPS_PER_HOUR = 4  # one sample every 15 minutes
PROFILE_NOISE_STD = 0.05  # standard deviation of the noise added around each hourly value


def expand_hourly_profile(hourly_values, steps_per_hour=STEPS_PER_HOUR, noise_std=PROFILE_NOISE_STD):
    """Expand hourly values into noisy sub-hourly samples.

    Each hourly value is repeated ``steps_per_hour`` times and every repeat is
    replaced by a draw from a normal distribution centred on that value.

    Args:
        hourly_values: sequence of numbers, one per hour.
        steps_per_hour: number of samples generated for each hourly value.
        noise_std: standard deviation of the normal noise.

    Returns:
        A flat list of floats of length ``len(hourly_values) * steps_per_hour``.
    """
    means = np.repeat(np.asarray(hourly_values, dtype=float), steps_per_hour)
    return rng.normal(means, noise_std).tolist()


# Define the demand profile for a regular German private household within a workday
hourly_demand = [0.3, 0.2, 0.2, 0.2, 0.3, 0.4, 0.6, 0.8, 0.7, 0.6, 0.6, 0.6,
                 0.7, 0.7, 0.7, 0.8, 1.0, 1.2, 1.3, 1.2, 1.0, 0.8, 0.6, 0.4]
demand_profile = expand_hourly_profile(hourly_demand)

# Define the electricity cost for a regular German private household within a workday
# NOTE(review): these hourly values are identical to the demand values above --
# confirm this is intended and not a copy-paste placeholder.
hourly_cost = [0.3, 0.2, 0.2, 0.2, 0.3, 0.4, 0.6, 0.8, 0.7, 0.6, 0.6, 0.6,
               0.7, 0.7, 0.7, 0.8, 1.0, 1.2, 1.3, 1.2, 1.0, 0.8, 0.6, 0.4]
electricity_cost = expand_hourly_profile(hourly_cost)

# Create a DataFrame with the demand profile and the electricity cost
df = pd.DataFrame({'demand': demand_profile, 'cost': electricity_cost})

# Attach datetime ('15min' replaces the deprecated '15T' offset alias)
df['datetime'] = pd.date_range(start='2023-07-05', periods=len(df), freq='15min')

assert len(df) == len(hourly_demand) * STEPS_PER_HOUR, "expected one row per 15-minute step"

# Copy of the first 24 rows. At 15-minute resolution this covers the first
# 6 hours of the day, not the full 24 hours.
df2 = df.iloc[0:24].copy()


In [None]:
# Two panels side by side: demand on the left, electricity cost on the right.
# Both panels show the rows of df2; pass df instead to plot every row.
fig, (demand_ax, cost_ax) = plt.subplots(1, 2, figsize=(10, 5))

# Left panel: demand profile
demand_ax.plot(df2['datetime'], df2['demand'])
demand_ax.set_xlabel('Time')
demand_ax.set_ylabel('Demand (W)')
demand_ax.set_title('Demand Profile')

# Right panel: electricity cost
cost_ax.plot(df2['datetime'], df2['cost'])
cost_ax.set_xlabel('Time')
cost_ax.set_ylabel('Cost (€/W)')
cost_ax.set_title('Electricity Cost')

fig.tight_layout()
plt.show()

In [None]:
# Define the new reward function
def get_reward(state, action):
    """Return the cost saved in this step by using the battery.

    The reward is ``cost_without_battery - cost_with_battery`` for the step:
    positive when the action lowers the energy bought from the grid, negative
    when it raises it, and zero when the grid demand is unchanged.

    Args:
        state: ``(price, demand, timestamp, battery_level)`` tuple as built by
            ``get_state``. ``timestamp`` is not used in the calculation.
        action: ``'charge'``, ``'discharge'`` or ``'idle'``. Any other value is
            treated like ``'idle'``.

    Returns:
        The reward as a number (same numeric type as ``price * demand``).
    """
    # The fourth element is the current battery level; it is given its own
    # name so it does not shadow the module-level ``battery_capacity`` constant.
    current_price, current_demand, _timestamp, battery_level = state

    if action == 'discharge':
        # The battery covers as much of the demand as its stored energy allows.
        battery_discharge = min(battery_level, current_demand)
        grid_demand = current_demand - battery_discharge
    elif action == 'charge':
        # Charging is limited by the remaining headroom and by the charge rate;
        # the charged amount is bought from the grid on top of the demand.
        battery_charge = min(battery_capacity - battery_level, charge_discharge_rate)
        grid_demand = current_demand + battery_charge
    else:  # 'idle' (and any unrecognised action)
        grid_demand = current_demand

    cost_with_battery = grid_demand * current_price
    cost_without_battery = current_demand * current_price

    return cost_without_battery - cost_with_battery

In [None]:
# Initialize the Q-table to include the battery capacity in terms of watt hours
def _reachable_battery_levels(start, step, capacity):
    """Return every battery level an episode can reach, in ascending order.

    Levels are generated with the same arithmetic the training loop uses:
    ``min(level + step, capacity)`` for charging and ``max(level - step, 0)``
    for discharging, starting from ``start``.
    """
    if step <= 0:
        return [start]
    seen = {start}
    pending = [start]
    while pending:
        level = pending.pop()
        for neighbour in (min(level + step, capacity), max(level - step, 0)):
            if neighbour not in seen:
                seen.add(neighbour)
                pending.append(neighbour)
    return sorted(seen)


# A fixed grid 0, 20, ..., 100 misses the levels actually visited when an
# episode starts at 50 and moves in steps of 20 (50, 30, 70, 10, 90), which
# makes the first lookup fail with a KeyError. Enumerate the reachable levels.
_battery_levels = _reachable_battery_levels(initial_battery_state, charge_discharge_rate, battery_capacity)

# Only the (cost, demand) pair of the same row can occur together at a given
# timestamp, so one entry per (row, battery level) is enough. Pairing every
# cost with every demand would create over a million unused states.
Q_table = {
    get_state(df['cost'].iloc[timestamp], df['demand'].iloc[timestamp], timestamp, level): {action: 0 for action in actions}
    for timestamp in range(min(24, len(df)))
    for level in _battery_levels
}

In [None]:
import random

# Q-learning hyperparameters
alpha, gamma = 0.5, 0.9  # learning rate, discount factor
num_iterations = 100  # how many times the outer training loop runs

In [None]:
# Define a function for the agent to select the best action in a state
def get_best_action(state):
    """Return the action with the highest Q-value for ``state``.

    Ties go to the action that comes first in the state's Q-value dictionary.

    Raises:
        KeyError: if ``state`` has no entry in ``Q_table``.
    """
    action_values = Q_table[state]
    return max(action_values, key=action_values.get)

In [None]:
# Define the exploration rate
epsilon = 0.1

# Fixed seed so the epsilon-greedy exploration is reproducible across runs
random.seed(42)

total_rewards = []  # list to store the total reward in each episode
num_steps = len(df2)  # one time step per row of df2 (15 minutes each)


def _q_row(state):
    """Return the Q-values of ``state``, adding an all-zero row on first visit."""
    return Q_table.setdefault(state, {action: 0 for action in actions})


def _state_at(timestamp, battery_state):
    """Build the state for row ``timestamp`` of df2 and the given battery level."""
    return get_state(df2['cost'].iloc[timestamp], df2['demand'].iloc[timestamp], timestamp, battery_state)


for _ in range(num_iterations):

    # Initialize total reward for this episode
    total_reward = 0
    battery_state = initial_battery_state  # reset the battery state at the start of each episode

    for timestamp in range(num_steps):

        # Update the state based on the current timestamp
        state = _state_at(timestamp, battery_state)
        # Make sure the state has a row before it is read or updated, so a
        # battery level missing from the initial table cannot raise KeyError.
        q_row = _q_row(state)

        # Select an action
        if random.uniform(0, 1) < epsilon:
            # Exploration: select a random action
            action = random.choice(actions)
        else:
            # Exploitation: select the best action
            action = get_best_action(state)

        # Calculate the reward for the action
        reward = get_reward(state, action)
        total_reward += reward  # add the reward to the total reward for this episode

        # Update the battery level based on the action
        if action == 'charge':
            battery_state = min(battery_state + charge_discharge_rate, battery_capacity)
        elif action == 'discharge':
            battery_state = max(battery_state - charge_discharge_rate, 0)

        # Q-learning bootstraps from the best value of the NEXT state (the one
        # reached after applying the action), not from the current state.
        # After the last step there is no next state, so the future value is 0.
        next_timestamp = timestamp + 1
        if next_timestamp < num_steps:
            future_value = max(_q_row(_state_at(next_timestamp, battery_state)).values())
        else:
            future_value = 0

        # Update the Q-value for the state-action pair
        q_row[action] = (1 - alpha) * q_row[action] + alpha * (reward + gamma * future_value)

    total_rewards.append(total_reward)  # store the total reward for this episode

# Plot the total reward in each episode
plt.plot(total_rewards)
plt.xlabel('Episode')
plt.ylabel('Total Reward')
plt.show()

In [None]:
# Running sum of the per-episode totals
cumulative_rewards = np.cumsum(total_rewards)

# Line plot of the running sum against the episode index
fig, ax = plt.subplots()
ax.plot(cumulative_rewards)
ax.set_xlabel('Episode')
ax.set_ylabel('Cumulative Reward')
ax.set_title('Cumulative Reward over Episodes')
plt.show()

In [None]:
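# Print the final Q-table (disabled; uncomment to inspect the learned values)
# NOTE: the loop variable is called `action_values`, not `actions`, so that
# enabling this cell does not overwrite the global `actions` list.
#for state, action_values in Q_table.items():
#    print(f'State: {state}')
#    for action, Q_value in action_values.items():
#        print(f'    Action: {action}, Q-value: {Q_value}')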