In [None]:
import numpy as np
import pandas as pd

# Assuming 'dataframe' is your DataFrame and 'timestamp' is in datetime format

def create_features(df, external_state):
    """
    Add return, calendar and price-sign feature columns to ``df``.

    ``df`` is modified in place and is also returned.

    Columns written: 'return', 'hour', 'day_of_week', 'sin_hour', 'cos_hour',
    'sin_day', 'cos_day', 'is_peak', 'is_off_peak', 'price_negative'.

    :param df: DataFrame that receives the feature columns.
    :param external_state: DataFrame-like object with a 'price' column and a
        'timestamp' column (datetime values or anything ``pd.to_datetime``
        can parse).
    :return: The same ``df`` object with the feature columns added.
    """
    # Parse once and reuse, so 'hour' and 'day_of_week' both work even when
    # the timestamps are stored as strings.
    timestamps = pd.to_datetime(external_state['timestamp'])

    df['return'] = external_state['price'].pct_change()
    df['hour'] = timestamps.dt.hour
    df['day_of_week'] = timestamps.dt.dayofweek

    # Cyclical encodings so that 23h/0h and Sunday/Monday end up adjacent.
    df['sin_hour'] = np.sin(2 * np.pi * df['hour'] / 24)
    df['cos_hour'] = np.cos(2 * np.pi * df['hour'] / 24)
    df['sin_day'] = np.sin(2 * np.pi * df['day_of_week'] / 7)
    df['cos_day'] = np.cos(2 * np.pi * df['day_of_week'] / 7)

    # Peak window is 17:00-20:59; every other valid hour is off-peak.
    # Vectorized comparisons give 0 for missing hours, like the row-wise check.
    hour = df['hour']
    df['is_peak'] = ((hour >= 17) & (hour < 21)).astype(int)
    df['is_off_peak'] = (
        ((hour >= 0) & (hour < 17)) | ((hour >= 21) & (hour < 24))
    ).astype(int)

    # Binary flag: 1 if price is negative, 0 otherwise. Prefer a price column
    # already on df; otherwise use the same source as the 'return' feature.
    price = df['price'] if 'price' in df.columns else external_state['price']
    df['price_negative'] = (price < 0).astype(int)
    return df


In [None]:
# Normalize 
# hour = current_features['hour'] / 24.0
# day_of_week = current_features['day_of_week'] / 7.0

# Hours of the day (24h clock) treated as peak; all remaining hours are off-peak.
peak_hours = list(range(17, 21))
off_peak_hours = [h for h in range(24) if h not in peak_hours]
        
def current_hour(df):
    """
    Extracts the hour of day from timestamp data.

    :param df: One of
        - a DataFrame containing a 'timestamp' column,
        - a Series of timestamp-like values,
        - a single timestamp-like value or a list of them
          (anything ``pd.to_datetime`` accepts).
    :return: The hour(s) of the parsed timestamp(s): a Series for DataFrame
        or Series input, otherwise whatever ``.hour`` yields on the parsed
        value (a scalar for a single timestamp).
    :raises KeyError: If a DataFrame without a 'timestamp' column is passed.
    """
    # pd.to_datetime(DataFrame) would try to assemble dates from year/month/day
    # columns, so pull the documented 'timestamp' column out first.
    values = df['timestamp'] if isinstance(df, pd.DataFrame) else df
    parsed = pd.to_datetime(values)
    # A Series exposes datetime fields through the .dt accessor only.
    if isinstance(parsed, pd.Series):
        return parsed.dt.hour
    return parsed.hour

def is_isnot_peak(hour):
    """
    Tell whether an hour of the day belongs to the peak window.

    The peak window is the set of hours 17, 18, 19 and 20.

    :param hour: The hour to check.
    :return: True if ``hour`` equals one of the peak hours, False otherwise.
    """
    return hour in (17, 18, 19, 20)


# NOTE(review): `external_state` is not defined in this cell or in any cell
# shown above it, so this line relies on state already present in the kernel
# and will raise NameError on a fresh "Restart & Run All" — confirm where it
# is supposed to come from. The name `time` would also shadow the standard
# library module of the same name if that is imported elsewhere.
time = external_state['timestamp']


In [None]:
def calculate_soc_reward(SoC, low_threshold, high_threshold, low_soc_penalty, high_soc_penalty):
    """
    Penalty term that keeps the state of charge inside a comfort band.

    :param SoC: Current state of charge, on the same scale as the thresholds.
    :param low_threshold: Below this value the low-SoC penalty applies.
    :param high_threshold: Above this value the high-SoC penalty applies.
    :param low_soc_penalty: Amount subtracted when SoC is too low (avoid depletion).
    :param high_soc_penalty: Amount subtracted when SoC is too high (avoid overcharging).
    :return: 0 inside the band (bounds included), otherwise minus the matching penalty.
    """
    if SoC < low_threshold:
        penalty = low_soc_penalty
    elif SoC > high_threshold:
        penalty = high_soc_penalty
    else:
        # Inside [low_threshold, high_threshold]: nothing to penalize.
        return 0
    return 0 - penalty
    


In [None]:
def calculate_peak_reward(is_peak_time, is_off_peak_time, selling_price, buying_price, peak_profit_multiplier, peak_cost_multiplier, off_peak_profit_multiplier, off_peak_cost_multiplier):
    """
    Weighted selling-minus-buying reward for the current tariff period.

    ``is_peak_time`` takes precedence when both flags are truthy.

    :param is_peak_time: Truthy when the step falls in the peak period.
    :param is_off_peak_time: Truthy when the step falls in the off-peak period.
    :param selling_price: Price obtained for sold energy.
    :param buying_price: Price paid for bought energy.
    :param peak_profit_multiplier: Weight on ``selling_price`` during peak.
    :param peak_cost_multiplier: Weight on ``buying_price`` during peak.
    :param off_peak_profit_multiplier: Weight on ``selling_price`` off-peak.
    :param off_peak_cost_multiplier: Weight on ``buying_price`` off-peak.
    :return: Weighted selling price minus weighted buying price, or 0 when
        neither flag is set.
    """
    if is_peak_time:
        profit_weight, cost_weight = peak_profit_multiplier, peak_cost_multiplier
    elif is_off_peak_time:
        profit_weight, cost_weight = off_peak_profit_multiplier, off_peak_cost_multiplier
    else:
        return 0
    return (selling_price * profit_weight) - (buying_price * cost_weight)


In [None]:
def calculate_combined_logic_reward(action, external_state, internal_state):
    """
    Combined reward: time-of-use energy cost/revenue plus an SoC penalty.

    :param action: Pair ``(charge_kW, solar_kW_to_battery)``. ``charge_kW`` is
        positive for charging and negative for discharging.
    :param external_state: Mapping with 'price' and 'timestamp'. The timestamp
        may be any object exposing ``.hour`` or a value ``pd.to_datetime``
        can parse (e.g. a string).
    :param internal_state: Mapping with 'battery_soc' and 'battery_capacity';
        their ratio is used as the normalized state of charge.
    :return: The scalar reward for this step.
    :raises ZeroDivisionError: If 'battery_capacity' is 0.
    """
    charge_kW, solar_kW_to_battery = action
    price = external_state['price']

    # Accept string timestamps as well as datetime-like objects.
    timestamp = external_state['timestamp']
    if not hasattr(timestamp, 'hour'):
        timestamp = pd.to_datetime(timestamp)
    is_peak_hour = timestamp.hour in [17, 18, 19, 20]  # Peak window 17:00-20:59

    battery_soc = internal_state['battery_soc'] / internal_state['battery_capacity']  # Normalize SOC

    # The sign of charge_kW only selects the branch; magnitudes are used below.
    energy_amount = abs(charge_kW)
    reward = 0

    if charge_kW > 0:
        # Charging: 40% surcharge during peak hours, 5% otherwise.
        cost = energy_amount * price * (1.40 if is_peak_hour else 1.05)
        reward -= cost
        if solar_kW_to_battery > energy_amount:
            # Small bonus for using solar power.
            # NOTE(review): with a negative price `cost` is negative, so this
            # term lowers the reward instead of raising it — confirm intent.
            reward += 0.1 * cost
    elif charge_kW < 0:
        # Discharging: 30% bonus during peak hours, 15% reduction otherwise.
        # Every hour is either peak or off-peak, so no third case exists.
        reward += energy_amount * price * (1.30 if is_peak_hour else 0.85)

    # Battery efficiency and SOC considerations
    if battery_soc > 0.9:  # Near full capacity
        reward -= 10  # Discourage overcharging
    elif battery_soc < 0.1:  # Very low SOC
        reward -= 10  # Discourage depletion

    return reward

Simple rewards

In [None]:
def reward_charge(external_state, internal_state, charge_kW, solar_kW_to_battery):
    """
    Reward for charging the battery from the grid and/or from solar.

    Relies on the module-level names ``efficiency_bonus``,
    ``overcharge_penalty`` and ``solar_efficiency_bonus``; each one must be
    defined before a call reaches the branch that uses it.

    :param external_state: Mapping with 'price'.
    :param internal_state: Mapping with 'SoC' and 'max_capacity'.
    :param charge_kW: Grid charging amount; only values > 0 are rewarded here.
    :param solar_kW_to_battery: Solar amount sent to the battery; only values
        > 0 are rewarded here.
    :return: The scalar reward (0 when neither amount is positive).
    """
    price = external_state['price']
    SoC = internal_state['SoC']
    max_capacity = internal_state['max_capacity']

    reward = 0
    if charge_kW > 0:  # Charging scenario
        # One expression covers both signs: a positive price is a cost, a
        # negative price turns grid charging into a gain.
        reward -= price * charge_kW

        # Check for overcharging potential
        if SoC + charge_kW <= max_capacity:
            reward += charge_kW * efficiency_bonus  # Reward efficient charging
        else:
            reward -= overcharge_penalty  # Penalty for overcharging

    # Using solar to charge
    # NOTE(review): this check looks at solar alone; grid charge in the same
    # step is not added to SoC here — confirm that is intended.
    if solar_kW_to_battery > 0:
        if SoC + solar_kW_to_battery <= max_capacity:
            reward += solar_kW_to_battery * solar_efficiency_bonus  # Reward using solar power
        else:
            reward -= overcharge_penalty  # Penalty for overcharging using solar

    return reward

def reward_discharge(external_state, internal_state, charge_kW):
    """
    Reward for discharging the battery.

    Relies on the module-level names ``discharge_efficiency_bonus`` and
    ``undercharge_penalty``.

    :param external_state: Mapping with 'price'.
    :param internal_state: Mapping with 'SoC' and 'min_capacity'.
    :param charge_kW: Battery action; a negative value means discharging.
    :return: 0 when not discharging, otherwise the price-based term plus the
        efficiency bonus or minus the undercharge penalty.
    """
    unit_price = external_state['price']
    current_soc = internal_state['SoC']
    floor_soc = internal_state['min_capacity']

    # Anything that is not a discharge earns nothing here.
    if not (charge_kW < 0):
        return 0

    total = 0
    discharge_kW = -charge_kW

    if unit_price > 0:
        # Selling at a positive price is a profit.
        total += unit_price * discharge_kW
    else:
        # Selling at a zero or negative price is a loss.
        total -= abs(unit_price) * discharge_kW

    # Would this discharge push the battery below its minimum?
    stays_above_floor = current_soc - discharge_kW >= floor_soc
    if stays_above_floor:
        total += discharge_kW * discharge_efficiency_bonus
    else:
        total -= undercharge_penalty

    return total



More complicated rewards

In [None]:
def reward_for_selling_solar_direct(external_state, internal_state):
    """
    Reward for selling solar power directly to the grid.

    The price is scaled by a time-of-day feed-in adjustment: +30% during
    on-peak hours (17:00-20:59) and -15% during off-peak hours. A reward is
    only granted when the price is positive, i.e. when selling actually earns
    money; otherwise the reward is 0.

    :param external_state: Mapping with 'price', 'pv_power' and 'timestamp'
        (anything ``pd.to_datetime`` can parse).
    :param internal_state: Unused; kept for a uniform reward-function signature.
    :return: ``pv_power * price * adjustment`` when ``price > 0``, else 0.
    """
    price = external_state['price']
    solar_power = external_state['pv_power']
    hour = pd.to_datetime(external_state['timestamp']).hour

    # Determine if it's on-peak or off-peak hours
    if 17 <= hour < 21:
        # On-Peak Hours
        feed_in_tariff_adjustment = 1.30  # 30% increase in profits
    elif 0 <= hour < 17 or 21 <= hour < 24:
        # Off-Peak Hours
        feed_in_tariff_adjustment = 0.85  # 15% reduction in profits
    else:
        # Only reached when the parsed hour is not a number in 0-23
        # (e.g. a missing timestamp): apply no adjustment.
        feed_in_tariff_adjustment = 1

    # Selling is only beneficial at a positive price. The previous `price < 0`
    # test rewarded nothing at positive prices and produced a negative
    # "reward" at negative ones.
    if price > 0:
        adjusted_feed_in_tariff = price * feed_in_tariff_adjustment
        return solar_power * adjusted_feed_in_tariff

    return 0

def reward_discharge_high_profit(external_state, internal_state, charge_kW):
    """
    Reward for discharging the battery, favouring peak hours.

    Discharging (``charge_kW < 0``) earns ``|charge_kW| * price`` scaled by
    1.30 during peak hours (17:00-20:59) and by 0.85 at any other time.

    :param external_state: Mapping with 'price' and 'timestamp'. The timestamp
        may be any object exposing ``.hour`` or a value ``pd.to_datetime``
        can parse (e.g. a string).
    :param internal_state: Currently unused; kept for a uniform signature.
    :param charge_kW: Battery action; a negative value means discharging.
    :return: The discharge reward, or 0 when not discharging.
    """
    price = external_state['price']

    # Accept string timestamps as well as datetime-like objects.
    timestamp = external_state['timestamp']
    if not hasattr(timestamp, 'hour'):
        timestamp = pd.to_datetime(timestamp)
    is_peak_hour = timestamp.hour in [17, 18, 19, 20]

    # Only discharging scenarios are rewarded here.
    if not (charge_kW < 0):
        return 0

    energy = abs(charge_kW)
    if is_peak_hour:
        # Encourage discharging during peak hours: 30% bonus on revenues.
        return energy * price * 1.30

    # Off-peak discharging carries a 15% penalty.
    # NOTE(review): the earlier code had a separate "battery nearly full"
    # branch (SoC > 0.9) that applied this very same 0.85 factor, so SoC never
    # influenced the result. If a nearly full battery should be treated more
    # leniently, add a distinct multiplier here.
    return energy * price * 0.85

def reward_charge_low_cost(external_state, internal_state, charge_kW, solar_kW_to_battery):
    """
    Reward for charging the battery when costs are low, solar first.

    Only charging steps (``charge_kW > 0``) are scored:

    - If solar power is available, the reward is based on
      ``solar_kW_to_battery``: its amount minus its price-weighted cost with
      a 5% penalty.
    - Otherwise the grid cost of ``charge_kW`` is charged with a 5% penalty
      off-peak or a 40% penalty during peak hours (17:00-20:59).

    :param external_state: Mapping with 'price', 'pv_power' and 'timestamp'.
        The timestamp may be any object exposing ``.hour`` or a value
        ``pd.to_datetime`` can parse (e.g. a string).
    :param internal_state: Unused; kept for a uniform signature.
    :param charge_kW: Battery action; a positive value means charging.
    :param solar_kW_to_battery: Solar amount sent to the battery.
    :return: The charging reward, or 0 when not charging.
    """
    price = external_state['price']
    solar_power_available = external_state['pv_power']

    # Accept string timestamps as well as datetime-like objects.
    timestamp = external_state['timestamp']
    if not hasattr(timestamp, 'hour'):
        timestamp = pd.to_datetime(timestamp)
    is_off_peak_hour = timestamp.hour not in [17, 18, 19, 20]

    solar_efficiency = 1.0  # Assuming an efficiency value

    if not (charge_kW > 0):
        return 0

    # Use solar power first to charge the battery if available
    if solar_power_available > 0:
        energy = solar_kW_to_battery
        # NOTE(review): the 5% price penalty is applied to the solar energy
        # itself, not to any grid share of the charge — confirm intent.
        return energy * solar_efficiency - (energy * price * 1.05)

    # No solar power: charge from the grid, with a higher penalty at peak.
    grid_penalty = 1.05 if is_off_peak_hour else 1.40
    return -(charge_kW * price * grid_penalty)

def reward_for_negative_price_charging(external_state, internal_state, charge_kW, solar_kW_to_battery):
    """
    Reward for charging the battery while the electricity price is negative.

    Relies on the module-level names ``high_import_bonus``,
    ``overcharge_penalty`` and ``lower_solar_use_reward``; each one must be
    defined before a call reaches the branch that uses it.

    :param external_state: Mapping with 'price' and 'pv_power'.
    :param internal_state: Mapping with 'battery_soc' and 'battery_capacity';
        both are compared on the same scale.
    :param charge_kW: Battery action; a positive value means charging.
    :param solar_kW_to_battery: Solar amount sent to the battery.
    :return: The scalar reward; 0 whenever the price is not negative.
    """
    reward = 0
    price = external_state['price']
    battery_soc = internal_state['battery_soc']
    max_soc = internal_state['battery_capacity']
    solar_power = external_state['pv_power']

    # Energy entering the battery: the grid charge when charging, otherwise
    # the solar share. (The earlier expression had the condition reversed and
    # used the solar amount to price a grid charge.)
    energy = charge_kW if charge_kW > 0 else solar_kW_to_battery

    if price < 0:  # Negative price scenario
        if charge_kW > 0:  # Only charging is rewarded
            reward += abs(price) * energy  # Gain is proportional to how negative the price is
            new_soc = battery_soc + energy
            if new_soc <= max_soc:
                reward += energy * high_import_bonus  # Bonus for charging without overcharging
            else:
                reward -= overcharge_penalty  # Penalty if charging goes beyond max capacity

    # Extra reward for solar during negative prices, as long as it would fit.
    # NOTE(review): this uses the total available 'pv_power', not
    # solar_kW_to_battery — confirm which quantity is intended.
    if solar_power > 0 and price < 0:
        new_soc_with_solar = battery_soc + solar_power
        if new_soc_with_solar <= max_soc:
            reward += solar_power * lower_solar_use_reward

    return reward


If predictions are used

In [None]:
def reward_for_optimal_battery_usage(external_state, internal_state, charge_kW, solar_kW_to_battery, forecast_negative_price):
    """
    Forecast-aware reward term for battery capacity management.

    Relies on the module-level names ``prepare_capacity_bonus`` and
    ``miss_opportunity_cost``; each one must be defined before a call reaches
    the branch that uses it.

    :param external_state: Currently unused; kept for a uniform signature.
    :param internal_state: Mapping with 'battery_soc' and 'battery_capacity';
        both are compared on the same scale.
    :param charge_kW: Battery action; a positive value means charging.
    :param solar_kW_to_battery: Solar amount sent to the battery; used as the
        incoming energy when ``charge_kW`` is not positive.
    :param forecast_negative_price: Truthy when negative prices are forecast.
    :return: ``prepare_capacity_bonus`` when negative prices are forecast and
        the incoming energy still fits; ``-miss_opportunity_cost`` when none
        are forecast and the battery is below capacity; otherwise 0.
    """
    battery_soc = internal_state['battery_soc']
    max_soc = internal_state['battery_capacity']

    # Energy entering the battery: the grid charge, or the solar share.
    energy = charge_kW if charge_kW > 0 else solar_kW_to_battery

    if forecast_negative_price:
        # Negative prices ahead: reward leaving room for the incoming energy.
        if battery_soc + energy <= max_soc:
            return prepare_capacity_bonus
        return 0

    # No negative prices forecast: a battery that is not full is penalized.
    # NOTE(review): the earlier comment described this as "reward conserving
    # battery capacity", yet the code subtracts a cost — confirm the sign.
    if battery_soc < max_soc:
        return 0 - miss_opportunity_cost
    return 0