In [None]:
import gym
import envs
import numpy as np
import pandas as pd
from envs.custom_env_dir.dqn_agent import DQNAgent
from envs.custom_env_dir.data_handler import DataHandler
from envs.custom_env_dir.sup_model import SupModel
from envs.custom_env_dir.utils import make_env
from datetime import datetime
import os 
import time


''' THIS FUNCTION WILL BE CALLED FROM THE MAIN-METHOD BELOW'''
def run_model(optimizer, gamma, lr, replace, hl, k, store_dir, mlp_hl, mlp_af, mlp_sl, input_dims, mlp, knn, n_BM, uncontrolled, obs, store_results, development, test, test_final):
    """Train a DQN charging agent, or evaluate an agent / supervised model / benchmark.

    In training mode (``test=False``) a new agent is trained on the training
    collection and its parameters are saved whenever the moving average score
    improves, plus once more at the end. In test mode (``test=True``) a stored
    model is loaded and evaluated without learning and without random actions.

    Args:
        optimizer: Optimizer name passed to ``DQNAgent`` and used in file names.
        gamma: Discount factor passed to ``DQNAgent``.
        lr: Learning rate passed to ``DQNAgent``.
        replace: ``replace`` argument passed to ``DQNAgent``.
        hl: Sequence whose first two entries are the sizes of the two hidden
            layers (``fc1_dims`` / ``fc2_dims``).
        k: Number of neighbors, used to load the stored k-NN model.
        store_dir: Directory used for checkpoints, stored supervised models
            and results.
        mlp_hl, mlp_af, mlp_sl: Parameters used to load the stored MLP model.
        input_dims: Input dimension passed to ``DQNAgent``.
        mlp: Act with the stored MLP model (test mode only).
        knn: Act with the stored k-NN model (test mode only).
        n_BM: Act according to the night benchmark strategy.
        uncontrolled: Act according to the uncontrolled-charging benchmark.
        obs: Observation specification string passed to the environment.
        store_results: If true, hand all collected data to
            ``DataHandler().store_results``.
        development: In test mode, evaluate on the development set instead of
            the test set.
        test: Evaluate a stored model instead of training a new one.
        test_final: In test mode, load the final model instead of the best one.

    Returns:
        None. Results are printed and optionally stored.
    """
    # Get collection of train, test, dev sets
    (train_collection, dev_collection, test_collection, train_count, dev_count,
     test_count, full_collection) = DataHandler().get_data_7d_3split(
        include_weekends=True, n_episodes=450,
        start_year=2018, start_month=10, start_day=1)

    # Define EV battery capacity in kWh
    battery_capacity = 24
    # Define residential EV charging rate in kW
    charging_rate = 6
    # Set penalty coefficient for incomplete charging
    penalty_coefficient = 12

    # Initialize best_score for tracking best model
    best_score = -np.inf

    # Hyperparameter tag shared by the file names of this run
    model_tag = (optimizer + '_gamma' + str(gamma)
                 + '_lr' + ('%.15f' % lr).rstrip('0').rstrip('.')
                 + '_replace' + str(replace) + '_HL' + str(hl))

    if test:
        # Set parameters for testing; use development set for parameter tuning
        if development:
            test_collection = dev_collection
            dataset = 'DEV'
        else:
            dataset = 'TEST'
        game_collection = test_collection
        # Simulate each day in the set 10 times with different driving profiles
        n_episodes = len(test_collection) * 10
        # Makes sure the agent never learns during testing
        pre_training_steps = np.inf
        # Load previously trained model
        load_checkpoint = True
    else:
        # Set parameters for training. `dataset` is bound here as well so the
        # benchmark file names below can always be built.
        dataset = 'TRAIN'
        game_collection = train_collection
        # Do not load a checkpoint - train new model
        load_checkpoint = False
        # Train model for n_episodes episodes
        n_episodes = 50000
        # Specify number of random episodes before the agent starts to learn
        pre_training_steps = 5000

    # Create the environment and pass the selected collection of days
    env = gym.make('ChargingEnv-v0', game_collection=game_collection,
                   battery_capacity=battery_capacity, charging_rate=charging_rate,
                   penalty_coefficient=penalty_coefficient, obs=obs)

    if not test:
        print('Train model for ' + str(n_episodes) + ' episodes with ' + str(pre_training_steps) + ' pre-train steps ...')

    filename = dataset + '_' + model_tag

    # Print information if using night benchmark
    if n_BM:
        if development:
            print('Night benchmark on development set')
        else:
            print('Night benchmark on test set')
    # Print information if using uncontrolled charging
    if uncontrolled:
        if development:
            print('Uncontrolled benchmark on development set')
        else:
            print('Uncontrolled benchmark on test set')

    # Create the RL agent with a DQN
    agent = DQNAgent(gamma=gamma, fc1_dims=hl[0], fc2_dims=hl[1], epsilon=1.0, lr=lr,
                     input_dims=input_dims, n_actions=len(env.action_space), mem_size=100000,
                     eps_min=0.1, batch_size=32, replace=replace, eps_dec=1e-5, optimizer=optimizer,
                     chkpt_dir=store_dir, algo='DQNAgent', env_name='ChargingEnv-v0')

    # Load agent/model parameters from previously trained model
    if load_checkpoint:
        if test_final:
            agent.load_models_final()
            filename = filename + '_finalmodel'
        elif mlp:
            sup_model = SupModel().load_model_mlp(store_dir, mlp_hl, mlp_af, mlp_sl)
            sup_scaler = SupModel().load_scaler(store_dir)
            filename = 'MLP_'
        elif knn:
            sup_model = SupModel().load_model_kneighbors(store_dir, k)
            sup_scaler = SupModel().load_scaler(store_dir)
            filename = 'KNN_'
        else:
            agent.load_models()
        # Do not take any random actions, strictly act according to policy
        agent.epsilon = 0

    # Benchmark runs get their own file name; the night benchmark takes
    # precedence, matching the order in which the strategies are checked below
    if n_BM:
        filename = dataset + '_BENCHMARK_Night_2-6'
    elif uncontrolled:
        filename = dataset + '_BENCHMARK_Uncontrolled'

    # Lists to store all relevant data while training or testing
    price_list, soc_list, action_list, discounted_action_list, temp_list = [], [], [], [], []
    dates, day_cats, starts, ends, final_soc = [], [], [], [], []
    scores, avg_scores, eps_history, pen_history = [], [], [], []

    for i in range(n_episodes):
        # Get initial observation from the environment
        observation = env.reset(test, i)
        # Here, the return is called 'score'
        score = 0
        # Create lists to store training/test data of this episode
        episode_prices, episode_soc, episode_actions = [], [], []
        episode_day_cats, episode_discounted_actions, episode_temps = [], [], []

        # Loop for 24 h/steps in each episode/game
        for n_steps in range(24):

            # No action can be taken while vehicle is not parked
            if env.parking == 0:
                action = '-'
                # Store each action taken
                episode_actions.append(action)
                episode_discounted_actions.append(action)
                # Receive new observation and reward(=0)
                observation_, reward = env.non_parking_step()
                score += reward

            # While the vehicle is parked choose action according to current policy or benchmark strategy
            else:
                # Night benchmark: immediately discharge the vehicle in the evening, only charge between 02:00-06:00
                if n_BM:
                    if n_steps < 14:
                        # Before 02:00 a.m.: discharge (1) until empty, then do nothing (2)
                        action = 1 if env.soc != 0 else 2
                    elif n_steps < 18 and env.soc != 1:
                        # Charge the vehicle between 2-6 a.m.
                        action = 0
                    else:
                        # Do nothing after vehicle is fully charged
                        action = 2

                # Simple benchmark: always charge the vehicle (no control mechanism and V2G/V2H)
                elif uncontrolled:
                    action = 0

                # Test with supervised model
                # NOTE(review): sup_model / sup_scaler are only loaded in test
                # mode without test_final - confirm callers respect this.
                elif mlp or knn:
                    # Scale the observation (kept in a separate name so the
                    # `obs` parameter is not shadowed)
                    features = sup_scaler.transform(np.array(observation).reshape(1, -1))
                    # Predict optimal action based on observation
                    action = sup_model.predict(features.reshape(1, -1))[0]

                # Deep reinforcement agent takes action according to policy learned
                else:
                    action = agent.choose_action(observation)

                # Store each action taken for evaluation and visualization
                episode_actions.append(env.action_space[action] / env.charging_rate)
                # NOTE(review): read before env.step(action) - confirm this is
                # the value that belongs to the current step.
                episode_discounted_actions.append(env.discounted_action)

                # Take a step and receive reward and new observation
                observation_, reward = env.step(action)
                score += reward

                # Fill replay memory while training
                if not load_checkpoint:
                    agent.store_transition(observation, action,
                                           reward, observation_)

                    # Start learning after defined number of random episodes
                    if i > pre_training_steps:
                        agent.learn()

            # Store all prices, temps, soc for each episode
            # (offset 168 presumably skips the 7 * 24 h of history - TODO confirm)
            episode_prices.append(env.hourly_prices['Spot'][n_steps + 168])
            episode_temps.append(env.hourly_prices['temp'][n_steps + 168])
            episode_soc.append(env.soc)
            # Record each day category only once per episode
            if env.day_cat not in episode_day_cats:
                episode_day_cats.append(env.day_cat)

            # Update observation
            observation = observation_

        # Store all relevant data for evaluation
        temp_list.append(episode_temps)
        price_list.append(episode_prices)
        soc_list.append(episode_soc)
        action_list.append(episode_actions)
        discounted_action_list.append(episode_discounted_actions)
        dates.append(env.game_date)
        day_cats.append(episode_day_cats)
        starts.append(env.start_time)
        ends.append(env.end_time)
        final_soc.append(env.soc)
        scores.append(score)
        eps_history.append(agent.epsilon)
        pen_history.append(env.penalty_coefficient)
        # Moving average over the last 100 episodes
        avg_score = np.mean(scores[-100:])
        avg_scores.append(avg_score)

        # Print average score every 100 episodes
        if i % 100 == 0:
            print('episode: ', i, 'score: ', score,
                  ' average score %.1f' % avg_score, 'best score %.2f' % best_score,
                  'final_soc', env.soc, 'epsilon %.2f' % agent.epsilon, 'steps', n_steps)

        # Store model parameters when new moving average score outperforms previous best model
        if avg_score > best_score:
            if not load_checkpoint:
                agent.save_models()
            best_score = avg_score

    # Store final model parameters after all training episodes
    if not load_checkpoint:
        agent.save_models_final()

    # Calculate the average score; an empty collection yields no episodes, so
    # guard against dividing by zero
    average_score = sum(scores) / len(scores) if scores else float('nan')

    print('The average score is ', average_score)

    # Store data for all training episodes in csv file
    if store_results:
        DataHandler().store_results(price_list, soc_list, action_list, dates,
                                    day_cats, starts, ends, scores,
                                    avg_scores, final_soc, eps_history, pen_history,
                                    filename, optimizer, gamma, lr, replace, store_dir,
                                    discounted_action_list, temp_list)
    else:
        print('Store results disabled')
    

if __name__ == '__main__':
    # Entry point: configure one session below, then train and/or evaluate it.
    i = 1
    cwd = os.getcwd()

    # ----- SET INPUT FEATURES WITH STRING -----
    # The obs string and the respective input feature variables must have been defined in charging_env.py
    obs = 'obs4(t_sin,t_cos,daycat,temp0)'
    # Set respective input dimensions for the DQN
    input_dims = 4

    # ----- SET DQN AGENT PARAMETERS -----
    # If you want to test an agent that has been trained already the parameters have to match!
    optimizer = 'Adam'
    gamma = 0.8
    lr = 0.0001
    replace = 2
    hl = [64, 64]

    # ----- SELECT AT LEAST ONE OF THE THREE FOLLOWING OPTIONS -----
    # Select dataset for training or test on test/dev set
    do_train = False
    do_dev = True
    do_test = True

    # ----- SELECT NO MORE THAN ONE OF THE FOUR FOLLOWING OPTIONS -----
    # Select true if you want to test a previously trained k-NN classifier
    knn = False
    # Select true if you want to test a previously trained MLP classifier
    mlp = False
    # Select if you want to test the night benchmark
    n_BM = False
    # Select if you want to test uncontrolled charging
    uncontrolled = False

    # ----- IF k-NN OR MLP SELECTED -> SPECIFY PARAMETERS -----
    # Specify k-nearest neighbors parameters if required
    k = 15
    # Specify MLP parameters if required
    # NOTE: (16) is the plain integer 16, not a one-element tuple; it is kept
    # as is because str(mlp_hl) is part of the model directory name below.
    mlp_hl = (16)
    mlp_af = 'relu'
    mlp_sl = 'adam'

    # ----- SELECT IF YOU WANT TO STORE TRAIN/TEST INFORMATION IN A CSV FILE -----
    store_results = False

    # ----- NO ADJUSTMENTS REQUIRED FROM HERE -----
    # Learning rate without trailing zeros, e.g. 0.0001 -> '0.0001'
    lr_str = ('%.15f' % lr).rstrip('0').rstrip('.')

    # Set store directory depending on previous decision
    if knn:
        info = ' | k = ' + str(k) + ' | ' + obs
        store_dir = cwd + '/knn_models/' + 'KNeighbors_k(' + str(k) + ')' + '_' + obs
    elif mlp:
        info = ' | optimizer=' + mlp_sl + ' | activation function: ' + mlp_af + ' | hl: ' + str(mlp_hl) + ' | ' + obs
        store_dir = cwd + '/mlp_models/' + 'MLP_hl(' + str(mlp_hl) + ')_af(' + str(mlp_af) + ')_sl(' + str(mlp_sl) + ')' + '_' + obs
    else:
        info = ' | optimizer=' + optimizer + ' | gamma=' + str(gamma) + ' | lr=' + lr_str + ' | replace=' + str(replace) + ' | HL: ' + str(hl) + ' | ' + obs
        store_dir = cwd + '/dqn_models/' + optimizer + '_gamma' + str(gamma) + '_lr' + lr_str + '_replace' + str(replace) + '_HL' + str(hl) + '_' + obs

    # Train
    if do_train:
        print('---------- TRAIN session: ', i, info)
        # Re-running a training session with the same settings must not fail
        # just because the store directory is already there
        if os.path.isdir(store_dir):
            print('Store directory already exists, stored models will be overwritten: ' + store_dir)
        os.makedirs(store_dir, exist_ok=True)
        start = time.time()
        run_model(optimizer, gamma, lr, replace, hl, k, store_dir, mlp_hl, mlp_af, mlp_sl, input_dims, mlp, knn, n_BM, uncontrolled, obs, store_results, development=False, test=False, test_final=False)
        end = time.time()
        print('Training took ', end-start, ' seconds...')

    # DEV
    if do_dev:
        print('---------- DEV session: ' +str(i)+info)
        run_model(optimizer, gamma, lr, replace, hl, k, store_dir, mlp_hl, mlp_af, mlp_sl, input_dims, mlp, knn, n_BM, uncontrolled, obs, store_results, development=True, test=True, test_final=False)

    # Test
    if do_test:
        print('---------- TEST session: ', i, info)
        run_model(optimizer, gamma, lr, replace, hl, k, store_dir, mlp_hl, mlp_af, mlp_sl, input_dims, mlp, knn, n_BM, uncontrolled, obs, store_results, development=False, test=True, test_final=False)

    # Print information that nothing is trained or tested...
    if not (do_train or do_dev or do_test):
        print('Select do_train / do_dev / do_test')