In [2]:
from IPython.display import display, HTML

# Widen the classic-notebook containers so wide outputs fit on screen.
_layout_css = """
<style>
    div#notebook-container    { width: 95%; }
    div#menubar-container     { width: 65%; }
    div#maintoolbar-container { width: 99%; }
</style>
"""
display(HTML(data=_layout_css))

In [3]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import *
import copy
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import torch.optim as optim
import seaborn

In [4]:
class nnet(nn.Module):
    """Small feed-forward network: linear -> sigmoid -> linear.

    There is exactly one hidden layer (``l1`` followed by a sigmoid); the
    output layer ``l2`` is purely linear, so outputs are unbounded.

    Args:
        params: mapping with the integer entries ``'FEATURE_DIM'`` (input
            width), ``'HIDDEN_1'`` (hidden width) and ``'OUTPUT_DIM'``
            (output width).
    """

    def __init__(self, params):
        super(nnet, self).__init__()
        # Layer sizes are read from the configuration mapping.
        self.D_in, self.H1, self.D_out = (
            params[key] for key in ('FEATURE_DIM', 'HIDDEN_1', 'OUTPUT_DIM')
        )
        # Hidden layer first, then output layer.
        self.l1 = nn.Linear(self.D_in, self.H1)
        self.l2 = nn.Linear(self.H1, self.D_out)

    def forward(self, x):
        """Return ``l2(sigmoid(l1(x)))`` for the input tensor ``x``."""
        hidden = torch.sigmoid(self.l1(x))  # hidden layer + sigmoid activation
        return self.l2(hidden)              # linear output layer

In [5]:
#global variables --- same as CDC paper for small players
x_bar = 1.0   #maximum consumption
noise_var = 1.0    #variance of noise values
g = np.arange(0,1.1,0.1)  #discretization of space of power consumption values x \in [0,\bar{x}]
#discretization of space of noise values; [-2, 2] is +/- 2 standard deviations
#(roughly 95% of the mass) of a normal distribution with unit variance
s_vals = np.arange(-2.0,2.0,0.1)
#np.random.normal expects the standard deviation (scale), not the variance
samples = np.random.normal(0, np.sqrt(noise_var), size=100000)
#`normed` was removed from np.histogram in NumPy 1.24; `density` is the replacement
#(the result is re-normalised below, so only the relative bin heights matter)
s_hist = np.histogram(samples, bins=np.arange(-2.0,2.1,0.1), density=True)[0]
s_hist = (1.0/np.linalg.norm(s_hist, ord=1)) * s_hist #probabilities over discretized noise values
ramp_const = 0.3  #time coupling ramping constraint
pi_cp_perc = 0.6  #ratio of pi_cp cost to total utility under a naive fixed strategy

#layer sizes consumed by nnet: input features, output width, hidden width
net_params = {'FEATURE_DIM': 4, 'OUTPUT_DIM': 1, 'HIDDEN_1': 16}

base_path = "/home/chase/projects/peakload/notebooks/" #change this to path to notebook + data

In [6]:
#helper functions 

def player_return(utility_func, plays, system_vals, pi_cp):
    """Net return of one player over a completed path.

    Args:
        utility_func: callable applied to ``plays``; its result is summed to
            give the revenue term.
        plays: the player's plays, multiplied element-wise with the soft
            peak indicator (assumes a numpy array -- TODO confirm).
        system_vals: values passed to ``vec_softmax`` to obtain the soft
            indicator of the peak round.
        pi_cp: invoked as a function on the indicator-weighted plays and
            expected to return the peak cost.
            NOTE(review): confirm this is meant to be a callable rather
            than a scalar price.

    Returns:
        Summed utility minus the peak cost.
    """
    peak_weights = vec_softmax(system_vals)
    peak_charge = pi_cp(plays * peak_weights)
    revenue = np.sum(utility_func(plays))
    return revenue - peak_charge

def vec_softmax(system_vals, beta=50):
    """Softmax over all entries of ``system_vals`` with inverse temperature ``beta``.

    A large ``beta`` makes the result approximate the argmax indicator
    vector (nearly all weight on the largest entry).

    Args:
        system_vals: numpy array, or anything ``np.asarray`` can convert to
            a float array.
        beta: scaling applied to the values before exponentiation.

    Returns:
        Array with the same shape as the input whose entries sum to 1
        (an empty array for empty input).
    """
    # Coerce instead of type-checking: the previous guard compared the type of
    # a constant array, so it could never reject anything.
    vals = np.asarray(system_vals, dtype=float)
    if vals.size == 0:
        return vals

    # Softmax is invariant to a constant shift; subtracting the maximum keeps
    # every exponent <= 0 so np.exp cannot overflow to inf (inf/inf -> nan).
    scaled = beta * vals
    scaled = scaled - np.max(scaled)
    weights = np.exp(scaled)
    return weights / np.sum(weights)
    
def linear_player_utility_lambda(a=2):
    """Build the linear utility ``u(x) = a * x`` with slope ``a``."""
    def utility(x):
        return a * x
    return utility
    
def player_utility_lambda(a=1):
    """Build the logarithmic utility ``u(x) = a * log(1 + x)``."""
    def utility(x):
        return a * np.log(1 + x)
    return utility

def player_d_utility_lambda(a=1):
    """Build ``x -> a / (1 + x)``, the first derivative of ``a * log(1 + x)``."""
    def marginal_utility(x):
        return a / (1 + x)
    return marginal_utility

def player_dd_utility_lambda(a=1):
    """Build ``x -> -a / (1 + x)**2``, the second derivative of ``a * log(1 + x)``."""
    def curvature(x):
        return -a / np.power(1 + x, 2)
    return curvature

In [None]:
#no noise, no ramp constraints

#two players perform gradient descent on neural policies

#compare to Nash equilibrium

#without scheduled learning rate

#with scheduled learning rate

### Discretize and force ties

### No noise, no ramp constraints, two players, linear utility

In [None]:
T = 10  #number of rounds played in each epoch

player_1_policy = nnet(net_params)
player_2_policy = nnet(net_params) #players are different due to random initialization

player_1_util = linear_player_utility_lambda(a=5)
player_2_util = linear_player_utility_lambda(a=2)

loss_player_1 = nn.MSELoss()
loss_player_2 = nn.MSELoss()

optimizer_1 = optim.SGD(player_1_policy.parameters(), lr=0.005, momentum=0.9)
optimizer_2 = optim.SGD(player_2_policy.parameters(), lr=0.005, momentum=0.9)

epochs = 10000

#NOTE(review): prev_play_1 / prev_play_2 are drawn once here and never updated
#inside the game loop, so this input feature is constant --- confirm whether it
#should track each player's play from the previous round
prev_play_1 = np.random.uniform(0,1)
prev_play_2 = np.random.uniform(0,1)

#one list of (input, output) pairs per epoch, per player
player_1_paths = []
player_2_paths = []

player_1_training_pairs = []
player_2_training_pairs = []

player_1_returns = []
player_2_returns = []

for e in range(epochs):
    if e % 1000 == 0:
        print("epoch: ", e)
    
    #path for each epoch
    player_1_paths.append([])
    player_2_paths.append([])
    
    #plays for each epoch: row 0 = player 1, row 1 = player 2, row 2 = system total
    plays = np.zeros((3, T))
    
    #initialize max_so_far
    max_so_far = 0.0
    
    #play game
    for t in range(T):
        #fraction of rounds remaining: 1.0 in the first round, 1/T in the last
        rounds_left = float(T - t)/float(T)
        
        #features: [previous play, max system total so far, rounds left, constant 1.0]
        player_1_input = Variable(torch.Tensor([prev_play_1, max_so_far, rounds_left, 1.0]))
        player_2_input = Variable(torch.Tensor([prev_play_2, max_so_far, rounds_left, 1.0]))
        
        player_1_next = float(player_1_policy(player_1_input).data)
        player_2_next = float(player_2_policy(player_2_input).data)
        
        plays[0,t] = player_1_next
        plays[1,t] = player_2_next
        plays[2,t] = player_1_next + player_2_next
        
        #taken over the whole row, so rounds not yet played contribute their initial 0.0
        max_so_far = np.max(plays[2,:])
        
        #append current input and output to player path
        player_1_paths[-1].append((np.asarray(player_1_input), player_1_next))
        #player 2's path must store player 2's own output (was player_1_next)
        player_2_paths[-1].append((np.asarray(player_2_input), player_2_next))
        
        
    #compute reward for player given completed path
    #NOTE(review): pi_cp is not defined in the cells shown here (only pi_cp_perc
    #is); it must exist before this cell runs, and player_return calls it as a function
    player_1_returns.append(player_return(player_1_util, plays[0,:], plays[2,:], pi_cp))
    player_2_returns.append(player_return(player_2_util, plays[1,:], plays[2,:], pi_cp))
        
    #for each path + reward, compute incremental step to improve reward --- choose all non CP rounds, increase incrementally, choose CP round and decrease incrementally
    
    #schedule increments
    
    #once enough training examples are batched
    
        #MSE loss on each player, backward, reset paths and rewards
        

In [11]:
# Scratch check: a list whose only element is an empty list.
lis = [[]]
print(lis)

[[]]
