<a href="https://colab.research.google.com/github/atharvanaik10/CryptoSAC/blob/main/soft_actor_critic.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Discrete Soft Actor-Critic (SAC) for crypto trading

In [None]:
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import pickle
from google.colab import drive
from collections import namedtuple, deque
import random


import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

## Replay Memory Buffer

In [None]:
# A single stored experience: (state, action, reward, next_state).
Transition = namedtuple(
  'Transition',
  ('state', 'action', 'reward', 'next_state'),
)


class ReplayMemory:
  """Bounded FIFO store of Transition records.

  The backing deque is created with `maxlen=capacity`, so once the buffer is
  full every new push silently drops the oldest stored transition.
  """

  def __init__(self, capacity):
    """Create an empty buffer that holds at most `capacity` transitions."""
    self.memory = deque(maxlen=capacity)
    self.capacity = capacity

  def __len__(self):
    """Return how many transitions are currently stored."""
    return len(self.memory)

  def push(self, *args):
    """Pack the positional args into a Transition and append it.

    The args must match the Transition fields in order:
    state, action, reward, next_state.
    """
    record = Transition(*args)
    self.memory.append(record)

  def sample(self, batch_size):
    """Return a list of `batch_size` stored transitions drawn without replacement.

    `random.sample` raises ValueError when `batch_size` exceeds the number of
    stored transitions, so callers should check `len(memory)` first.
    """
    return random.sample(self.memory, k=batch_size)

## Critic Network

In [None]:
# Hyperparams for critic layers (widths of the two hidden layers)
LAYER_1_SIZE = 128
LAYER_2_SIZE = 256

class Critic(nn.Module):
  """State-action value network Q(s, a).

  A three-layer MLP: the state and action vectors are concatenated, passed
  through two ReLU hidden layers, and reduced to a single scalar per sample.
  The module owns its Adam optimizer and moves itself to the module-level
  `device` on construction.
  """

  def __init__(self, state_dims, action_dims, learning_rate):
    """Build the network.

    Args:
      state_dims: number of features in a state vector.
      action_dims: number of features in an action vector.
      learning_rate: step size for the Adam optimizer.
    """
    super().__init__()

    self.state_dims = state_dims
    self.action_dims = action_dims

    # The first layer consumes the concatenated [state, action] vector.
    self.layer1 = nn.Linear(self.state_dims + action_dims, LAYER_1_SIZE)
    self.layer2 = nn.Linear(LAYER_1_SIZE, LAYER_2_SIZE)
    self.outlayer = nn.Linear(LAYER_2_SIZE, 1)

    self.optimizer = optim.Adam(self.parameters(), lr=learning_rate)
    self.to(device)

  def forward(self, state, action):
    """Return Q(s, a) for a batch of state/action pairs.

    Args:
      state: tensor whose dim 1 has `state_dims` features.
      action: tensor whose dim 1 has `action_dims` features; it must have
        the same batch size as `state` because the two are joined on dim 1.

    Returns:
      Tensor with one Q-value per row (last dimension of size 1).
    """
    # The module is imported as `torch`; the previous `T.cat` referenced an
    # undefined name and raised NameError on every forward pass.
    quality = self.layer1(torch.cat([state, action], dim=1))
    quality = F.relu(quality)

    quality = self.layer2(quality)
    quality = F.relu(quality)

    quality = self.outlayer(quality)

    return quality

## Actor Network

In [None]:
# Hyperparams for actor layers (widths of the two hidden layers)
LAYER_1_SIZE = 128
LAYER_2_SIZE = 256

class Actor(nn.Module):
    """Policy network producing a categorical distribution over actions.

    A three-layer MLP with ReLU hidden activations whose output is passed
    through a softmax, so each output row sums to one across `action_dims`
    entries. The module owns its Adam optimizer and moves itself to the
    module-level `device` on construction.
    """

    def __init__(self, state_dims, action_dims, learning_rate):
        """Build the network.

        Args:
            state_dims: number of features in a state vector.
            action_dims: number of discrete actions (size of the output).
            learning_rate: step size for the Adam optimizer.
        """
        super().__init__()

        self.state_dims, self.action_dims = state_dims, action_dims

        # Layers are created in input-to-output order.
        self.layer1 = nn.Linear(state_dims, LAYER_1_SIZE)
        self.layer2 = nn.Linear(LAYER_1_SIZE, LAYER_2_SIZE)
        self.outlayer = nn.Linear(LAYER_2_SIZE, action_dims)

        self.optimizer = optim.Adam(self.parameters(), lr=learning_rate)
        self.to(device)

    def forward(self, state):
        """Return the policy probabilities pi(a | s) for `state`.

        The softmax is taken over the last dimension, so the result has the
        same leading dimensions as `state` and `action_dims` entries in the
        last one.
        """
        hidden = F.relu(self.layer1(state))
        hidden = F.relu(self.layer2(hidden))
        logits = self.outlayer(hidden)
        return F.softmax(logits, dim=-1)
