In [1]:
import tensorflow as import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Dense, Flatten, Conv2D
from tensorflow.keras import Model

mnist = keras.datasets.mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0

x_train = x_train[..., tf.newaxis]
x_test = x_test[..., tf.newaxis]

train_ds = tf.data.Dataset.from_tensor_slices(
    (x_train, y_train)).shuffle(10000).batch(32)
test_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(32)

class MyModel(Model):
    def __init__(self):
        super(MyModel, self).__init__()
        self.conv1 = Conv2D(32, 3, activation='relu')
        self.flatten = Flatten()
        self.d1 = Dense(128, activation='relu')
        self.d2 = Dense(10, activation='softmax')

    def call(self, x):
        x = self.conv1(x)
        x = self.flatten(x)
        x = self.d1(x)
        return self.d2(x)


model = MyModel()

loss_object = tf.keras.losses.SparseCategoricalCrossentropy()

optimizer = tf.keras.optimizers.Adam()

train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
    name='train_accuracy')

test_loss = tf.keras.metrics.Mean(name='test_loss')
test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
    name='test_accuracy')


@tf.function
def train_step(images, labels):
    with tf.GradientTape() as tape:
        predictions = model(images)
        loss = loss_object(labels, predictions)
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))

    train_loss(loss)
    train_accuracy(labels, predictions)


@tf.function
def test_step(images, labels):
    predictions = model(images)
    t_loss = loss_object(labels, predictions)

    test_loss(t_loss)
    test_accuracy(labels, predictions)


EPOCHS = 5

for epoch in range(EPOCHS):
    train_loss.reset_states()
    train_accuracy.reset_states()
    test_loss.reset_states()
    test_accuracy.reset_states()

    for images, labels in train_ds:
        train_step(images, labels)

    for test_images, test_labels in test_ds:
        test_step(test_images, test_labels)

    template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'
    print(template.format(epoch+1,
                          train_loss.result(),
                          train_accuracy.result()*100,
                          test_loss.result(),
                          test_accuracy.result()*100))

SyntaxError: invalid syntax (1832316732.py, line 1)

# Case Study 1

In [None]:
class Agent:
    
    def __init__(self, state_size, is_eval = False, model_name = ""):
        self.state_size = state_size    # normalized previous days
        self.action_size = 3
        self.memory = deque(max_len = 1000)
        self.inventory = []
        self.model_name = model_name
        self.is_eval = is_eval
        
        self.gamma = 0.95
        self.epsilon = 1
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        
        self.model = load_model("models/"+model_name) if is_eval else self._model()
        
    
    def model(self):
        model = Sequential()
        modele.add(Dense(units=64, input_dim=self.state_size, activation="relu"))
        model.add(Dense(units=32, activation="relu"))
        model.add(Dense(units=8, activation="relu"))
        model.add(Dense(self.action_size, activation="linear"))
        model.compile(loss="mse", optimizer=Adam(lr=0.001))
        return model
    
    
    def act(self, state):
        if not self.is_eval and random.random() <= self.epsilon:
            return random.randrange(self.action_size)
        options = self.model_predict(state)
        return np.argmax(options[0])
    
    def expReplay(self, batch_size):
        mini_batch = []
        l = len(self.memory)
        
        # 1 : Prepare replay memory
        for i in range(1-batch_size+1, l):
            mini_batch.append(self.memory[i])
        
        # 2 : Loop accross the replay memory batch
        for state, action, reward, next_state, done in mini_batch:
            target = reward # reward or Q at time t
            
            # 3 : Update the target for Q table, table equation
            if not done:
                target = reward + self.gamma
                np.amax(self.model.predict(next_state)[0])
            
            # set_trace()
            
            # 4 : Update the output Q table for the given action in the table
            target_f[0][action] = target
            
            # 5: Update the output Q table for the given action in the table
            target_f[0][action] = target
            
            # 6: train adn fit the model
            self.model.fit(state, target_f, epochs=1, verbose=0)
        
        # 7 : Implement epsilon greedy algorithm
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay
            

    

# Case Study 3

In [None]:
class CrytoEnvironment:
    
    def __init__(self, assets, capital):
        self.assets = assets
        self.capital = capital
        self.data = self.load_data()
        
    def load_data(self):
        Get_m_data()        
    
    def preprocess_state(self, state):
        return state
    
    def get_state(self, t, lookback, is_cov_matrix=True, is_raw_time_series=False):
        assert lookback <= t
        
        decision_making_state = self.data.iloc[t - lookback:t]
        decision_making_state = decision_making_state.pct_change().dropna()
        
        if is_cov_matrix:
            x = decision_making_state.cov()
            return x
        else:
            if is_raw_time_series:
                decision_making_state = self.data.iloc[t-lookback : t]
            return self.preprocess_state(decision_making_state)
    
    def get_reward(self, action, action_t, reward_t, alpha=0.01):
        
        def local_portfolio(returns, weights):
            weights = np.array(weights)
            rets = returns.mean()   # * 252
            covs = returns.cov()    # * 252
            P_ret = np.sum(rets * weights)
            P_vol = np.sqrt(np.dot(weights.T, np.dot(covs, weights)))
            P_sharpe = P_ret / P_vol
            return np.array([P_ret, P_vol, P_sharpe])
        
        data_period = self.data[action_t : reward_t]
        weights = action 
        returns = data_period.pct_change().dropna()
        
        sharpe = local_portfolio(returns, weights)[-1]
        sharpe = np.array([sharpe] * len(self.data.columns))
        ret = (data_period.values[-1] - data_period.valuesp[0]) / data_period.values[0]
        
        return np.dot(returns, weights), ret
    