In [None]:
import numpy as np
import math
import random
import pandas as pd
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt

### Create a linear layer class in Keras

In [None]:
class Linear(keras.layers.Layer):
    """Affine layer computing ``inputs @ w + b``.

    Both weights are created in the constructor, so the input width has to
    be supplied up front.

    Args:
        units: Number of output features (columns of ``w``, length of ``b``).
        input_dim: Number of input features (rows of ``w``).
    """

    def __init__(self, units=32, input_dim=32):
        super().__init__()
        kernel_shape = (input_dim, units)
        bias_shape = (units, )
        self.w = self.add_weight(
            shape=kernel_shape,
            initializer='random_normal',
            trainable=True,
        )
        self.b = self.add_weight(
            shape=bias_shape,
            initializer='zeros',
            trainable=True,
        )

    def call(self, inputs):
        """Return the matrix product of ``inputs`` with ``w``, shifted by ``b``."""
        projected = tf.matmul(inputs, self.w)
        return projected + self.b

In [None]:
tf.ones((2,2))

In [None]:
# Positional arguments map to units=4, input_dim=2, so `w` has shape (2, 4) and `b` has shape (4,).
linear_layer = Linear(4,2)

In [None]:
# Apply the layer to a 2x2 all-ones input; the input's last dimension (2) matches input_dim.
linear_layer(tf.ones((2,2)))

### Use the `build` method to avoid having to specify `input_dim`

In [None]:
class Linear(keras.layers.Layer):
    """Affine layer whose weights are created in ``build`` rather than ``__init__``.

    Only the output width is given at construction time; the input width is
    read from the last entry of the ``input_shape`` passed to ``build``.

    Args:
        units: Number of output features.
    """

    def __init__(self, units=32):
        super().__init__()
        self.units = units

    def build(self, input_shape):
        """Create ``w`` and ``b`` sized from ``input_shape[-1]`` and ``self.units``."""
        in_features = input_shape[-1]
        out_features = self.units
        self.w = self.add_weight(
            shape=(in_features, out_features),
            initializer='random_normal',
            trainable=True,
        )
        self.b = self.add_weight(
            shape=(out_features, ),
            initializer='zeros',
            trainable=True,
        )

    def call(self, inputs):
        """Return the matrix product of ``inputs`` with ``w``, shifted by ``b``."""
        affine = tf.matmul(inputs, self.w)
        return affine + self.b

In [None]:
class MLP(keras.layers.Layer):
    """Three stacked ``Linear`` layers (32 -> 32 -> 10 units) with ReLU between them.

    No activation is applied after the last layer.
    """

    def __init__(self):
        super().__init__()
        self.linear_1 = Linear(32)
        self.linear_2 = Linear(32)
        self.linear_3 = Linear(10)

    def call(self, inputs):
        """Run ``inputs`` through the three layers and return the final layer's output."""
        hidden = tf.nn.relu(self.linear_1(inputs))
        hidden = tf.nn.relu(self.linear_2(hidden))
        return self.linear_3(hidden)

In [None]:
# TODO: get the maximum column value of each row

In [None]:
mlp = MLP()

In [None]:
# Forward a 2x2 all-ones batch through the MLP; the last layer has 10 units, so each row maps to 10 values.
y = mlp(tf.ones((2,2)))

In [None]:
y

In [None]:
class ActivityRegularization(keras.layers.Layer):
    """Pass-through layer that registers a loss proportional to the sum of its inputs.

    Args:
        rate: Multiplier applied to the summed inputs before the value is
            registered through ``add_loss``.
    """

    def __init__(self, rate=1e-2):
        super().__init__()
        self.rate = rate

    def call(self, inputs):
        """Register ``rate * sum(inputs)`` as a loss and return ``inputs`` unchanged."""
        penalty = self.rate * tf.reduce_sum(inputs)
        self.add_loss(penalty)
        return inputs

In [None]:
tf.reduce_sum(tf.ones((14,10)))

In [None]:
# NOTE(review): if the cells ran top to bottom, `mlp` is still the plain MLP here, and none of
# its sub-layers call add_loss, so this presumably shows an empty list. Confirm.
mlp.losses

In [None]:
class SparseMLP(keras.layers.Layer):
    """Two ``Linear`` layers (128 -> 2 units) with an activity penalty on the hidden activations.

    The ReLU output of the first layer passes through ``ActivityRegularization``
    (rate 1e-2) before reaching the output layer.
    """

    def __init__(self):
        super().__init__()
        self.linear_1 = Linear(128)
        self.regularization = ActivityRegularization(1e-2)
        self.linear_3 = Linear(2)

    def call(self, inputs):
        """Return the output layer's result for ``inputs``."""
        hidden = tf.nn.relu(self.linear_1(inputs))
        penalized = self.regularization(hidden)
        return self.linear_3(penalized)

In [None]:
# Rebind `mlp` to a SparseMLP and run one forward pass on a (200, 4) all-ones batch.
# The call goes through ActivityRegularization, which registers a loss via add_loss.
mlp = SparseMLP()
y = mlp(tf.ones((200, 4)))

In [None]:
mlp.losses

In [None]:
# write training loop for sparse mlp
# get dataset

# DQN
# collect transitions and go through network update process
# get max value of state-action pairs

In [None]:
# Load MNIST and keep only the first (images, labels) pair; the second element of the result is discarded.
(X_train, y_train), _ = keras.datasets.mnist.load_data()

In [None]:
# Flatten each image to a 784-long float32 row and divide by 255.
flat_images = X_train.reshape(60000, 784).astype('float32') / 255

# Pair rows with labels, shuffle with a 1024-element buffer, and emit batches of 64.
dataset = (
    tf.data.Dataset
    .from_tensor_slices((flat_images, y_train))
    .shuffle(buffer_size=1024)
    .batch(64)
)

In [None]:
dataset

In [None]:
# access different elements of dataset
# get dataset of Transitions

In [None]:
# A single Linear layer with 10 output units.
# NOTE(review): presumably one unit per MNIST digit class — confirm.
linear_layer = Linear(10)

In [None]:
# from_logits=True: the loss is fed the layer's raw outputs (no softmax is applied before it).
loss_fn = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
# Plain SGD with a fixed learning rate of 1e-3.
optimizer = keras.optimizers.SGD(learning_rate=1e-3)

In [None]:
# Forward pass and loss for every batch. No gradient step is taken inside this
# loop, so the weights are not updated here; once it finishes, `tape` and `loss`
# hold the values from the final batch only.
for step, (x, y) in enumerate(dataset):
    with tf.GradientTape() as tape:
        logits = linear_layer(x)
        loss = loss_fn(y, logits)

In [None]:
# Inspect the dataset: prints the index, features and labels of every batch.
# NOTE(review): this iterates the whole dataset and rebinds `x` and `y`;
# consider limiting the loop to a few batches to keep the output readable.
for step, (x, y) in enumerate(dataset):
    print(step, x, y)

In [None]:
# TODO: How can this be used to build a Q-function?

In [None]:
# Differentiate `loss` with respect to the layer's trainable weights.
# NOTE(review): `tape` and `loss` are whatever an earlier cell left behind
# (presumably the last batch of the loss-computing loop); this depends on execution order.
gradients = tape.gradient(loss, linear_layer.trainable_weights)

In [None]:
gradients

In [None]:
# Apply a single optimizer update, pairing each gradient with its corresponding weight.
optimizer.apply_gradients(zip(gradients, linear_layer.trainable_weights))

In [None]:
tf.ones((5,12))

In [None]:
tf.random.normal((5, 12))

In [None]:
# get array/tensor as input to model
tf.random.normal((303,5))

In [None]:
def get_basic_model():
    """Build and compile a small dense network with one output unit.

    The network is two 10-unit ReLU layers followed by a single-unit layer
    with no activation. It is compiled with the Adam optimizer, binary
    crossentropy on logits, and an accuracy metric.

    Returns:
        The compiled ``tf.keras.Sequential`` model.
    """
    layers = tf.keras.layers
    network = tf.keras.Sequential([
        layers.Dense(10, activation='relu'),
        layers.Dense(10, activation='relu'),
        layers.Dense(1),
    ])
    network.compile(
        optimizer='adam',
        loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
        metrics=['accuracy'],
    )
    return network

In [None]:
model = get_basic_model()

In [None]:
# The model is compiled with BinaryCrossentropy, which expects targets of 0 or 1.
# Gaussian real-valued targets make both the loss and the accuracy metric
# meaningless, so draw random binary labels instead.
features = tf.random.normal((303, 5))
labels = tf.cast(tf.random.normal((303,)) > 0, tf.float32)
model.fit(features, labels, epochs=15, batch_size=128)

In [None]:
import tensorflow as tf
import numpy as np
from sklearn.model_selection import train_test_split

# Synthetic regression data: 5000 samples with 22 features and one target column.
X = np.random.random((5000, 22))
y = np.random.random((5000, 1))

X_train, X_test, y_train, y_test = train_test_split(X, y)

# Training pipeline: shuffle over the full training split, batch by 32, then prefetch.
dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train))
train_data = (
    dataset
    .shuffle(len(X_train))
    .batch(32)
    .prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
)

# Validation pipeline: batched only.
valid_ds = tf.data.Dataset.from_tensor_slices((X_test, y_test))
valid_data = valid_ds.batch(32)

def create_model(input_dim=2):
    """Build and compile a three-layer dense regression model.

    All layers use a linear activation (128 -> 64 -> 1 units). The model is
    compiled with mean absolute error as both loss and metric, and the Adam
    optimizer.

    Args:
        input_dim: Number of features per sample. Must match the last
            dimension of the data passed to ``fit``.

    Returns:
        The compiled functional ``tf.keras`` model.
    """
    tfkl = tf.keras.layers
    inp = tf.keras.Input(shape=(input_dim,))
    x = tfkl.Dense(128, activation="linear")(inp)
    x = tfkl.Dense(64, activation="linear")(x)
    x = tfkl.Dense(1, activation="linear")(x)

    model = tf.keras.models.Model(inp, x)
    model.compile(loss="mae", optimizer="adam", metrics=["mae"])
    return model

# Size the input layer from the data: X_train has 22 feature columns, so the
# previously hard-coded 2-feature input made fit() fail with a shape mismatch.
model = create_model(input_dim=X_train.shape[1])
model.summary()

model.fit(train_data, epochs=3, validation_data=valid_data)

In [None]:
def get_basic_model(input_dim=2):
    """Build and compile a small dense binary classifier with an explicit input layer.

    The network is two 10-unit ReLU layers followed by a single-unit layer
    with no activation. It is compiled with the Adam optimizer, binary
    crossentropy on logits, and an accuracy metric.

    Args:
        input_dim: Number of features per sample expected by the input layer.

    Returns:
        The compiled ``tf.keras.Sequential`` model.
    """
    model = tf.keras.Sequential([
        tf.keras.Input(shape=(input_dim,)),
        tf.keras.layers.Dense(10, activation='relu'),
        tf.keras.layers.Dense(10, activation='relu'),
        tf.keras.layers.Dense(1),
    ])

    model.compile(optimizer='adam',
                  loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
                  metrics=['accuracy'])
    return model


# The feature width of the training data must equal the model's input width;
# the original fed 22-feature rows to a 2-feature input layer, which fails.
N_FEATURES = 2
model = get_basic_model(input_dim=N_FEATURES)

features = tf.random.normal((300, N_FEATURES))
# BinaryCrossentropy expects targets of 0 or 1, not Gaussian real values.
labels = tf.cast(tf.random.normal((300,)) > 0, tf.float32)
model.fit(features, labels, epochs=15, batch_size=128)

In [None]:
x = tf.random.normal(shape=(2, 2))

with tf.GradientTape() as tape:
    # x is a plain tensor (not a tf.Variable), so it has to be watched explicitly.
    tape.watch(x)
    y = tf.sin(x)

# Differentiate after leaving the context: only the forward pass needs to be recorded.
grads = tape.gradient(y, x)
print(grads)

In [None]:
# The derivative of sin(x) is cos(x); compare this with the gradient printed from the tape.
tf.cos(x)

In [None]:
model.trainable_weights

In [None]:

with tf.GradientTape() as tape:
    # x is a plain tensor (not a tf.Variable), so it has to be watched explicitly.
    tape.watch(x)
    y = tf.sin(x)

# Differentiate after leaving the context: only the forward pass needs to be recorded.
grads = tape.gradient(y, x)
print(grads)

In [None]:
loss_function = keras.losses.MeanSquaredError()
optimizer = keras.optimizers.Adam(learning_rate=0.00025, clipnorm=1.0)

x = tf.random.normal(shape=(2, 2))
y = tf.sin(x)
# NOTE(review): `model` comes from an earlier cell. If its output is a single
# column, the (2, 2) target is broadcast against it — confirm this is intended.

for i in range(100):
    with tf.GradientTape() as tape:
        # Only the forward pass and the loss are recorded. The model's trainable
        # weights are watched automatically, so no tape.watch(x) is needed:
        # we differentiate with respect to the weights, not the input.
        y_pred = model(x)
        loss = loss_function(y, y_pred)

    # model.trainable_weights is the right list here: it yields one gradient per
    # trainable weight, in the same order apply_gradients pairs them below.
    grads = tape.gradient(loss, model.trainable_weights)
    print(grads)
    optimizer.apply_gradients(zip(grads, model.trainable_weights))

In [None]:
class SparseMLP(keras.layers.Layer):
    """Two ``Linear`` layers (32 -> 10 units) with an activity penalty on the hidden activations.

    The ReLU output of the first layer passes through ``ActivityRegularization``
    (rate 1e-2), which registers a loss, before reaching the output layer.
    """

    def __init__(self):
        super().__init__()
        self.linear_1 = Linear(32)
        self.regularization = ActivityRegularization(1e-2)
        self.linear_3 = Linear(10)

    def call(self, inputs):
        """Return the output layer's result for ``inputs``."""
        hidden = tf.nn.relu(self.linear_1(inputs))
        penalized = self.regularization(hidden)
        return self.linear_3(penalized)


# NOTE(review): this first call runs on a (10, 10) input; Linear.build sizes its
# weights from the input's last dimension, so this instance presumably only
# accepts 10-feature inputs afterwards. Confirm.
model = SparseMLP()
y = model(tf.ones((10, 10)))

print(model.losses)  # List containing one float32 scalar

(x_train, y_train), _ = keras.datasets.mnist.load_data()

# Flatten each image to a 784-long float32 row and divide by 255.
flat_images = x_train.reshape(60000, 784).astype("float32") / 255

# Pair rows with labels, shuffle with a 1024-element buffer, and emit batches of 64.
dataset = (
    tf.data.Dataset
    .from_tensor_slices((flat_images, y_train))
    .shuffle(buffer_size=1024)
    .batch(64)
)

In [None]:
model.trainable_weights

In [None]:
# Train a SparseMLP on the batches in `dataset`.
#
# The original loop iterated over `dataset` but ignored its batches, feeding a
# fixed (2, 2) tensor to a model whose weights were already built for
# 10-feature inputs, which fails in the first matmul. A fresh instance is
# created here so its weights are built from the 784-feature batches.
model = SparseMLP()

# Integer class labels against raw 10-way outputs -> sparse categorical crossentropy on logits.
loss_function = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
optimizer = keras.optimizers.Adam(learning_rate=0.00025, clipnorm=1.0)

for step, (x, y) in enumerate(dataset):
    with tf.GradientTape() as tape:
        logits = model(x)
        # Add the activity penalty that ActivityRegularization registered during this forward pass.
        loss = loss_function(y, logits) + sum(model.losses)

    # Gradients are taken outside the tape context, with respect to every trainable weight.
    grads = tape.gradient(loss, model.trainable_weights)
    optimizer.apply_gradients(zip(grads, model.trainable_weights))

    # Report progress occasionally instead of dumping every gradient tensor.
    if step % 100 == 0:
        print(step, float(loss))