# Learn $\gamma$ with Transformers

In [1]:
import os

import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.data import Dataset

from transformer_layers import *

In [2]:
# list the GPU devices TensorFlow can see (an empty list means none were found)
tf.config.list_physical_devices('GPU')

[]

## Read Data Set

In [2]:
# load the (bit string, gamma) pairs, supplying the column names explicitly
# NOTE(review): the saved output of this cell shows an extra 'Unnamed: 0'
# column, which does not match this two-name call -- confirm whether the CSV
# carries its own header/index column and re-run the cell to refresh the output.
column_names = ['bstring', 'gamma']
df = pd.read_csv('gamma_values.csv', names=column_names)

# preview the first rows
df.head()

Unnamed: 0,bstring,gamma
0,1001011101001001100110101001001011001011101000...,7.637653
1,1011001001101100100011110000100101001101110111...,7.350031
2,0011000110001101100101100001111101110111110111...,7.058517
3,0101100000100111000110111010101111101111110000...,7.825673
4,0111101111000111101110110110011001011011001101...,6.826776


In [3]:
def bstring_to_floats(bs) :
    '''
    Converts string of 0s and 1s to array of floats

    Parameters
    ----------
    bs : str
        Bit string such as '0110'; every character must be '0' or '1'.

    Returns
    -------
    numpy.ndarray
        1-D float64 array with one entry (0.0 or 1.0) per character of `bs`.
        An empty string yields an empty float64 array.

    Raises
    ------
    ValueError
        If `bs` contains anything other than '0' and '1'.
    '''
    # Fail loudly on malformed rows: float() would happily turn '7' into 7.0,
    # which is not a valid bit and would silently corrupt the model input.
    unexpected = set(bs) - {'0', '1'}
    if unexpected :
        raise ValueError(
            'bit string may only contain 0 and 1, found: '
            f'{sorted(unexpected, key=repr)}'
        )

    # explicit dtype keeps the result float64 even for an empty string
    return np.array([float(b) for b in bs], dtype=np.float64)

In [4]:
# split up input and output
bit_rows = df['bstring'].apply(bstring_to_floats).tolist()
Y = df['gamma'].values

# convert to tensors
# Stack the per-row arrays into a single 2-D array (one row per bit string)
# and convert once, instead of creating one tensor per row and then packing
# a Python list of tensors. np.stack also raises a clear error if the bit
# strings do not all have the same length.
X = tf.convert_to_tensor(np.stack(bit_rows))
Y = tf.convert_to_tensor(Y)

In [5]:
# pair every input row with its target and wrap the pairs in a tf.data pipeline
features_and_targets = (X, Y)
ds = Dataset.from_tensor_slices(features_and_targets)

In [6]:
# split into train/val
train_frac = 0.9
batch_size = 32
trainN = int(train_frac*len(ds))
valN = len(ds) - trainN

# Shuffle exactly ONCE before splitting. With the default
# reshuffle_each_iteration=True the order changes on every pass over the
# data, so take()/skip() would select different rows each epoch and
# validation samples would leak into the training split.
# The buffer covers the whole data set so the shuffle is uniform.
ds = ds.shuffle(
    max(len(ds), 1), seed=42, reshuffle_each_iteration=False
)
train_ds = ds.take(trainN)
val_ds = ds.skip(trainN).take(valN)

# batch up data sets
# only the training split is reshuffled between epochs
train_ds = train_ds.shuffle(max(trainN, 1), seed=42).batch(batch_size)
val_ds = val_ds.batch(batch_size)

## Set up Model

In [7]:
# architecture hyperparameters (positional arguments of Transformer)
num_layers, d_model, num_heads, dff = 2, 32, 4, 128

# architecture hyperparameters (keyword arguments of Transformer)
dense_hdim, num_classes = 16, 1
max_length, vocab_size = 1000, 2
dropout_rate = 0.1

# define model
model_options = dict(
    dense_hdim=dense_hdim,
    num_classes=num_classes,
    max_length=max_length,
    vocab_size=vocab_size,
    dropout_rate=dropout_rate,
)
model = Transformer(num_layers, d_model, num_heads, dff, **model_options)

# print the layer-by-layer overview
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 1000)]            0         
                                                                 
 encoder (Encoder)           (None, 1000, 32)          50624     
                                                                 
 global_average_pooling1d (G  (None, 32)               0         
 lobalAveragePooling1D)                                          
                                                                 
 dense_4 (Dense)             (None, 16)                528       
                                                                 
 dense_5 (Dense)             (None, 1)                 17        
                                                                 
 tf.compat.v1.squeeze (TFOpL  None                     0         
 ambda)                                                      

## Train

In [8]:
# custom loss
def linear_scaled_loss(y, p) :
    '''
    Squared error between p and y, multiplied element-wise by y

    Passed as the `loss` argument to `model.compile`, so presumably `y` is
    the target and `p` the prediction (Keras calls losses as
    (y_true, y_pred)). No reduction is applied here.
    '''
    squared_error = (p - y)**2
    return y*squared_error

In [9]:
# training configuration
# Adam optimizer with a learning rate of 1e-3
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)
# target-weighted squared error; the plain alternative that was tried is
# tf.keras.losses.MeanSquaredError()
loss = linear_scaled_loss

# attach optimizer and loss to the model
model.compile(optimizer=optimizer, loss=loss)

In [10]:
# train for 30 epochs, evaluating on the validation split after each one;
# the returned object is kept in `history`
history = model.fit(train_ds, validation_data=val_ds, epochs=30)

Epoch 1/30
 14/563 [..............................] - ETA: 22:42 - loss: 207.6652

KeyboardInterrupt: 