In [None]:
# Detect whether the notebook runs inside Google Colab. The resulting flag
# gates the Colab-only steps further down (Drive mount, package installs,
# dataset path selection).
try:
    from google.colab import drive
except ImportError:
    # Only a missing package means "not on Colab"; any other error should
    # surface instead of being silently swallowed by a bare `except:`.
    COLAB_NOTEBOOK = False
else:
    COLAB_NOTEBOOK = True

if COLAB_NOTEBOOK:
    # Mount Google Drive under /content/gdrive, remounting if already mounted.
    drive.mount("/content/gdrive", force_remount=True)
import tensorflow as tf
# tf.debugging.set_log_device_placement(True)
# tf.config.set_visible_devices([], 'GPU') 
import h5py
import random
import time
tf.executing_eagerly() 


In [20]:
# On Colab, download the dataset file from Google Drive with gdown.
# NOTE(review): the file is saved as 'GOLD_XYZ.hdf5' in the working directory,
# while the Colab `hdf5_path` used later points into the mounted Drive folder --
# confirm which copy is meant to be read.
# !wget https://drive.google.com/file/d/1x7ieFTAYVlPe9xMvFAqx6oZvUZ4DLbdf/view?usp=sharing
# !pip install gdown
if COLAB_NOTEBOOK:
    import gdown

    # url = 'https://drive.google.com/uc?id=0B9P1L--7Wd2vNm9zMTJWOGxobkU'
    url = 'https://drive.google.com/uc?id=1x7ieFTAYVlPe9xMvFAqx6oZvUZ4DLbdf'
    # url = 'http://opendata.deepsig.io/datasets/2018.01/2018.01.OSC.0001_1024x2M.h5.tar.gz'
    output = 'GOLD_XYZ.hdf5'
    gdown.download(url, output, quiet=False)
# Show disk usage and list the working directory (runs on every platform).
!df -h
!ls -l



Filesystem      Size  Used Avail Use% Mounted on
udev            3.9G     0  3.9G   0% /dev
tmpfs           794M  2.0M  792M   1% /run
/dev/sda8       216G  134G   72G  66% /
tmpfs           3.9G  330M  3.6G   9% /dev/shm
tmpfs           5.0M  4.0K  5.0M   1% /run/lock
tmpfs           3.9G     0  3.9G   0% /sys/fs/cgroup
/dev/loop0       92M   92M     0 100% /snap/core/8689
/dev/loop1      4.4M  4.4M     0 100% /snap/gnome-calculator/704
/dev/loop2      1.0M  1.0M     0 100% /snap/gnome-logs/93
/dev/loop3       68M   68M     0 100% /snap/sublime-text/85
/dev/loop4       15M   15M     0 100% /snap/gnome-characters/495
/dev/loop5      161M  161M     0 100% /snap/gnome-3-28-1804/116
/dev/loop6      3.8M  3.8M     0 100% /snap/gnome-system-monitor/135
/dev/loop7       55M   55M     0 100% /snap/core18/1668
/dev/loop11     198M  198M     0 100% /snap/viber-unofficial/37
/dev/loop13     348M  348M     0 100% /snap/pycharm-community/188
/dev/loop14     4.3M  4.3M     0 100% /snap/gnome-calcul

In [None]:
# drive.flush_and_unmount()

In [None]:
# Install sonnet (it doesn't come preinstalled in the colab VM).
if COLAB_NOTEBOOK:
    !pip install -q dm-sonnet

# Print the model line of every NVIDIA GPU listed under /proc; awk blanks the
# first field (the file-name prefix grep adds) before printing.
!grep Model: /proc/driver/nvidia/gpus/*/information | awk '{$1="";print$0}'


In [None]:
# import tensorflow as tf
# tf.debugging.set_log_device_placement(True)
# import h5py
import numpy as np
import matplotlib.pyplot as plt
import random
import tqdm
import time
import sonnet as snt
from tqdm import tqdm

# Report the versions of the libraries this notebook depends on, one per line.
from tensorboard import version

print("\n".join(
    template.format(lib_version)
    for template, lib_version in (
        ("TensorFlow version: {}", tf.__version__),
        ("    Sonnet version: {}", snt.__version__),
        ("    Numpy  version: {}", np.__version__),
        ("TensorBoard version: {}", version.VERSION),
    )
))
# tf.config.list_physical_devices('GPU')

In [None]:
class ConvMaxPool(snt.Module):
    """1-D convolution followed by a stride-2 max-pool.

    The convolution produces 64 channels with a kernel of width 64, stride 1
    and "SAME" padding; the pooling step (window 2, stride 2, "VALID") then
    downsamples the width axis by two. Tensors are channels-last ("NWC").
    """

    def __init__(self, name=None):
        """Create the convolution sub-module.

        Args:
            name: optional module name, forwarded to ``snt.Module``.
        """
        super().__init__(name=name)
        self.conv = snt.Conv1D(
            output_channels=64,
            kernel_shape=64,
            stride=1,
            rate=1,
            padding="SAME",
            with_bias=True,
            data_format="NWC",
            name="conv_max_pool",
        )

    def __call__(self, inputs):
        """Convolve ``inputs`` and max-pool the result.

        Args:
            inputs: tensor in "NWC" layout.

        Returns:
            The pooled feature tensor.
        """
        convolved = self.conv(inputs)
        return tf.nn.max_pool1d(
            convolved,
            ksize=2,
            strides=2,
            padding="VALID",
            data_format='NWC',
            name="pool",
        )


class CNN(snt.Module):
    """Convolutional classifier producing 24 raw logits per example.

    Pipeline: an input convolution (64 channels, kernel 2) and max-pool, six
    ``ConvMaxPool`` stages, a flatten, two 128-unit dense layers with ReLU and
    a final 24-unit dense layer. No softmax is applied to the output.
    """

    def __init__(self, epsilon, name=None):
        """Build all sub-modules.

        Args:
            epsilon: accepted for interface compatibility; not used by this
                class.
            name: optional module name, forwarded to ``snt.Module``.
        """
        super().__init__(name=name)

        # Six identical conv + pool stages named conv0 .. conv5.
        self.layers = [ConvMaxPool(name="conv" + str(idx)) for idx in range(6)]

        self.conv_in = snt.Conv1D(
            output_channels=64,
            kernel_shape=2,
            stride=1,
            rate=1,
            padding="SAME",
            with_bias=True,
            data_format="NWC",
            name="conv_input",
        )

        # Dense head: 128 -> 128 -> 24.
        self.linear = snt.Linear(output_size=128, with_bias=True)
        self.linear1 = snt.Linear(output_size=128, with_bias=True)
        self.linear2 = snt.Linear(output_size=24, with_bias=True)

        # Learning-rate bookkeeping; not read anywhere inside this class.
        self.decay_lr = 0
        self.lr = 2e-4

    # @snt.once
    def __initialize(self, weight):
        """Placeholder hook; intentionally does nothing."""
        pass

    def __call__(self, inputs):
        """Map a batch of signals to class logits.

        Args:
            inputs: tensor in "NWC" layout -- presumably (batch, 1024, 2);
                confirm against the data pipeline.

        Returns:
            Tensor of un-normalised logits with 24 entries per example.
        """
        hidden = tf.nn.max_pool1d(
            self.conv_in(inputs),
            ksize=2,
            strides=2,
            padding="VALID",
            data_format='NWC',
            name="pool_input",
        )

        for stage in self.layers:
            hidden = stage(hidden)

        hidden = snt.flatten(hidden)

        # Two hidden dense layers, each followed by ReLU.
        for dense in (self.linear, self.linear1):
            hidden = tf.nn.relu(dense(hidden))

        # Final projection is returned as raw logits (no softmax here).
        return self.linear2(hidden)

# Adam optimizer shared by `step`. beta1=0 means the first-moment estimate is
# just the current gradient (no momentum averaging).
# NOTE(review): CNN also stores lr=2e-4, which is not used here -- confirm intended rate.
opt = snt.optimizers.Adam(learning_rate=0.001, beta1=0.)

def step(batch, labels):
    """Run one optimisation step on a mini-batch and return its mean loss.

    Uses the module-level ``radio_model`` and ``opt``.

    Args:
        batch: input tensor passed to ``radio_model``.
        labels: target tensor for the softmax cross-entropy -- presumably
            one-hot class vectors; confirm against the dataset.

    Returns:
        Scalar tensor holding the batch-averaged cross-entropy loss.
    """
    with tf.GradientTape() as tape:
        logits = radio_model(batch)
        per_example = tf.nn.softmax_cross_entropy_with_logits(
            labels=labels, logits=logits)
        loss = tf.reduce_mean(per_example)

    # Read the variables only after the forward pass above; keep this order
    # (the model may create its variables on first call -- TODO confirm).
    variables = radio_model.trainable_variables
    gradients = tape.gradient(loss, variables)

    opt.apply(gradients, variables)
    return loss

def cross_entropy_loss(logits, target):
    """Summed softmax cross-entropy between raw logits and target distributions.

    The previous body took ``tf.math.log`` of the logits directly, which is
    only valid for probabilities and yields NaN for negative logits. The
    logits are now normalised with a numerically stable log-softmax first.

    Args:
        logits: un-normalised scores, classes on the last axis.
        target: target distribution (e.g. one-hot), same shape as ``logits``.

    Returns:
        Scalar tensor: the cross-entropy summed over every element (not
        averaged over the batch).
    """
    log_probs = tf.nn.log_softmax(logits, axis=-1)
    return -tf.reduce_sum(target * log_probs)



In [None]:
class radioML:
    """Endless generator of random mini-batches read from an HDF5 file.

    Calling an instance opens the file and yields ``(X[idx], Y[idx])`` pairs
    forever, where ``idx`` is a fresh random selection of ``batch_size``
    distinct row indices on every iteration.
    """

    def __init__(self, file, batch_size):
        """Remember the file path and how many rows to draw per batch."""
        self.file = file
        self.batch_size = batch_size

    def __call__(self):
        """Yield random ``(X, Y)`` batches; the file stays open while iterating."""
        with h5py.File(self.file, 'r') as h5:
            total_rows = len(h5['X'])
            while True:
                # Indices are sorted once and reused for both datasets
                # (h5py fancy indexing expects increasing order).
                picked = sorted(random.sample(range(total_rows), self.batch_size))
                yield (h5['X'][picked], h5['Y'][picked])

def radioML_map(data_batch, data_label):
    """Reshape one example to ``[1024, 2]``; the label passes through unchanged.

    Intended for ``tf.data.Dataset.map``. Per-example mean/std normalisation
    was tried here previously and is currently disabled.

    NOTE(review): the label is returned exactly as received, so any leading
    singleton axis it carries is kept -- confirm this is what the loss expects.

    Args:
        data_batch: tensor holding exactly 1024 * 2 values.
        data_label: label tensor for the same example.

    Returns:
        Tuple ``(reshaped_signal, data_label)``.
    """
    return (tf.reshape(data_batch, [1024, 2]), data_label)

In [None]:
# Instantiate the classifier used by `step`. `epsilon` is required by
# CNN.__init__ but is not used inside the class.
radio_model = CNN(epsilon=1e-8, name="CNN")
print(radio_model)


In [None]:
# Pick the dataset location: the mounted Google Drive copy on Colab, otherwise
# a machine-specific absolute path.
# NOTE(review): the local path is hard-coded to one workstation -- adjust before running elsewhere.
if COLAB_NOTEBOOK:
    hdf5_path = '/content/gdrive/My Drive/data/GOLD_XYZ.hdf5'
else:          
    hdf5_path = '/media/bognev/CE50072F50071DB9/2018.01/GOLD_XYZ_OSC.0001_1024.hdf5'
    
    


# Draw one random example from the HDF5 file and plot each of its two channels
# in a separate figure as a quick sanity check of the data pipeline.
radio_plot = tf.data.Dataset.from_generator(
    radioML(hdf5_path, 1),
    output_types=(tf.float32, tf.float32),
    output_shapes=(tf.TensorShape([1, 1024, 2]), tf.TensorShape([1, 24])),
)

radio_plot = radio_plot.take(1)
radio_plot = radio_plot.map(radioML_map)
for (batch, labels) in radio_plot:
    print(batch.shape)
    signal = batch.numpy()  # shape [1024, 2] after radioML_map
    for channel in range(signal.shape[-1]):
        fig, ax = plt.subplots()
        ax.plot(signal[:, channel])
        # The x axis is the sample position within the example, not a batch
        # counter; the y axis is the raw signal value, not a loss.
        # Channels are presumably the I and Q components -- TODO confirm.
        ax.set_xlabel('Sample index')
        ax.set_ylabel('Amplitude (channel {})'.format(channel))
    plt.show()

In [None]:
# (Re)load the TensorBoard notebook extension so the %tensorboard magic works.
%reload_ext tensorboard
# %load_ext tensorboard
# import os
from datetime import datetime as dt
# Clear any logs from previous runs
!rm -rf ./logs/
# Each run writes to its own timestamped directory under logs/scalars/.
log_dir = "logs/scalars/" + dt.now().strftime("%Y%m%d-%H%M%S")
writer = tf.summary.create_file_writer(log_dir)


# Make this writer the default destination for tf.summary calls.
writer.set_as_default()

In [None]:

# Training configuration.
batch_size = 128    # examples per optimisation step
num_batches = 256   # optimisation steps per epoch
num_epochs = 10

# One tick per optimisation step; unit_scale makes the bar count signals.
t = tqdm(total=num_batches * num_epochs,
         unit='sig', unit_scale=batch_size,
         position=0)

# The generator yields one random example at a time; tf.data does the batching.
radio_data = tf.data.Dataset.from_generator(radioML(hdf5_path, 1),
                output_types=(tf.float32, tf.float32),
                output_shapes=(tf.TensorShape([1, 1024, 2]), tf.TensorShape([1, 24])))

loss_history = []   # per-step loss values, plotted by a later cell
loss = 0
radio_data = radio_data.take(batch_size * num_batches)
radio_data = radio_data.map(radioML_map)
radio_data = radio_data.batch(batch_size)
radio_data = radio_data.prefetch(tf.data.experimental.AUTOTUNE)
radio_data = radio_data.repeat(num_epochs)

start_time = time.perf_counter()
for step_num, (batch, labels) in enumerate(radio_data):
    loss = step(batch, labels)
    # Record the loss so the history plot has data (it was never filled before).
    loss_history.append(float(loss.numpy()))

    with writer.as_default():
        # `step` must be the loop counter; the previous `i` was undefined.
        tf.summary.scalar('training loss', loss, step=step_num)

    t.update(1)

    # step_num counts batches, so an epoch ends every `num_batches` steps.
    # The old test `step_num % batch_size*num_batches` parsed as
    # `(step_num % batch_size) * num_batches` and fired on the wrong steps.
    batches_done = step_num + 1
    if batches_done % num_batches == 0:
        epoch = batches_done // num_batches
        t.write('\nEpoch = {}/{} (lr_mult = {:0.02f}, loss = {}) done.'.format(
            epoch, num_epochs, 0.001, loss.numpy()))
tf.print("Execution time:", time.perf_counter() - start_time)
t.close()

In [19]:
# Open the TensorBoard UI inline, reading the scalar logs under logs/scalars.
%tensorboard --logdir logs/scalars

Reusing TensorBoard on port 6006 (pid 10963), started 0:14:22 ago. (Use '!kill 10963' to kill it.)

In [None]:
# Plot the recorded per-step training loss and report any NaN entries.
# The NaN check result used to be discarded, so it is now printed explicitly.
loss_values = np.asarray(loss_history, dtype=np.float32)
nan_count = int(np.isnan(loss_values).sum())
print("NaN losses: {} of {}".format(nan_count, loss_values.size))

fig, ax = plt.subplots()
ax.plot(loss_values)
ax.set_xlabel('Batch #')
ax.set_ylabel('Loss [entropy]')
plt.show()
loss_history

In [None]:
# Print a formatted listing of the model's variables.
print(snt.format_variables(radio_model.variables))