In [None]:
# Imported here because this cell runs before the notebook's import cell.
import os

# Project root; the loading cell reads its `data` sub-folder. Set the
# COSMOLOGY_PATH environment variable to point at a different location;
# without it the original hard-coded directory is used.
path = os.environ.get('COSMOLOGY_PATH', r'D:\KU_Spring2022\Cosmology')

In [None]:
import numpy as np
import pandas as pd
import os
import tensorflow as tf
from tensorflow.keras import layers, regularizers
import matplotlib.pyplot as plt 

In [None]:
# Number of leading entries kept from each per-object array in every file,
# so that all samples end up with the same length.
cutoff = 11100

master_rshift = []
master_dist_mod = []  # 'distm' with 'distmerr' added on
master_glat = []
master_glon = []
bulkflows = []
destination = os.path.join(path, 'data')

# Sort the listing: os.listdir returns entries in arbitrary order, which would
# make the sample order (and therefore any later split) machine dependent.
for file_name in sorted(os.listdir(destination)):
    file_path = os.path.join(destination, file_name)
    if not os.path.isfile(file_path):
        # Skip sub-directories (e.g. notebook checkpoint folders).
        continue

    # Assumes every file is an .npz archive (np.load then returns an NpzFile,
    # which is a context manager) -- the `with` block closes it after reading.
    with np.load(file_path) as archive:
        sample = {
            'redshift': archive['redshift'][:cutoff],
            'distm': archive['distm'][:cutoff] + archive['distmerr'][:cutoff],
            'glat': archive['glat'][:cutoff],
            'glon': archive['glon'][:cutoff],
        }
        bulkflow = archive['bulkflow']

    # Fail early with the offending file name instead of producing ragged
    # arrays that only break further down the notebook.
    for key, values in sample.items():
        if len(values) < cutoff:
            raise ValueError(
                f'{file_path}: {key!r} has {len(values)} entries, expected at least {cutoff}'
            )

    master_rshift.append(sample['redshift'])
    master_dist_mod.append(sample['distm'])
    master_glat.append(sample['glat'])
    master_glon.append(sample['glon'])
    bulkflows.append(bulkflow)

if not bulkflows:
    raise FileNotFoundError(f'no data files found in {destination}')

# One row per file.
master_rshift = np.array(master_rshift)
master_dist_mod = np.array(master_dist_mod)
master_glat = np.array(master_glat)
master_glon = np.array(master_glon)
bulkflows = np.array(bulkflows)

# Scale every feature by the maximum of its own sample (row), so each row's
# largest value becomes 1. NumPy is used for the reduction so the arrays stay
# NumPy arrays instead of being silently turned into TensorFlow tensors.
# NOTE(review): dividing by the row maximum only bounds the values when that
# maximum is positive (and non-zero) -- confirm this holds for glat/glon.
master_rshift = master_rshift / np.max(master_rshift, axis=1, keepdims=True)
master_dist_mod = master_dist_mod / np.max(master_dist_mod, axis=1, keepdims=True)
master_glat = master_glat / np.max(master_glat, axis=1, keepdims=True)
master_glon = master_glon / np.max(master_glon, axis=1, keepdims=True)

# Sanity check: report the shape of every array built so far.
for label, values in (
    ('master_rshift', master_rshift),
    ('master_dist_mod', master_dist_mod),
    ('master_glat', master_glat),
    ('master_glon', master_glon),
    ('bulkflows', bulkflows),
):
    print(f'{label}.shape: {values.shape}')

print()


# Stack the four per-sample features along a new trailing axis, giving one
# channel per feature in the order: redshift, distance modulus, glat, glon.
feature_channels = (master_rshift, master_dist_mod, master_glat, master_glon)
dataset = np.stack(feature_channels, axis=2)

# The regression targets are the per-file bulk-flow values.
labels = bulkflows


# Shuffle samples and labels with one shared permutation so they stay paired.
# The generator is seeded so the train/validation/test membership is the same
# after every kernel restart; change SPLIT_SEED to draw a different split.
SPLIT_SEED = 42
index = np.random.default_rng(SPLIT_SEED).permutation(len(dataset))
dataset = dataset[index]
labels = labels[index]


print(f'dataset.shape: {dataset.shape}')
print(f'labels.shape: {labels.shape}')
print()

# Keep the first 80% of the (already shuffled) samples for training and hold
# out the remainder.
valid_index = int(len(dataset) * 0.8)
x_train, x_valid_test = dataset[:valid_index], dataset[valid_index:]
y_train, y_valid_test = labels[:valid_index], labels[valid_index:]

# Halve the held-out part: first half for validation, second half for testing.
test_index = int(len(x_valid_test) * 0.5)
x_valid, x_test = x_valid_test[:test_index], x_valid_test[test_index:]
y_valid, y_test = y_valid_test[:test_index], y_valid_test[test_index:]

# Report the shapes of each split, separated by blank lines.
split_report = (
    ('train', x_train, y_train),
    ('valid', x_valid, y_valid),
    ('test', x_test, y_test),
)
for position, (tag, features, targets) in enumerate(split_report):
    if position:
        print()
    print(f'x_{tag}.shape: {features.shape}')
    print(f'y_{tag}.shape: {targets.shape}')

In [None]:
BATCH_SIZE = 50
AUTOTUNE = tf.data.AUTOTUNE  # non-deprecated spelling of tf.data.experimental.AUTOTUNE

train_ds = tf.data.Dataset.from_tensor_slices((x_train, y_train))
valid_ds = tf.data.Dataset.from_tensor_slices((x_valid, y_valid))
test_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test))

# Training data is reshuffled over the whole split each epoch before batching.
train_ds = train_ds.cache().shuffle(x_train.shape[0]).batch(BATCH_SIZE).prefetch(AUTOTUNE)
# Validation metrics do not depend on sample order, so no shuffle is needed.
valid_ds = valid_ds.cache().batch(BATCH_SIZE).prefetch(AUTOTUNE)
# The test set must be batched as well: Keras treats every dataset element as
# a batch, so unbatched (11100, 4) samples do not match the model's
# (batch, 11100, 4) input when calling model.evaluate(test_ds).
test_ds = test_ds.batch(BATCH_SIZE).prefetch(AUTOTUNE)

In [None]:
# Peek at the first element of each pipeline to check the feature/label
# shapes, with a blank line between the pipelines.
for position, ds in enumerate((train_ds, valid_ds, test_ds)):
    if position:
        print()
    x, y = next(iter(ds))
    print(x.shape)
    print(y.shape)

In [None]:
# Standardise the inputs with mean/variance statistics computed from the
# training split only (Normalization's default axis=-1 keeps one pair of
# statistics per feature channel).
normalizer = layers.Normalization()
normalizer.adapt(x_train)

# Take the per-sample input shape from the data instead of hard-coding
# (11100, 4), so changing `cutoff` or the feature set cannot desynchronise
# the model from the dataset.
input_shape = x_train.shape[1:]

model = tf.keras.Sequential([
    tf.keras.Input(shape=input_shape),
    normalizer,
    # Strided 1-D convolutions downsample the sequence while widening the
    # channels; for an 11100-step input the length shrinks
    # 11100 -> 2775 -> 693 -> 173 -> 43 -> 20 -> 17.
    layers.Conv1D(16, kernel_size=4, strides=4, kernel_regularizer=regularizers.l2(0.01)),
    layers.BatchNormalization(),
    layers.ReLU(),
    layers.Conv1D(32, kernel_size=4, strides=4, kernel_regularizer=regularizers.l2(0.01)),
    layers.BatchNormalization(),
    layers.ReLU(),
    layers.Conv1D(64, kernel_size=4, strides=4, kernel_regularizer=regularizers.l2(0.01)),
    layers.BatchNormalization(),
    layers.ReLU(),
    layers.Conv1D(128, kernel_size=4, strides=4, kernel_regularizer=regularizers.l2(0.01)),
    layers.BatchNormalization(),
    layers.ReLU(),
    layers.Conv1D(256, kernel_size=4, strides=2, kernel_regularizer=regularizers.l2(0.01)),
    layers.BatchNormalization(),
    layers.ReLU(),
    # The last convolution has no normalisation/activation of its own before
    # being flattened into the dense head.
    layers.Conv1D(512, kernel_size=4, strides=1, kernel_regularizer=regularizers.l2(0.01)),
    layers.Flatten(),
    # Fully connected regression head with dropout between layers.
    layers.Dense(1024, activation='relu'),
    layers.Dropout(0.2),
    layers.Dense(512, activation='relu'),
    layers.Dropout(0.2),
    layers.Dense(256, activation='relu'),
    layers.Dropout(0.2),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.2),
    layers.Dense(16),
    layers.Dropout(0.1),
    # Three linear outputs, matching the three values in each label.
    layers.Dense(3)
])
model.summary()

In [None]:
def scheduler(epoch, lr):
    """Return the learning rate to use for `epoch`.

    The rate passed in is returned unchanged for epochs 0-7; from epoch 8
    onwards it is multiplied by 0.6 on every call.
    """
    return lr if epoch < 8 else lr * 0.6

# Keras callback that asks `scheduler` for the learning rate at the start of
# every epoch; verbose=1 logs each update.
learning_rate_scheduler = tf.keras.callbacks.LearningRateScheduler(
    schedule=scheduler,
    verbose=1,
)

In [None]:
num_epochs = 10
optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)

# Mean squared error is the training objective. 'mae' is tracked because it is
# a meaningful error measure for this regression; 'accuracy' is not, and is
# kept only because the plotting cell reads it from the history.
model.compile(loss='mean_squared_error', optimizer=optimizer, metrics=['accuracy', 'mae'])

# No batch_size here: train_ds/valid_ds are already batched tf.data pipelines,
# and Keras documents that batch_size must not be given for dataset inputs.
history = model.fit(
    train_ds,
    epochs=num_epochs,
    verbose=2,
    validation_data=valid_ds,
    callbacks=[learning_rate_scheduler],
)

In [None]:
def plot_training_curve(history, metric):
    """Plot the training and validation curves of one recorded metric.

    Args:
        history: the object returned by `model.fit`; its `history` dict must
            contain `metric` and `'val_' + metric`.
        metric: key of the training metric, e.g. 'loss'.

    Returns:
        A `(train_values, valid_values)` tuple with the plotted series.
    """
    train_values = history.history[metric]
    valid_values = history.history[f'val_{metric}']
    # Size the x-axis from what was actually recorded, so the plot still works
    # when training ended before the requested number of epochs.
    epochs = np.arange(len(train_values))

    plt.plot(epochs, train_values)
    plt.plot(epochs, valid_values)
    plt.xlabel('epoch')
    plt.ylabel(metric)
    plt.title(f'{metric} per epoch')
    plt.legend(['train', 'validation'])
    plt.show()
    return train_values, valid_values


train_acc, valid_acc = plot_training_curve(history, 'accuracy')
train_loss, valid_loss = plot_training_curve(history, 'loss')

In [None]:
# Report the loss and compiled metrics on the test pipeline.
# NOTE: Keras treats each dataset element as a batch, so test_ds needs to be
# batched for this call.
model.evaluate(x=test_ds)

In [None]:
# Predict the whole test split, then show prediction n above its true label.
y_pred = model.predict(x_test)
n = 10
print(y_pred[n], '=====================', y_test[n], sep='\n')

In [None]:
# Persist the trained model and load it back from the same location.
# 'some path/name' is a placeholder -- replace it with a real path before running.
model_path = 'some path/name'
model.save(model_path)
# Only `tensorflow` is imported as `tf` in this notebook; a bare `keras` name
# does not exist, so the loader is reached through `tf.keras`.
model = tf.keras.models.load_model(model_path)