## Implements a CNN that processes weak lensing outputs

In [None]:
# extract tarfiles if necessary and set specs for run

In [24]:

import tarfile
import os
import shutil
from astropy.io import fits
import numpy as np
from scipy.ndimage import zoom
import re
import tensorflow as tf
import random


# Specify the directory containing the .tar files
directory_path = '/Users/matt/Dropbox/learning_neuralnets/colombialensing/'


max_cosmologies = -1 #for testing.  Set to -1 to use everything.
max_examples = 100 #examples in a cosmology, should divide by number_batches
number_batches = 10
validation_split = 0.4  # use this fraction of the data for validation

#number_examples = max_examples*(1+validation_split)

# image_size
image_size = 256
number_fits_files = 512
suffix = f"_{image_size}"
extract_tarfiles = 0  #if I need to extract tarfiles

#extracts only if indicated (could make this more elegant by checking to see if they exist)
if extract_tarfiles:
    # Use a regular expression to match .tar files with the desired suffix
    pattern = re.compile(rf"{suffix}.tar$")

    # List all matching .tar files in the directory
    all_tar_files = [f for f in os.listdir(directory_path) if pattern.search(f)]

    # Extract the tar archive
    for tar_file in all_tar_files:
        print(tar_file)
        tar_file_path = os.path.join(directory_path, tar_file)
        with tarfile.open(tar_file_path, 'r') as archive:
            archive.extractall(path=directory_path)
    

# Read into memory the data

In [26]:
def get_labels_for_file(dir_name):
    """
    Extracts labels from the tar file name.
    For the file "Om0.183_si0.958_256.tar", the labels will be [0.183, 0.958].
    
    Args:
    - tar_file_name (str): Name of the tar file.
    
    Returns:
    - list: List containing the two labels extracted from the filename.
    """
    # Split the filename on underscores
    parts = dir_name.split('_')

    # Extract the numeric values for 'Om' and 'si'
    om_label = float(parts[0][2:])
    si_label = float(parts[1][2:])
    
    return [om_label, si_label]


#now loop through all files in the   
pattern = re.compile(rf"{suffix}$")
all_directories = [f for f in os.listdir(directory_path) if pattern.search(f)]

random.shuffle(all_directories) #this makes it so that there is no particular order for the directories
#print(all_directories)

#tensor of labels; there are two labels for each
batch_labels = np.empty((len(all_directories), number_fits_files, 2), dtype=np.float16)

RMS =0 #first time set to zero
data_array = np.empty((len(all_directories), number_fits_files, image_size, image_size), dtype=np.float16)
for idy, dir_name in enumerate(all_directories):
    if max_cosmologies>0 and idy >= max_cosmologies:
        break
        
    print("reading in", dir_name)
    dir_path = os.path.join(directory_path, dir_name)

    all_files = os.listdir(dir_path)
    fits_files = [f for f in all_files if f.endswith('.fits')]



    for idx, file in enumerate(fits_files):
        with fits.open(os.path.join(dir_path, file)) as hdul:
            
            original_data = hdul[0].data

            if RMS == 0: #get RMS to divide by for first file to normalize everything
                RMS = np.sqrt(np.var(hdul[0].data))
                print(f"RMS={RMS}")
            
            ##get rid of NANs, which affects a few files
            #if np.isnan(original_data).any():
            #    continue
            #I've cleaned this out already
    
    
            data_array[idy][idx] = original_data  #/RMS --somehow this divsion causes issues.  Not sure why
    #since all fits files in one directory have the same label
    labels = get_labels_for_file(dir_name)
    batch_labels[idy] = np.array([labels for i in range(number_fits_files)])
    

    
WL_labels = tf.convert_to_tensor(batch_labels)
WL_tensor = tf.convert_to_tensor(data_array)

#archive.extractall(path=directory_path)        
#dir_name = os.path.splitext(tar_file)[0]
#dir_path = os.path.join(directory_path, dir_name)

reading in Om0.363_si1.133_256
RMS=0.0347621813416481
reading in Om0.375_si0.332_256
reading in Om0.317_si0.916_256
reading in Om0.189_si0.878_256
reading in Om0.340_si0.746_256
reading in Om0.268_si0.820_256
reading in Om0.217_si0.842_256
reading in Om0.251_si0.807_256
reading in Om0.283_si0.805_256
reading in Om0.224_si1.013_256
reading in Om0.582_si0.652_256
reading in Om0.249_si0.764_256
reading in Om0.292_si0.835_256
reading in Om0.195_si1.095_256
reading in Om0.349_si0.274_256
reading in Om0.610_si0.397_256
reading in Om0.273_si1.204_256
reading in Om0.361_si0.935_256
reading in Om0.274_si0.786_256
reading in Om0.291_si0.775_256
reading in Om0.450_si0.796_256
reading in Om0.294_si0.991_256
reading in Om0.275_si0.766_256
reading in Om0.315_si0.717_256
reading in Om0.403_si0.757_256
reading in Om0.261_si0.802_256
reading in Om0.313_si0.633_256
reading in Om0.253_si0.589_256
reading in Om0.259_si0.875_256
reading in Om0.215_si0.878_256
reading in Om0.195_si0.994_256
reading in Om0.2

In [15]:
#Just testing arrays

get_labels_for_file('Om0.264_si0.768_256')

print(tf.shape(WL_tensor))

print(tf.shape(WL_labels))

print(WL_tensor[0][0])

print(WL_labels[0][0])

tf.Tensor([ 93 512 256 256], shape=(4,), dtype=int32)
tf.Tensor([ 93 512   2], shape=(3,), dtype=int32)
tf.Tensor(
[[ 0.009      0.004784   0.003347  ... -0.001416  -0.001376   0.02293  ]
 [-0.002459   0.00108    0.005787  ...  0.02527    0.01025    0.01704  ]
 [-0.004765  -0.001403   0.00821   ...  0.00551    0.004086   0.001116 ]
 ...
 [-0.00762   -0.00821   -0.01585   ... -0.002838   0.002556   0.013275 ]
 [ 0.002367   0.00891   -0.00389   ...  0.001995  -0.0004766  0.01472  ]
 [ 0.0296     0.04944    0.005     ...  0.003056  -0.001373   0.002777 ]], shape=(256, 256), dtype=float16)
tf.Tensor([0.322 0.482], shape=(2,), dtype=float16)


## Implement CNN

In [16]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, BatchNormalization

 #make less redundnat

def create_cnn_model(input_shape):
    model = Sequential()
    model.add(Conv2D(32, (3, 3), activation='relu', input_shape=input_shape))
    #model.add(BatchNormalization())
    model.add(MaxPooling2D((2, 2)))
    model.add(Conv2D(64, (3, 3), activation='relu'))
    #model.add(BatchNormalization())
    model.add(MaxPooling2D((2, 2)))
    model.add(Flatten())
    model.add(Dense(64, activation='relu'))
    model.add(Dense(2, activation='softmax'))  
    
    return model

model = create_cnn_model((image_size, image_size, 1))  # Assuming grayscale images


In [17]:
model.compile(optimizer='adam', loss='mse', metrics=['mean_absolute_error', 'MAPE'])

In [18]:
model.summary()

Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_10 (Conv2D)          (None, 254, 254, 32)      320       
                                                                 
 max_pooling2d_10 (MaxPoolin  (None, 127, 127, 32)     0         
 g2D)                                                            
                                                                 
 conv2d_11 (Conv2D)          (None, 125, 125, 64)      18496     
                                                                 
 max_pooling2d_11 (MaxPoolin  (None, 62, 62, 64)       0         
 g2D)                                                            
                                                                 
 flatten_5 (Flatten)         (None, 246016)            0         
                                                                 
 dense_10 (Dense)            (None, 64)               

# Execute simple CNN

In [38]:
from tensorflow.keras.utils import Sequence
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import MeanSquaredError



# Define a custom data generator
class WLDataGenerator(Sequence):
    def __init__(self, WL_tensor, WL_labels, batch_size):
        self.WL_tensor = WL_tensor
        self.WL_labels = WL_labels
        self.batch_size = batch_size

    def __len__(self):
        return int(np.ceil(self.WL_tensor.shape[0]*WL_tensor.shape[1] / self.batch_size))

    def __getitem__(self, idx):
        batch_x = self.WL_tensor[:max_cosmologies, idx * self.batch_size:(idx + 1) * self.batch_size, :, :]
        batch_x  = tf.reshape(batch_x, (-1, batch_x .shape[2], batch_x.shape[3]))
        batch_x = batch_x[..., np.newaxis]  # Add channel dimension
        batch_y = self.WL_labels[:max_cosmologies, idx * self.batch_size:(idx + 1) * self.batch_size, :]
        batch_y  = tf.reshape(batch_y, (-1, batch_y.shape[2]))
        return batch_x, batch_y

# Parameters
number_fits_files_train = int((1-validation_split)*number_fits_files)
batch_size = min(max_examples, number_fits_files_train) // number_batches  # Splitting into 10 batches
training_data_size = number_batches*batch_size
validation_data_size = min(int(validation_split*training_data_size), number_fits_files- number_fits_files_train)
#validation_data_size = (validation_data_size// number_batches)*number_batches

#create a new CNN model
model2 = create_cnn_model((image_size, image_size, 1))  # Assuming grayscale images
model2.compile(optimizer='adam', loss='mse', metrics=['mean_absolute_error'], run_eagerly=True)

# Create training data set

train_gen = WLDataGenerator(WL_tensor[:, :training_data_size, :, :], WL_labels[:, :training_data_size, :], batch_size)

val_gen = WLDataGenerator(WL_tensor[:, training_data_size:training_data_size+validation_data_size, :, :], \
                          WL_labels[:, training_data_size:training_data_size+validation_data_size, :], batch_size)


# Train the model using the data generator
model2.fit(train_gen, validation_data=val_gen, epochs=10)  # Train for 5 epochs as an example. You can adjust this.


Epoch 1/10


2023-10-26 15:40:16.962856: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]


  30/4711 [..............................] - ETA: 12:24 - loss: 0.0000e+00 - mean_absolute_error: 0.0000e+00

KeyboardInterrupt: 

In [39]:
print("Training data size:", training_data_size)
print("Validation data size:", validation_data_size)
print("Total data size:", WL_tensor.shape[1])

print(len(val_gen), "Number of batches in val_gen:", len(val_gen))
for i in range(len(val_gen)):
    batch_x, batch_y = val_gen[i]
    predictions = model2.predict(batch_x)

Training data size: 100
Validation data size: 40
Total data size: 512
Number of batches in val_gen: 4711


ValueError: Unexpected result of `predict_function` (Empty batch_outputs). Please use `Model.compile(..., run_eagerly=True)`, or `tf.config.run_functions_eagerly(True)` for more information of where went wrong, or file a issue/bug to `tf.keras`.

In [30]:
import matplotlib.pyplot as plt

# Lists to store all labels and predictions
all_labels = []
all_predictions = []

# Loop through validation data
for batch_x, batch_y in val_gen:
    predictions = model2.predict(batch_x)
    print("predictions = ", np.shape(predictions), np.shape(batch_y))
    all_labels.append(batch_y)
    all_predictions.append(predictions)

# Convert lists to numpy arrays
all_labels = np.concatenate(all_labels, axis=0)
all_predictions = np.concatenate(all_predictions, axis=0)

# Calculate fractional difference
fractional_difference = (all_predictions - all_labels) / (all_labels + 1e-8)  # added small value to avoid division by zero

# Plot
plt.figure(figsize=(10, 6))
plt.hist(fractional_difference.flatten(), bins=50, range=(-1, 1), alpha=0.7)
plt.title('Fractional Difference Distribution')
plt.xlabel('Fractional Difference')
plt.ylabel('Frequency')
plt.grid(True)
plt.show()


predictions =  (920, 2) (920, 2)
predictions =  (920, 2) (920, 2)
predictions =  (920, 2) (920, 2)
predictions =  (920, 2) (920, 2)


ValueError: Unexpected result of `predict_function` (Empty batch_outputs). Please use `Model.compile(..., run_eagerly=True)`, or `tf.config.run_functions_eagerly(True)` for more information of where went wrong, or file a issue/bug to `tf.keras`.

In [44]:
import numpy as np
import matplotlib.pyplot as plt

# Lists to store all labels and predictions
all_labels = []
all_predictions = []

# Loop through validation data
for batch_x, batch_y in val_gen:
    #print(batch_x, batch_y)
    predictions = model2.predict(batch_x)
    all_labels.append(batch_y)
    all_predictions.append(predictions)

# Convert lists to numpy arrays
all_labels = np.concatenate(all_labels, axis=0)
all_predictions = np.concatenate(all_predictions, axis=0)

# Calculate fractional difference
fractional_difference = (all_predictions - all_labels) / (all_labels + 1e-8)  # added small value to avoid division by zero

# Extract fractional differences for each label
fractional_difference_label1 = fractional_difference[:, 0]
fractional_difference_label2 = fractional_difference[:, 1]

# Plot
plt.figure(figsize=(10, 6))
plt.scatter(fractional_difference_label1, fractional_difference_label2, alpha=0.5)
plt.title('Fractional Difference Scatter Plot')
plt.xlabel('Fractional Difference for Label 1')
plt.ylabel('Fractional Difference for Label 2')
plt.grid(True)
plt.show()



ValueError: Unexpected result of `predict_function` (Empty batch_outputs). Please use `Model.compile(..., run_eagerly=True)`, or `tf.config.run_functions_eagerly(True)` for more information of where went wrong, or file a issue/bug to `tf.keras`.

In [41]:
print(batch_size, validation_data_size)

10 20


In [51]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

# Load the ResNet50 model with weights pre-trained on ImageNet
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(image_size, image_size, 3))

# Freeze the layers of the base model
for layer in base_model.layers:
    layer.trainable = False

# Add custom layers on top of ResNet50
x = base_model.output
x = GlobalAveragePooling2D()(x)  # Global average pooling layer
x = Dense(256, activation='relu')(x)  # Dense layer
predictions = Dense(2, activation='linear')(x)  # Final dense layer with 2 outputs for your 2 labels

# Construct the full model
modelPreTrained = Model(inputs=base_model.input, outputs=predictions)

# Compile the model
modelPreTrained.compile(optimizer=Adam(learning_rate=0.001), loss='mse', metrics=['mae'])

modelPreTrained.summary()

Model: "model_5"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_6 (InputLayer)           [(None, 256, 256, 3  0           []                               
                                )]                                                                
                                                                                                  
 conv1_pad (ZeroPadding2D)      (None, 262, 262, 3)  0           ['input_6[0][0]']                
                                                                                                  
 conv1_conv (Conv2D)            (None, 128, 128, 64  9472        ['conv1_pad[0][0]']              
                                )                                                                 
                                                                                            

                                                                                                  
 conv2_block3_1_relu (Activatio  (None, 64, 64, 64)  0           ['conv2_block3_1_bn[0][0]']      
 n)                                                                                               
                                                                                                  
 conv2_block3_2_conv (Conv2D)   (None, 64, 64, 64)   36928       ['conv2_block3_1_relu[0][0]']    
                                                                                                  
 conv2_block3_2_bn (BatchNormal  (None, 64, 64, 64)  256         ['conv2_block3_2_conv[0][0]']    
 ization)                                                                                         
                                                                                                  
 conv2_block3_2_relu (Activatio  (None, 64, 64, 64)  0           ['conv2_block3_2_bn[0][0]']      
 n)       

                                                                                                  
 conv3_block3_1_relu (Activatio  (None, 32, 32, 128)  0          ['conv3_block3_1_bn[0][0]']      
 n)                                                                                               
                                                                                                  
 conv3_block3_2_conv (Conv2D)   (None, 32, 32, 128)  147584      ['conv3_block3_1_relu[0][0]']    
                                                                                                  
 conv3_block3_2_bn (BatchNormal  (None, 32, 32, 128)  512        ['conv3_block3_2_conv[0][0]']    
 ization)                                                                                         
                                                                                                  
 conv3_block3_2_relu (Activatio  (None, 32, 32, 128)  0          ['conv3_block3_2_bn[0][0]']      
 n)       

                                                                                                  
 conv4_block2_1_bn (BatchNormal  (None, 16, 16, 256)  1024       ['conv4_block2_1_conv[0][0]']    
 ization)                                                                                         
                                                                                                  
 conv4_block2_1_relu (Activatio  (None, 16, 16, 256)  0          ['conv4_block2_1_bn[0][0]']      
 n)                                                                                               
                                                                                                  
 conv4_block2_2_conv (Conv2D)   (None, 16, 16, 256)  590080      ['conv4_block2_1_relu[0][0]']    
                                                                                                  
 conv4_block2_2_bn (BatchNormal  (None, 16, 16, 256)  1024       ['conv4_block2_2_conv[0][0]']    
 ization) 

 conv4_block5_1_conv (Conv2D)   (None, 16, 16, 256)  262400      ['conv4_block4_out[0][0]']       
                                                                                                  
 conv4_block5_1_bn (BatchNormal  (None, 16, 16, 256)  1024       ['conv4_block5_1_conv[0][0]']    
 ization)                                                                                         
                                                                                                  
 conv4_block5_1_relu (Activatio  (None, 16, 16, 256)  0          ['conv4_block5_1_bn[0][0]']      
 n)                                                                                               
                                                                                                  
 conv4_block5_2_conv (Conv2D)   (None, 16, 16, 256)  590080      ['conv4_block5_1_relu[0][0]']    
                                                                                                  
 conv4_blo

                                                                  'conv5_block1_3_bn[0][0]']      
                                                                                                  
 conv5_block1_out (Activation)  (None, 8, 8, 2048)   0           ['conv5_block1_add[0][0]']       
                                                                                                  
 conv5_block2_1_conv (Conv2D)   (None, 8, 8, 512)    1049088     ['conv5_block1_out[0][0]']       
                                                                                                  
 conv5_block2_1_bn (BatchNormal  (None, 8, 8, 512)   2048        ['conv5_block2_1_conv[0][0]']    
 ization)                                                                                         
                                                                                                  
 conv5_block2_1_relu (Activatio  (None, 8, 8, 512)   0           ['conv5_block2_1_bn[0][0]']      
 n)       

In [52]:

# Adjust the data generator to repeat the grayscale channel 3 times
class AdjustedWLDataGenerator(WLDataGenerator):
    def __getitem__(self, idx):
        batch_x, batch_y = super().__getitem__(idx)
        #batch_x = np.repeat(batch_x, 3, axis=-1)  # Repeat the grayscale channel 3 times
        
        #to give different weightings that the feature maps can use
        batch_x_square = np.square(batch_x)
        batch_x_cube = np.power(batch_x, 3)
        batch_x = np.concatenate([batch_x, batch_x_square, batch_x_cube], axis=-1)
        
        #print(np.shape())  
        return batch_x, batch_y

  
# Create adjusted data generators
train_gen_adjusted = AdjustedWLDataGenerator(WL_tensor[:, :training_data_size, :, :], WL_labels[:, :training_data_size, :], batch_size)
val_gen_adjusted = AdjustedWLDataGenerator(WL_tensor[:, training_data_size:training_data_size+validation_data_size, :, :], \
                                           WL_labels[:, training_data_size:training_data_size+validation_data_size, :], batch_size)

# Train the model using the adjusted data generators
modelPreTrained.fit(train_gen_adjusted, validation_data=val_gen_adjusted, epochs=5)

Epoch 1/5


2023-10-25 17:12:35.530968: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]




2023-10-25 17:26:23.332837: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]


Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x138bf7410>