In [None]:
import numpy as np

# Dataset geometry: total row capacity plus the per-sample shapes of the
# inputs (X) and of the targets (Y).
num_samples = 200000
x_shape, y_shape = (100, 200, 3), (2,)

# Element type stored on disk for X and for Y.
x_dtype = y_dtype = 'float32'

# Allocate the disk-backed arrays. mode='w+' creates the file or overwrites
# an existing one, so re-running this cell discards previously written data.
x_memmap = np.memmap(
    'x_dataset.memmap',
    dtype=x_dtype,
    mode='w+',
    shape=(num_samples, *x_shape),
)
y_memmap = np.memmap(
    'y_dataset.memmap',
    dtype=y_dtype,
    mode='w+',
    shape=(num_samples, *y_shape),
)


In [None]:
import pickle
import glob

def process_and_combine_pkl_files_to_memmap(directory_path, x_memmap, y_memmap):
    """Copy the 'X'/'Y' entries of every ``*.pkl`` file in a directory into memmaps.

    Files are visited in sorted path order so the resulting row layout is
    reproducible between runs. Each pickle is expected to hold a mapping
    with an 'X' and a 'Y' entry of equal length; files lacking either key
    are skipped. Batches are appended back to back starting at row 0.

    Args:
        directory_path: Directory searched (non-recursively) for ``*.pkl``
            files.
        x_memmap: Writable array of shape ``(capacity, *sample_shape)`` that
            receives the 'X' entries, cast to its dtype.
        y_memmap: Writable array of shape ``(capacity, *target_shape)`` that
            receives the 'Y' entries, cast to its dtype.

    Returns:
        int: Number of rows written. Rows at or past this index are left
        untouched.

    Raises:
        ValueError: If the pickles hold more samples than the destination
            arrays can store, or if a batch cannot be reshaped to the
            destination's per-sample shape.
    """
    # Take the per-sample shapes from the destinations themselves so the
    # function does not depend on notebook-level globals.
    x_sample_shape = tuple(x_memmap.shape[1:])
    y_sample_shape = tuple(y_memmap.shape[1:])
    capacity = min(len(x_memmap), len(y_memmap))
    current_index = 0

    # glob returns paths in arbitrary order; sort for a deterministic layout.
    for file_path in sorted(glob.glob(directory_path + '/*.pkl')):
        # SECURITY: pickle.load can execute arbitrary code while loading.
        # Only point this function at files from a trusted source.
        with open(file_path, 'rb') as file:
            data = pickle.load(file)

        if 'X' not in data or 'Y' not in data:
            continue

        num_samples = len(data['X'])
        end_index = current_index + num_samples
        if end_index > capacity:
            raise ValueError(
                f"{file_path} would bring the total to {end_index} samples, "
                f"but the memmaps only hold {capacity}"
            )

        x_batch = np.asarray(data['X'], dtype=x_memmap.dtype).reshape((num_samples,) + x_sample_shape)
        y_batch = np.asarray(data['Y'], dtype=y_memmap.dtype).reshape((num_samples,) + y_sample_shape)

        # Write directly to the memmap files
        x_memmap[current_index:end_index] = x_batch
        y_memmap[current_index:end_index] = y_batch

        current_index = end_index

    x_memmap.flush()
    y_memmap.flush()
    return current_index


In [None]:
# Fill both memmaps from the pickle files found under ./process_MPIIGaze.
directory_path = './process_MPIIGaze'
process_and_combine_pkl_files_to_memmap(
    directory_path=directory_path,
    x_memmap=x_memmap,
    y_memmap=y_memmap,
)


In [None]:
def memmap_batch_generator(x_memmap_path, y_memmap_path, batch_size=32, shuffle=True,
                           x_shape=(200000, 100, 200, 3), y_shape=(200000, 2),
                           x_dtype='float32', y_dtype='float32',
                           start=0, stop=None):
    """Endlessly yield ``(x_batch, y_batch)`` pairs read from two memmap files.

    Args:
        x_memmap_path: Path of the memmap file holding the inputs.
        y_memmap_path: Path of the memmap file holding the targets.
        batch_size: Maximum number of rows per batch. The last batch of a
            pass is smaller when the row count is not a multiple of it.
        shuffle: If True, the row order is reshuffled (via ``np.random``)
            at the start of every pass.
        x_shape: Full shape of the input memmap, rows first.
        y_shape: Full shape of the target memmap, rows first.
        x_dtype: Element type of the input memmap.
        y_dtype: Element type of the target memmap.
        start: First row (inclusive) the generator may draw from.
        stop: Row bound (exclusive); ``None`` means the end of the file.
            Use ``start``/``stop`` to keep train/validation/test splits
            disjoint.

    Yields:
        tuple: ``(x_batch, y_batch)`` arrays copied into memory.

    Raises:
        ValueError: On the first ``next()`` call, if the two memmaps differ
            in length, the row range is empty or out of bounds, or
            ``batch_size`` is not positive.
    """
    # Load the memory-mapped files read-only
    x_memmap = np.memmap(x_memmap_path, dtype=x_dtype, mode='r', shape=tuple(x_shape))
    y_memmap = np.memmap(y_memmap_path, dtype=y_dtype, mode='r', shape=tuple(y_shape))

    total_rows = len(x_memmap)
    if len(y_memmap) != total_rows:
        raise ValueError(
            f"x has {total_rows} rows but y has {len(y_memmap)}"
        )
    if stop is None:
        stop = total_rows
    # An empty range would make the loop below spin forever without yielding.
    if not 0 <= start < stop <= total_rows:
        raise ValueError(
            f"invalid row range [{start}, {stop}) for {total_rows} rows"
        )
    if batch_size < 1:
        raise ValueError(f"batch_size must be positive, got {batch_size}")

    indices = np.arange(start, stop)
    num_samples = len(indices)

    while True:
        if shuffle:
            np.random.shuffle(indices)

        for start_idx in range(0, num_samples, batch_size):
            # Slicing clamps at the end, so the final batch may be short.
            # Sorting the rows inside a batch keeps the file reads ascending;
            # which rows form the batch is unaffected.
            batch_indices = np.sort(indices[start_idx:start_idx + batch_size])

            # Yield a batch of data
            yield x_memmap[batch_indices], y_memmap[batch_indices]


In [None]:
from keras.callbacks import EarlyStopping
from keras.optimizers import Adam
from keras.models import Sequential
from keras.layers import Conv2D, Dense, Dropout, Flatten
from keras.regularizers import l2
# Maxpooling
from keras.layers import MaxPooling2D
#BatchNormalization
from keras.layers import BatchNormalization

# CNN regressor: four convolution stages followed by a small dense head
# producing two outputs per 100x200x3 input image.
model = Sequential([
    # Stage 1: 7x7 convolution with an L2 penalty on the kernel.
    Conv2D(
        filters=32,
        kernel_size=(7, 7),
        activation='relu',
        input_shape=(100, 200, 3),
        kernel_regularizer=l2(0.001),
    ),

    # Stage 2: 7x7 convolution, then downsample, normalise and drop out.
    Conv2D(filters=64, kernel_size=(7, 7), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    BatchNormalization(),
    Dropout(rate=0.15),

    # Stage 3: 5x5 convolution with an L2 penalty, dropout, then downsample.
    Conv2D(
        filters=128,
        kernel_size=(5, 5),
        activation='relu',
        kernel_regularizer=l2(0.001),
    ),
    Dropout(rate=0.15),
    MaxPooling2D(pool_size=(2, 2)),

    # Stage 4: final 5x5 convolution.
    Conv2D(filters=256, kernel_size=(5, 5), activation='relu'),

    # Dense head.
    Flatten(),
    Dense(units=64, activation='relu'),
    Dropout(rate=0.2),
    # NOTE(review): sigmoid bounds both outputs to (0, 1) -- confirm the
    # targets are scaled to that range.
    Dense(units=2, activation='sigmoid'),
])

# Mean-squared-error objective; MSE and MAE are also tracked as metrics.
model.compile(
    optimizer=Adam(learning_rate=0.00005),
    loss='mse',
    metrics=['mean_squared_error', 'mean_absolute_error'],
)

# Stop once val_loss has not improved for 5 epochs and roll back to the
# best weights seen.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)



In [None]:
# Mini-batch size; used below for the batch generators and for deriving
# steps_per_epoch / validation_steps.
batch_size = 16

In [None]:
# Train the model on disjoint, contiguous train/validation row ranges.
# Requires `model`, `batch_size` and `early_stopping` from the cells above.

# Paths to your memmap files
x_memmap_path = 'x_dataset.memmap'
y_memmap_path = 'y_dataset.memmap'

# Split sizes: the first 60% of the rows are used for training and the
# following 15% for validation. Rows from int(0.85 * total_samples) onward
# are never read here, so they stay untouched for the test evaluation.
total_samples = 200000
num_train_samples = int(total_samples * 0.6)
num_val_samples = int(total_samples * 0.15)
val_end = num_train_samples + num_val_samples

steps_per_epoch = num_train_samples // batch_size
validation_steps = num_val_samples // batch_size


def _split_batch_generator(x_rows, y_rows, batch_size, shuffle):
    """Endlessly yield (x, y) batches drawn only from the given row views.

    Args:
        x_rows: Array (or memmap slice) of inputs for one split.
        y_rows: Matching array (or memmap slice) of targets.
        batch_size: Maximum number of rows per batch.
        shuffle: Reshuffle the row order at the start of every pass.

    Yields:
        tuple: ``(x_batch, y_batch)`` copied into memory.
    """
    order = np.arange(len(x_rows))
    while True:
        if shuffle:
            np.random.shuffle(order)
        for lo in range(0, len(order), batch_size):
            # Sorted rows keep the file reads ascending within a batch.
            picked = np.sort(order[lo:lo + batch_size])
            yield x_rows[picked], y_rows[picked]


# Open the files read-only; slicing a memmap gives a view, not a copy.
x_all = np.memmap(x_memmap_path, dtype='float32', mode='r', shape=(total_samples, 100, 200, 3))
y_all = np.memmap(y_memmap_path, dtype='float32', mode='r', shape=(total_samples, 2))

# Create generators over non-overlapping ranges so that validation (and
# later test) samples are never seen during training.
train_generator = _split_batch_generator(
    x_all[:num_train_samples], y_all[:num_train_samples], batch_size, shuffle=True
)
validation_generator = _split_batch_generator(
    x_all[num_train_samples:val_end], y_all[num_train_samples:val_end], batch_size, shuffle=False
)

# Train the model
model.fit(
    train_generator,
    steps_per_epoch=steps_per_epoch,
    epochs=100,  # Upper bound; early stopping usually ends training sooner
    validation_data=validation_generator,
    validation_steps=validation_steps,
    callbacks=[early_stopping]
)


In [None]:
# Evaluate on the held-out tail of the dataset, streaming it in batches.
# Requires `model`, `x_memmap_path` and `y_memmap_path` from earlier cells.
total_samples = 200000
x_test_memmap = np.memmap(x_memmap_path, dtype='float32', mode='r', shape=(total_samples, 100, 200, 3))
y_test_memmap = np.memmap(y_memmap_path, dtype='float32', mode='r', shape=(total_samples, 2))

# Assuming the last 15% of the data is for testing
test_start_index = int(total_samples * 0.85)
x_test = x_test_memmap[test_start_index:]  # memmap views, nothing is read yet
y_test = y_test_memmap[test_start_index:]

eval_batch_size = 16
# Ceiling division so a trailing partial batch is still evaluated.
num_test_batches = (len(x_test) + eval_batch_size - 1) // eval_batch_size


def _test_batches(x_rows, y_rows, batch_size):
    """Endlessly yield consecutive (x, y) batches copied out of the row views.

    The generator wraps around after the last batch; the caller bounds the
    number of batches consumed through ``steps``.
    """
    while True:
        for lo in range(0, len(x_rows), batch_size):
            hi = lo + batch_size
            # np.array copies just this batch from disk into memory.
            yield np.array(x_rows[lo:hi]), np.array(y_rows[lo:hi])


# Feeding batches instead of the whole slice avoids handing Keras a
# multi-gigabyte array that it would typically convert to an in-memory
# tensor up front.
model.evaluate(
    _test_batches(x_test, y_test, eval_batch_size),
    steps=num_test_batches,
)
