# Load data

In [3]:
# data import
import pickle
import numpy as np

# Train, test, split
from sklearn.model_selection import train_test_split

# Model
from tensorflow.keras import models
from tensorflow.keras import layers
from tensorflow.keras.callbacks import EarlyStopping

In [5]:
# Location of the pickled 3D array, relative to the notebook's working directory.
img_3D_path = '../raw_data/img3D-conv.pickle'

# pickle can run arbitrary code while loading, so only open files you trust.
with open(img_3D_path, mode='rb') as pickle_file:
    img_3D_conv = pickle.load(pickle_file)



# Functions

In [6]:
def stacking(img3D, window, lat_step, lon_step, time_step):
    """Bin the non-zero cells of `window` onto a coarser grid, summing per bin.

    A non-zero cell at index (i, j, k) is added to the output cell
    (round(i / lat_step), round(j / lon_step), round(k / time_step)),
    using numpy's round-half-to-even.

    Parameters
    ----------
    img3D : array
        Only its first two dimensions are read, to size the output grid.
    window : 3D numpy array
        Values to aggregate; cells equal to zero are ignored.
    lat_step, lon_step, time_step : number
        Bin width along axis 0, 1 and 2 respectively, in index units.

    Returns
    -------
    numpy.ndarray of floats with shape
    (int(img3D.shape[0] / lat_step) + 2,
     int(img3D.shape[1] / lon_step) + 2,
     round((window.shape[2] - 1) / time_step) + 1).
    """
    grid_offset = np.array([0, 0, 0])  # index the coarse grid starts from
    grid_spacing = np.array([lat_step, lon_step, time_step])

    # Coordinates and values of the non-zero cells, both in C (row-major) order.
    nonzero = window != 0
    coords = np.argwhere(nonzero)
    values = window[nonzero]

    # Map every fine-grid point to the index of its coarse-grid bin.
    indexes = np.round((coords - grid_offset) / grid_spacing).astype('int')

    # The depth comes from the window length, not from the deepest non-zero
    # point: it is then identical for every window of the same length (so
    # samples can be stacked into one batch) and an all-zero window no longer
    # fails on the max() of an empty array.
    n_slices = window.shape[2]
    depth = int(np.round((n_slices - 1) / time_step)) + 1 if n_slices else 0

    a = np.zeros((int(img3D.shape[0] / lat_step) + 2,
                  int(img3D.shape[1] / lon_step) + 2,
                  depth))

    # Unbuffered add: points that fall into the same bin are accumulated.
    np.add.at(a, (indexes[:, 0], indexes[:, 1], indexes[:, 2]), values)

    return a

In [7]:
def get_observation_target(img3D,
                           obs_timeframe,obs_lat,obs_lon, obs_time,
                           target_timeframe,  tar_lat,tar_lon, tar_time):
    '''
    Draw one random (observation, target) pair from `img3D`.

    A window of `obs_timeframe + target_timeframe` consecutive slices is cut
    along axis 2 at a random position. Its first `obs_timeframe` slices form
    the observation and the remaining `target_timeframe` slices the target.
    Each part is then aggregated with `stacking`:

    obs_lat, obs_lon, obs_time : bin widths (axes 0, 1, 2) for the observation
    tar_lat, tar_lon, tar_time : bin widths (axes 0, 1, 2) for the target

    Returns the tuple (observation, target) of stacked arrays and prints the
    shape of each.

    Raises ValueError when `img3D` has fewer slices along axis 2 than the
    window needs.
    '''
    length = obs_timeframe + target_timeframe
    n_slices = img3D.shape[2]

    if n_slices < length:
        raise ValueError(
            f'img3D has {n_slices} time slices but {length} are required')

    # randint's upper bound is exclusive: the +1 makes the last valid start
    # position (n_slices - length) reachable, and keeps a series of exactly
    # `length` slices from failing.
    position = np.random.randint(0, n_slices - length + 1)

    subsample = img3D[:, :, position : position + length]

    # divide the subsample in X and y along the time axis
    observations, targets = np.split(subsample, [obs_timeframe], axis=2)

    observation = stacking(img3D, observations, obs_lat, obs_lon, obs_time)
    print(observation.shape)

    target = stacking(img3D, targets,  tar_lat, tar_lon, tar_time )
    print(target.shape)
    return observation, target

In [8]:
def get_X_y(img3D_conv, nb_observations, obs_tf,obs_lat,obs_lon, obs_time,
                tar_tf, tar_lat,tar_lon, tar_time):
    '''
    Build `nb_observations` random observations and their associated targets.

    Each pair comes from one call to `get_observation_target` with the given
    window lengths (`obs_tf`, `tar_tf`) and bin widths. A progress line is
    printed per pair.

    Returns (X, y): two numpy arrays whose first axis indexes the samples.
    With `nb_observations == 0` both arrays are empty.
    '''
    X = []
    y = []

    for n in range(nb_observations):
        print(f'creating obs {n+1} out of {nb_observations}')
        X_subsample, y_subsample = get_observation_target(img3D_conv,
                                       obs_tf,obs_lat,obs_lon, obs_time,
                                       tar_tf,  tar_lat,tar_lon, tar_time)
        X.append(X_subsample)
        y.append(y_subsample)

    # The loop variables are not deleted explicitly: locals are released on
    # return, and the `del` raised UnboundLocalError when the loop never ran.
    return np.array(X), np.array(y)

# Get X_y

In [9]:
# Observation window (X): bin widths handed to `stacking`, and window length.
obs_lat = 5    # grid cells merged per bin along axis 0 (latitude)
obs_lon = 5    # grid cells merged per bin along axis 1 (longitude)
obs_time = 3   # time slices merged per bin (3h according to the original note)
obs_tf = 48    # consecutive time slices used as X (48h according to the original note)

# Target window (y): same meaning as above.
tar_lat = 5
tar_lon = 5
tar_time = 2   # time slices merged per bin (2h according to the original note)
tar_tf = 12    # consecutive time slices used as y
               # NOTE(review): the original note said "4h", which does not match 12 - confirm

# Number of random (X, y) pairs to draw.
nb_observations = 20

In [10]:
# Draw `nb_observations` random (observation, target) pairs from the loaded
# array; a progress line and the two stacked shapes are printed per pair.
X, y = get_X_y(img_3D_conv, nb_observations,
              obs_tf,obs_lat,obs_lon, obs_time,
              tar_tf, tar_lat,tar_lon, tar_time)

creating obs 1 out of 20
(112, 98, 17)
(112, 98, 7)
creating obs 2 out of 20
(112, 98, 17)
(112, 98, 7)
creating obs 3 out of 20
(112, 98, 17)
(112, 98, 7)
creating obs 4 out of 20
(112, 98, 17)
(112, 98, 7)
creating obs 5 out of 20
(112, 98, 17)
(112, 98, 7)
creating obs 6 out of 20
(112, 98, 17)
(112, 98, 7)
creating obs 7 out of 20
(112, 98, 17)
(112, 98, 7)
creating obs 8 out of 20
(112, 98, 17)
(112, 98, 7)
creating obs 9 out of 20
(112, 98, 17)
(112, 98, 7)
creating obs 10 out of 20
(112, 98, 17)
(112, 98, 7)
creating obs 11 out of 20
(112, 98, 17)
(112, 98, 7)
creating obs 12 out of 20
(112, 98, 17)
(112, 98, 7)
creating obs 13 out of 20
(112, 98, 17)
(112, 98, 7)
creating obs 14 out of 20
(112, 98, 17)
(112, 98, 7)
creating obs 15 out of 20
(112, 98, 17)
(112, 98, 7)
creating obs 16 out of 20
(112, 98, 17)
(112, 98, 7)
creating obs 17 out of 20
(112, 98, 17)
(112, 98, 7)
creating obs 18 out of 20
(112, 98, 17)
(112, 98, 7)
creating obs 19 out of 20
(112, 98, 17)
(112, 98, 7)
cr

In [11]:
# Hold out 20% of the samples for testing. The fixed seed makes the split
# itself repeatable from run to run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [12]:
# Sanity check: shapes of the four arrays produced by the split.
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((16, 112, 98, 17), (4, 112, 98, 17), (16, 112, 98, 7), (4, 112, 98, 7))

## Try conv 3D

In [None]:
# Append a trailing singleton (channel) axis so each sample is 4D, as the
# Conv3D input below expects. The test arrays are reshaped with the training
# arrays' dimensions.
obs_dims = tuple(X_train.shape[axis] for axis in (1, 2, 3))
tar_dims = tuple(y_train.shape[axis] for axis in (1, 2, 3))

X_train_reshaped = X_train.reshape(-1, *obs_dims, 1)
X_test_reshaped = X_test.reshape(-1, *obs_dims, 1)
y_train_reshaped = y_train.reshape(-1, *tar_dims, 1)
y_test_reshaped = y_test.reshape(-1, *tar_dims, 1)

In [None]:
def init_model(input_shape=None, output_shape=(63, 55, 7)):
    """Build and compile the single-block Conv3D regression model.

    Architecture: Conv3D(64, 4x4x4, relu, 'same') -> MaxPooling3D(2) ->
    Flatten -> Dense(2/3 of the output size) -> Dense(output size) -> Reshape.

    Parameters
    ----------
    input_shape : tuple of 4 ints, optional
        Per-sample input shape including the channel axis. When omitted it is
        read from the notebook-level ``X_train`` as
        ``(X_train.shape[1], X_train.shape[2], X_train.shape[3], 1)``.
    output_shape : tuple of ints, optional
        Per-sample shape of the prediction. Defaults to ``(63, 55, 7)``.

    Returns
    -------
    A compiled ``Sequential`` model (MSE loss, Adam optimizer, MAE metric).

    NOTE(review): the default ``output_shape`` differs from the per-sample
    target shape (112, 98, 7) recorded earlier in this notebook; the targets
    used for fitting must have exactly ``output_shape`` - reconcile before
    training.
    NOTE(review): Flatten feeds every pooled feature into the first Dense
    layer; with the recorded (112, 98, 17) inputs that is roughly 1.4 million
    features times 16170 units, i.e. tens of billions of weights.
    NOTE(review): a later cell of this notebook defines another function that
    is also called ``init_model`` and replaces this one once it is run.
    """
    if input_shape is None:
        input_shape = (X_train.shape[1], X_train.shape[2], X_train.shape[3], 1)

    # Total number of predicted values per sample.
    n_outputs = int(np.prod(output_shape))

    print('init model')
    model = models.Sequential()

    print('3D conv 1')
    model.add(layers.Conv3D(64, kernel_size = (4,4,4), activation = 'relu', padding='same',
                              input_shape = input_shape))
    model.add(layers.MaxPooling3D(2))

    print('flatten')
    model.add(layers.Flatten())

    # Keras documents `units` as a positive integer, hence floor division
    # instead of the float produced by `/`.
    print('dense 4')
    model.add(layers.Dense(n_outputs * 2 // 3, activation = 'relu'))

    print('dense 5')
    model.add(layers.Dense(n_outputs, activation = 'relu'))

    print('reshape')
    model.add(layers.Reshape(tuple(output_shape)))

    # `metrics` is documented as a list of metrics, not a bare string.
    model.compile(loss ='mse',
             optimizer='adam',
             metrics=['mae'])
    return model

In [None]:
# Build the Conv3D model defined in the cell above and print its layer table.
model = init_model()
model.summary()

In [None]:
# Stop when the monitored validation loss has not improved for 3 epochs and
# restore the weights of the best epoch.
es = EarlyStopping(patience = 3, restore_best_weights=True)

# 30% of the training samples are held out for validation.
# `callbacks` is documented as a list of callback instances.
history = model.fit(X_train_reshaped, y_train_reshaped,
                      batch_size = 32,
                      epochs = 100,
                      validation_split = 0.3,
                      callbacks = [es])

## Try conv2D

In [None]:
def init_model_2D(input_shape=None, output_shape=(112, 98, 7)):
    """Build and compile the two-block Conv2D regression model.

    Architecture: [Conv2D(64, 4x4, relu, 'same') -> MaxPooling2D(2)] ->
    [Conv2D(128, 4x4, relu, 'same') -> MaxPooling2D(2)] -> Flatten ->
    Dense(2/3 of the output size) -> Dense(output size) -> Reshape.
    The last axis of the input is passed to Conv2D as its channel axis.

    Parameters
    ----------
    input_shape : tuple of 3 ints, optional
        Per-sample input shape. When omitted it is read from the
        notebook-level ``X_train`` as
        ``(X_train.shape[1], X_train.shape[2], X_train.shape[3])``.
    output_shape : tuple of ints, optional
        Per-sample shape of the prediction. Defaults to ``(112, 98, 7)``.

    Returns
    -------
    A compiled ``Sequential`` model (MSE loss, Adam optimizer, MAE metric).

    NOTE(review): the two Dense layers are very large (51221 and 76832 units
    by default, fed by the whole flattened feature map); the recorded run of
    this notebook was interrupted right after the 'dense 4' message.
    """
    if input_shape is None:
        input_shape = (X_train.shape[1], X_train.shape[2], X_train.shape[3])

    # Total number of predicted values per sample.
    n_outputs = int(np.prod(output_shape))

    print('init model')
    model = models.Sequential()

    print('2D conv 1')
    model.add(layers.Conv2D(64, kernel_size = (4,4), activation = 'relu', padding='same',
                              input_shape = input_shape))
    model.add(layers.MaxPooling2D(2))

    print('2D conv 2')
    model.add(layers.Conv2D(128, kernel_size = (4,4), activation = 'relu', padding='same'))
    model.add(layers.MaxPooling2D(2))

    print('flatten')
    model.add(layers.Flatten())

    # Keras documents `units` as a positive integer; `/` gave the non-integer
    # 51221.33..., so floor division is used instead.
    print('dense 4')
    model.add(layers.Dense(n_outputs * 2 // 3, activation = 'relu'))

    print('dense 5')
    model.add(layers.Dense(n_outputs, activation = 'relu'))

    print('reshape')
    model.add(layers.Reshape(tuple(output_shape)))

    # `metrics` is documented as a list of metrics, not a bare string.
    model.compile(loss ='mse',
             optimizer='adam',
             metrics=['mae'])
    return model

In [18]:
# Build the Conv2D model. In the recorded run below this cell was interrupted
# (KeyboardInterrupt) right after the 'dense 4' message was printed.
model = init_model_2D()

init model
2D conv 1
2D conv 2
flatten
dense 4


KeyboardInterrupt: 

In [None]:
# Print the layer table of whichever model is currently bound to `model`.
model.summary()

## Alice_model

In [6]:
def init_model_base():
    """Build and compile the baseline Conv3D model.

    Architecture: GaussianNoise(stddev=2.5) on inputs of shape
    (8, 112, 84, 1) -> Conv3D(64) -> Conv3D(32) -> Conv3D(16), all with
    3x3x3 kernels and relu -> Flatten -> Dense(4 * 112 * 84) with no
    activation.

    Returns
    -------
    A compiled ``Sequential`` model (MSE loss, Adam optimizer, MAE metric)
    whose output is a flat vector of 4 * 112 * 84 values per sample.
    """
    model = models.Sequential()

    model.add(layers.GaussianNoise(stddev = 2.5, input_shape = (8, 112, 84,1)))

    model.add(layers.Conv3D(64, kernel_size = 3, activation = 'relu'))

    model.add(layers.Conv3D(32, kernel_size = 3, activation = 'relu'))

    model.add(layers.Conv3D(16, kernel_size = 3, activation = 'relu'))

    model.add(layers.Flatten())

    model.add(layers.Dense(4 * 112 * 84))

    # `metrics` is documented as a list of metrics, not a bare string.
    model.compile(loss = 'mse',
                      optimizer = 'adam',
                      metrics = ['mae'])

    return model

In [7]:
# Build the baseline model. The recorded run of this cell ended with a
# KeyboardInterrupt.
trying = init_model_base()

KeyboardInterrupt: 

## by_hour_pickles

In [None]:
def _load_pickle(path):
    """Return the object stored in the pickle file at `path`."""
    # pickle can run arbitrary code while loading, so only open trusted files.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


X_train_path = '../raw_data/X_train_by_hour.pickle'
X_test_path = '../raw_data/X_test_by_hour.pickle'
y_train_path = '../raw_data/y_train_by_hour.pickle'
y_test_path = '../raw_data/y_test_by_hour.pickle'

# These assignments replace any X_train / X_test / y_train / y_test created
# by earlier cells.
X_train = _load_pickle(X_train_path)
X_test = _load_pickle(X_test_path)
y_train = _load_pickle(y_train_path)
y_test = _load_pickle(y_test_path)


In [None]:
# Fixed per-sample shapes of the by-hour data: inputs and targets share the
# 176 x 161 grid and differ only in the size of the last axis.
# NOTE(review): no trailing channel axis is added here, while the Conv3D
# model below declares input_shape = (176, 161, 17, 1) - confirm.
obs_shape = (176, 161, 17)
tar_shape = (176, 161, 7)

X_train_reshaped = X_train.reshape(-1, *obs_shape)
X_test_reshaped = X_test.reshape(-1, *obs_shape)
y_train_reshaped = y_train.reshape(-1, *tar_shape)
y_test_reshaped = y_test.reshape(-1, *tar_shape)

In [None]:
def init_model():
    """Build and compile the Conv3D model for the by-hour data.

    Architecture: Conv3D(64, 4x4x4, relu, 'same') on inputs of shape
    (176, 161, 17, 1) -> MaxPooling3D(2) -> Flatten -> five relu Dense
    layers of growing width -> Reshape((176, 161, 7)).

    Returns
    -------
    A compiled ``Sequential`` model (MSE loss, Adam optimizer, MAE metric)
    predicting a (176, 161, 7) array per sample.

    NOTE(review): the Dense layers are very large (up to 198352 units); check
    that the model fits in memory before building it.
    NOTE(review): an earlier cell of this notebook defines another function
    that is also called ``init_model``; running this cell replaces it.
    """
    # Number of values in one (176, 161, 7) prediction.
    n_outputs = 176 * 161 * 7

    print('init model')
    model = models.Sequential()

    print('3D conv 1')
    model.add(layers.Conv3D(64, kernel_size = (4,4,4), activation = 'relu', padding='same',
                              input_shape = (176, 161, 17,1)))
    model.add(layers.MaxPooling3D(2))

    print('flatten')
    model.add(layers.Flatten())

    print('dense 1')
    model.add(layers.Dense(176, activation = 'relu'))

    print('dense 2')
    model.add(layers.Dense(176*7, activation = 'relu'))

    # Keras documents `units` as a positive integer, so floor division
    # replaces `/` (which gave 99176.0 and the non-integer 132234.66...).
    print('dense 3')
    model.add(layers.Dense(n_outputs // 2, activation = 'relu'))

    print('dense 4')
    model.add(layers.Dense(n_outputs * 2 // 3, activation = 'relu'))

    # The last Dense layer must emit exactly as many values as the Reshape
    # target holds; 176*161*3 (85008) could not be reshaped to (176, 161, 7).
    print('dense 5')
    model.add(layers.Dense(n_outputs, activation = 'relu'))

    print('reshape')
    model.add(layers.Reshape((176, 161, 7)))

    # `metrics` is documented as a list of metrics, not a bare string.
    model.compile(loss ='mse',
             optimizer='adam',
             metrics=['mae'])
    return model

In [None]:
# Build the by-hour Conv3D model defined in the cell above and print its
# layer table.
model = init_model()
model.summary()