### Getting Labeled DataLoader
```python
from data_loading import get_loaders
trainloader, valloader = get_loaders('labeled')
```
### Getting Unlabeled DataLoader
```python
from data_loading import get_loaders
trainloader, valloader = get_loaders('unlabeled')
```

In [35]:
import os
from PIL import Image

import numpy as np
import pickle

In [11]:
# Seed NumPy's global random generator so that the np.random.choice calls used
# for the train/validation scene splits further down are reproducible.
# NOTE(review): reproducibility only holds if the cells are run top to bottom in
# a fresh kernel with no other np.random draws in between; the recorded
# execution counts suggest the cells were run out of order — confirm.
np.random.seed(42420)

In [107]:
# Split by whole scene rather than by individual sample: this keeps frames of
# one scene from landing on both sides of the train/validation split (data
# leakage), and keeps scenes intact in case a temporal component is added to
# the model later.

# Scenes 0-105 (106 in total) are unlabeled.
unlabeled_scene_index = np.arange(106)
# Hold out 10% of the unlabeled scenes for validation, drawn without replacement.
val_unlabeled_scene_index = np.random.choice(
    unlabeled_scene_index,
    size=round(unlabeled_scene_index.size * .1),
    replace=False,
)
# Every scene that was not held out is used for training (original order kept).
train_unlabeled_scene_index = unlabeled_scene_index[
    ~np.isin(unlabeled_scene_index, val_unlabeled_scene_index)
]

# Scenes 106-133 (28 in total) are labeled.
labeled_scene_index = np.arange(106, 134)
# Hold out 10% of the labeled scenes for validation, drawn without replacement.
val_labeled_scene_index = np.random.choice(
    labeled_scene_index,
    size=round(labeled_scene_index.size * .1),
    replace=False,
)
# Every scene that was not held out is used for training (original order kept).
train_labeled_scene_index = labeled_scene_index[
    ~np.isin(labeled_scene_index, val_labeled_scene_index)
]

In [109]:
# Save
# Persist each train/validation scene-index array to its own pickle file under
# data_utils/. Every file is opened in a `with` block so the handle is flushed
# and closed deterministically, even if pickling raises part-way through.
with open('data_utils/unlabeled_scene_index_train.p', 'wb') as f:
    pickle.dump(train_unlabeled_scene_index, f)
with open('data_utils/unlabeled_scene_index_val.p', 'wb') as f:
    pickle.dump(val_unlabeled_scene_index, f)
with open('data_utils/labeled_scene_index_train.p', 'wb') as f:
    pickle.dump(train_labeled_scene_index, f)
with open('data_utils/labeled_scene_index_val.p', 'wb') as f:
    pickle.dump(val_labeled_scene_index, f)

In [58]:
# For unlabeled data
# Per-channel mean and sample standard deviation over every pixel of the
# unlabeled training scenes, with pixel values divided by 255.
# NOTE(review): the /255 scaling and the [:, :, 0..2] indexing assume 8-bit
# images with at least three channels (RGB order) — confirm against the dataset.
#
# Computed in two passes (first the mean, then the squared deviations) so that
# only one image is held in RAM at any time.
r_channel_sum = 0
g_channel_sum = 0
b_channel_sum = 0
count = 0  # number of pixels per channel seen in the first pass
path = 'data'

# Pass 1: accumulate per-channel pixel sums and the total pixel count.
# Expected layout: data/scene_<id>/<sample>/<image file>.
for scene in train_unlabeled_scene_index:
    filepath = os.path.join(path, 'scene_{}'.format(scene))
    for sample in os.listdir(filepath):
        samplepath = os.path.join(filepath, sample)
        for filename in os.listdir(samplepath):
            img = np.array(Image.open(os.path.join(samplepath, filename))) / 255
            r_channel_sum += np.sum(img[:, :, 0])
            g_channel_sum += np.sum(img[:, :, 1])
            b_channel_sum += np.sum(img[:, :, 2])
            count += img.shape[0] * img.shape[1]

r_channel_mean = r_channel_sum / count
g_channel_mean = g_channel_sum / count
b_channel_mean = b_channel_sum / count

# Pass 2: accumulate squared deviations from the means computed above.
r_channel_sum = 0
g_channel_sum = 0
b_channel_sum = 0
for scene in train_unlabeled_scene_index:
    filepath = os.path.join(path, 'scene_{}'.format(scene))
    for sample in os.listdir(filepath):
        samplepath = os.path.join(filepath, sample)
        for filename in os.listdir(samplepath):
            img = np.array(Image.open(os.path.join(samplepath, filename))) / 255
            r_channel_sum += np.sum(np.square(img[:, :, 0] - r_channel_mean))
            g_channel_sum += np.sum(np.square(img[:, :, 1] - g_channel_mean))
            b_channel_sum += np.sum(np.square(img[:, :, 2] - b_channel_mean))

# Sample standard deviation (Bessel's correction: divide by count - 1).
r_channel_std = np.sqrt(r_channel_sum / (count - 1))
g_channel_std = np.sqrt(g_channel_sum / (count - 1))
b_channel_std = np.sqrt(b_channel_sum / (count - 1))

# Save
# Line 1 holds the R,G,B means and line 2 the R,G,B standard deviations.
# The file is opened in "w" mode so that re-running this cell replaces the
# previous statistics; append mode would glue a new mean line onto the old std
# line (which has no trailing newline) and corrupt the file.
with open("unlabeled_data_statistics.txt", "w") as file:
    file.write('{},{},{} \n'.format(r_channel_mean, g_channel_mean, b_channel_mean))
    file.write('{},{},{}'.format(r_channel_std, g_channel_std, b_channel_std))

In [60]:
# For labeled data
# Per-channel mean and sample standard deviation over every pixel of the
# labeled training scenes, with pixel values divided by 255.
# NOTE(review): the /255 scaling and the [:, :, 0..2] indexing assume 8-bit
# images with at least three channels (RGB order) — confirm against the dataset.
#
# Computed in two passes (first the mean, then the squared deviations) so that
# only one image is held in RAM at any time.
r_channel_sum = 0
g_channel_sum = 0
b_channel_sum = 0
count = 0  # number of pixels per channel seen in the first pass
path = 'data'

# Pass 1: accumulate per-channel pixel sums and the total pixel count.
# Expected layout: data/scene_<id>/<sample>/<image file>.
for scene in train_labeled_scene_index:
    filepath = os.path.join(path, 'scene_{}'.format(scene))
    for sample in os.listdir(filepath):
        samplepath = os.path.join(filepath, sample)
        for filename in os.listdir(samplepath):
            img = np.array(Image.open(os.path.join(samplepath, filename))) / 255
            r_channel_sum += np.sum(img[:, :, 0])
            g_channel_sum += np.sum(img[:, :, 1])
            b_channel_sum += np.sum(img[:, :, 2])
            count += img.shape[0] * img.shape[1]

r_channel_mean = r_channel_sum / count
g_channel_mean = g_channel_sum / count
b_channel_mean = b_channel_sum / count

# Pass 2: accumulate squared deviations from the means computed above.
r_channel_sum = 0
g_channel_sum = 0
b_channel_sum = 0
for scene in train_labeled_scene_index:
    filepath = os.path.join(path, 'scene_{}'.format(scene))
    for sample in os.listdir(filepath):
        samplepath = os.path.join(filepath, sample)
        for filename in os.listdir(samplepath):
            img = np.array(Image.open(os.path.join(samplepath, filename))) / 255
            r_channel_sum += np.sum(np.square(img[:, :, 0] - r_channel_mean))
            g_channel_sum += np.sum(np.square(img[:, :, 1] - g_channel_mean))
            b_channel_sum += np.sum(np.square(img[:, :, 2] - b_channel_mean))

# Sample standard deviation (Bessel's correction: divide by count - 1).
r_channel_std = np.sqrt(r_channel_sum / (count - 1))
g_channel_std = np.sqrt(g_channel_sum / (count - 1))
b_channel_std = np.sqrt(b_channel_sum / (count - 1))

# Save
# Line 1 holds the R,G,B means and line 2 the R,G,B standard deviations.
# The file is opened in "w" mode so that re-running this cell replaces the
# previous statistics; append mode would glue a new mean line onto the old std
# line (which has no trailing newline) and corrupt the file.
with open("labeled_data_statistics.txt", "w") as file:
    file.write('{},{},{} \n'.format(r_channel_mean, g_channel_mean, b_channel_mean))
    file.write('{},{},{}'.format(r_channel_std, g_channel_std, b_channel_std))