# Get data from folders

In [1]:
import numpy as np
from os import listdir
from os.path import join

# Number of raster bands read from every image; it is also the length of the
# last axis of each per-image array built by gdalproxy2nparr.
channels = 13

In [2]:
# Root folder of the dataset. Every sub-folder directly under it is treated as
# one class by the loading loop further down.
path = 'ds/images/remote_sensing/otherDatasets/sentinel_2/tif/'

In [3]:
# Show the entries found directly under `path` (one folder per class).
listdir(path)

['AnnualCrop',
 'Forest',
 'HerbaceousVegetation',
 'Highway',
 'Industrial',
 'Pasture',
 'PermanentCrop',
 'Residential',
 'River',
 'SeaLake']

In [4]:
# Class names, taken from the entry names directly under `path`.
# NOTE(review): os.listdir returns entries in arbitrary order; sort here if a
# stable class order is required across machines.
classes = listdir(path)

In [5]:
def gdalproxy2nparr(proxy, n_bands=None):
    """Copy the bands of an opened GDAL dataset into one float32 array.

    Parameters
    ----------
    proxy
        Opened dataset handle (as returned by ``gdal.Open``). It must expose
        ``RasterXSize``, ``RasterYSize`` and ``GetRasterBand``.
    n_bands : int, optional
        How many leading bands to read. When omitted, the module-level
        ``channels`` constant is used, which is what existing callers rely on.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(proxy.RasterYSize, proxy.RasterXSize, n_bands)``
        and dtype ``float32``; band ``b`` of the dataset ends up in
        ``arr[:, :, b - 1]``.
    """
    if n_bands is None:
        n_bands = channels
    # Take the spatial size from the dataset itself instead of assuming 64x64.
    arr = np.zeros((proxy.RasterYSize, proxy.RasterXSize, n_bands), dtype=np.float32)
    for band in range(n_bands):
        # GDAL numbers raster bands starting at 1.
        arr[:, :, band] = proxy.GetRasterBand(band + 1).ReadAsArray()
    return arr

In [6]:
from osgeo import gdal

Y = []
X = []

# A label is the position of the folder inside `classes`, so classes[label]
# always names the folder a sample was read from.
for label, folder in enumerate(classes):
    folder_path = join(path, folder)
    # Sorted so that the order of samples before shuffling is deterministic.
    for image_name in sorted(listdir(folder_path)):
        image_path = join(folder_path, image_name)

        image = gdal.Open(image_path, gdal.GA_ReadOnly)
        if image is None:
            # gdal.Open signals failure with None when GDAL exceptions are
            # disabled; fail here with the offending path instead of later
            # with an opaque AttributeError inside gdalproxy2nparr.
            raise OSError("GDAL could not open {0}".format(image_path))

        X.append(gdalproxy2nparr(image))
        Y.append(label)
    # Progress: number of class folders finished out of the total.
    print("{0}/{1}".format(label + 1, len(classes)))

1/10
2/10
3/10
4/10
5/10
6/10
7/10
8/10
9/10
10/10


In [7]:
# Turn the list of per-image arrays and the list of labels into two ndarrays.
X, Y = np.array(X), np.array(Y)

In [8]:
# Quick check: the first dimension (sample count) must agree between X and Y.
(X.shape, Y.shape)

((27000, 64, 64, 13), (27000,))

# Normalize and shuffle

In [9]:
from sklearn.preprocessing import StandardScaler

# Standardize every channel separately: all pixels of one channel, across all
# images, are treated as a single feature column for StandardScaler.
# Note that the statistics are computed over the whole data set.
for channel in range(channels):
    band = X[:, :, :, channel]
    # Restore the slice's own shape rather than a hard-coded sample count and
    # image size, so this cell keeps working if the data set changes.
    X[:, :, :, channel] = (
        StandardScaler().fit_transform(band.reshape(-1, 1)).reshape(band.shape)
    )

In [10]:
from sklearn.utils import shuffle

# Shuffle images and labels together so each image keeps its label.
# The fixed seed makes the resulting sample order (and therefore the saved
# file) reproducible from one run to the next.
X, Y = shuffle(X, Y, random_state=42)

# Save data

In [11]:
# Output file (relative to the current working directory) that receives both
# the image array and the label array.
save_file = 'data.npy'

In [12]:
# Both arrays are written back to back into one file, X first and Y second.
# To read them again, open the file once and call np.load on the handle twice.
with open(save_file, 'wb') as out:
    for array in (X, Y):
        np.save(out, array)

In [13]:
# Preview the normalized, shuffled image array.
X

array([[[[-1.49727732e-01, -6.12409890e-01, -4.45030481e-01, ...,
           1.13010451e-01, -1.36996254e-01,  6.37517393e-01],
         [-1.49727732e-01, -6.12409890e-01, -4.45030481e-01, ...,
           1.13010451e-01, -1.36996254e-01,  6.37517393e-01],
         [-1.53804526e-01, -5.46431124e-01, -1.38871506e-01, ...,
           7.11191297e-02, -2.22452357e-01,  7.55242825e-01],
         ...,
         [ 1.11326482e-03, -2.79516995e-01, -2.37550855e-01, ...,
          -1.93195209e-01, -1.60661012e-01,  1.72261242e-02],
         [ 7.44953752e-02, -9.65758190e-02, -2.57792771e-01, ...,
          -1.75241783e-01, -3.31342481e-02, -2.23908037e-01],
         [ 1.39723927e-01, -7.55825713e-02, -8.32062289e-02, ...,
          -1.23376325e-01,  9.43925306e-02, -3.11593205e-01]],

        [[-1.49727732e-01, -6.12409890e-01, -4.45030481e-01, ...,
           1.13010451e-01, -1.36996254e-01,  6.37517393e-01],
         [-1.49727732e-01, -6.12409890e-01, -4.45030481e-01, ...,
           1.13010451e

In [14]:
# Preview the shuffled label array.
Y

array([7, 3, 6, ..., 1, 7, 6])