# Multi-spectral Time-series Classification

## Import data

### Metadata

In [12]:
import pickle
import os

metapath = '/home/dsyrrafos/Documents/NTUA/brill/multi-spectral-ts-clf/data/Exercise4/timematch_data/denmark/32VNH/2017/meta/'
datapath = '/home/dsyrrafos/Documents/NTUA/brill/multi-spectral-ts-clf/data/Exercise4/timematch_data/denmark/32VNH/2017/data/'

with open(os.path.join(metapath, "metadata.pkl"), 'rb') as f:
    metadata = pickle.load(f)

type(metadata)

dict

In [13]:
metadata.keys()

dict_keys(['start_date', 'end_date', 'dates', 'cloudy_pct', 'parcels'])

In [14]:
len(metadata['dates'])

52

In [15]:
len(metadata['cloudy_pct'])

52

In [16]:
len(metadata['parcels'])

60332

In [17]:
metadata['parcels']

[{'id': '22-0',
  'label': 216.0,
  'n_pixels': 896,
  'block': 13,
  'geometric_features': [2841.5190707380984,
   89390.00914778022,
   0.03178788208915471,
   0.28607918263090676]},
 {'id': '23-0',
  'label': 216.0,
  'n_pixels': 365,
  'block': 13,
  'geometric_features': [1977.4892593095217,
   36460.20046263565,
   0.05423692777926575,
   0.1469404186795491]},
 {'id': '24-0',
  'label': 216.0,
  'n_pixels': 1331,
  'block': 13,
  'geometric_features': [4680.880638294851,
   132822.44602312316,
   0.03524163858177972,
   0.3973134328358209]},
 {'id': '25-0',
  'label': 216.0,
  'n_pixels': 460,
  'block': 13,
  'geometric_features': [1020.1426807603821,
   45574.55926584079,
   0.022384038314222452,
   0.5989583333333334]},
 {'id': '26-0',
  'label': 216.0,
  'n_pixels': 388,
  'block': 13,
  'geometric_features': [935.9998287090416,
   38794.84500732286,
   0.02412691244242279,
   0.6339869281045751]},
 {'id': '52-0',
  'label': 1.0,
  'n_pixels': 769,
  'block': 13,
  'geometric

### Dates

In [18]:
import json

with open(os.path.join(metapath, "dates.json"), 'rb') as f:
    dates = json.load(f)

In [19]:
len(dates)

52

### Labels

In [20]:
import json

with open(os.path.join(metapath, "labels.json"), 'rb') as f:
    labels = json.load(f)

In [21]:
len(labels)

5001

In [27]:
labels

{'0': 'corn',
 '1': 'corn',
 '2': 'corn',
 '3': 'corn',
 '4': 'corn',
 '5': 'spring_barley',
 '6': 'corn',
 '7': 'corn',
 '8': 'corn',
 '9': 'meadow',
 '10': 'corn',
 '11': 'corn',
 '12': 'spring_barley',
 '13': 'corn',
 '14': 'corn',
 '15': 'corn',
 '16': 'corn',
 '17': 'corn',
 '18': 'corn',
 '19': 'corn',
 '20': 'corn',
 '21': 'corn',
 '22': 'meadow',
 '23': 'winter_wheat',
 '24': 'winter_rapeseed',
 '25': 'unknown',
 '26': 'winter_barley',
 '27': 'winter_barley',
 '28': 'winter_rye',
 '29': 'winter_rye',
 '30': 'winter_barley',
 '31': 'spring_barley',
 '32': 'spring_barley',
 '33': 'unknown',
 '34': 'unknown',
 '35': 'unknown',
 '36': 'unknown',
 '37': 'unknown',
 '38': 'unknown',
 '39': 'unknown',
 '40': 'unknown',
 '41': 'unknown',
 '42': 'winter_wheat',
 '43': 'meadow',
 '44': 'spring_barley',
 '45': 'winter_wheat',
 '46': 'winter_barley',
 '47': 'spring_barley',
 '48': 'spring_barley',
 '49': 'spring_barley',
 '50': 'spring_barley',
 '51': 'winter_rye',
 '52': 'winter_rye',
 '5

## Filter low support categories

In [25]:
from collections import Counter

labels_counts = Counter(list(labels.values()))
labels_counts

Counter({'corn': 275,
         'spring_barley': 1141,
         'meadow': 1013,
         'winter_wheat': 856,
         'winter_rapeseed': 301,
         'unknown': 511,
         'winter_barley': 352,
         'winter_rye': 317,
         'spring_peas': 17,
         'spring_oat': 120,
         'horsebeans': 28,
         'winter_triticale': 42,
         'spring_wheat': 26,
         'spring_triticale': 2})

In [26]:
labels_counts = {k:v for k,v in labels_counts.items() if v>=200}
labels_counts

{'corn': 275,
 'spring_barley': 1141,
 'meadow': 1013,
 'winter_wheat': 856,
 'winter_rapeseed': 301,
 'unknown': 511,
 'winter_barley': 352,
 'winter_rye': 317}

In [54]:
labels_filtered = {k: v for k,v in labels.items() if v in labels_counts.keys()}
len(labels_filtered)

4766

## Import TS data

In [31]:
import os
def get_immediate_subdirectories(a_dir):
    return [name for name in os.listdir(a_dir)
            if os.path.isdir(os.path.join(a_dir, name))]

zarr_dir = './data/Exercise4/timematch_data/denmark/32VNH/2017/data/'

zarr_files = get_immediate_subdirectories(zarr_dir)

In [55]:
import zarr
import numpy as np

arr = zarr.load(zarr_dir + zarr_files[0])
print(arr.shape)
print(np.max(arr), np.min(arr))

(52, 10, 522)
10967 64


In [56]:
arr = zarr.load(zarr_dir + zarr_files[1])
print(arr.shape)
print(np.max(arr), np.min(arr))

(52, 10, 149)
7409 183


## Normalize data

In [None]:
# Min-max scaling

max_pixel = - np.inf
min_pixel = np.inf

for k in labels_filtered.keys():
    arr = zarr.load(zarr_dir + f"{k}.zarr")
    if np.max(arr) > max_pixel:
        max_pixel = np.max(arr)
    if np.min(arr) > min_pixel:
        min_pixel = np.min(arr)
    
print(max_pixel, min_pixel)

## Samlpe pixels