# Feature Extraction

The dataset is available for [download here](https://javerianacaliedu-my.sharepoint.com/:f:/g/personal/ccdelgado_javerianacali_edu_co/Estk6ZSR0CpEkqADhvUfnHABsrVhkDKPi19S47bdG4T4sg?e=Yo5dQr).

In [8]:
# Import libraries
import os
import glob
import numpy as np
import pandas as pd
import geopandas as gpd
import pipeline as pipe

from tqdm import tqdm

In [9]:
#Directory
# Root folder holding the downloaded data. The process working directory is
# switched to it, so every relative path used afterwards in this session is
# resolved against this folder.
# NOTE(review): hard-coded, machine-specific Windows path — adjust it before
# running on another machine.
inDir = r'C:\Users\cdelgado\Downloads'
os.chdir(inDir)

In [10]:
#Files
# Locate the orthomosaics and their companion shapefiles (calibration panels
# and plots), relative to the current working directory.
#
# The patterns are raw strings: as ordinary literals they contained invalid
# escape sequences (`\*`, `\o`, `\s`), which only worked because Python keeps
# unknown escapes verbatim while emitting a warning. The resulting pattern
# text is unchanged.
#
# Without recursive=True, glob treats `**` like a single `*`, so each pattern
# matches exactly one directory level between the fixed path components.
#
# NOTE(review): the three lists are later paired by position and glob does
# not guarantee any ordering — confirm the lists line up for every dataset.
tifFiles = glob.glob(r'JSTAR-2020\**\orthomosaics\**.tif')
CPFiles = glob.glob(r'JSTAR-2020\**\shapes\**_CP.shp')
plotFiles = glob.glob(r'JSTAR-2020\**\shapes\**_PLOTS.shp')

In [11]:
#Do the job!
# For every orthomosaic: read it, run the ELC step against the rasterized
# panels, mask the crop inside the rasterized plots, compute nine band-pair
# indices, aggregate them per plot ID and join the plot attributes. The
# combined table is written to FEATURES.csv in the working directory.
#
# Per-dataset tables are collected in a list and concatenated once at the
# end: DataFrame.append was removed in pandas 2.0 and re-copied the whole
# accumulated table on every iteration.
stats_frames = []

# tqdm wraps the list itself (not the enumerate object) so it can read the
# length and display a total and an ETA.
for i, imgDir in enumerate(tqdm(tifFiles)):
    # The glob patterns use backslash separators, so component 1 is the
    # folder right below 'JSTAR-2020' and component 3 is the file name.
    path_parts = imgDir.split("\\")
    season = path_parts[1]
    dataset = path_parts[3].split(".")[0]
    print('Processing dataset:' + dataset)

    #Read image and metadata
    img, x_sz, y_sz, n_band, ext, proj, datatype = pipe.readImg(imgDir)

    #Rasterize Panels
    # Burn the panel 'ID' attribute into a raster with the image geometry.
    Panels_ds = pipe.rasterize(CPFiles[i], y_sz, x_sz, ext, proj, ['ALL_TOUCHED=FALSE', 'ATTRIBUTE=ID'])
    roiPanels = Panels_ds.GetRasterBand(1).ReadAsArray().astype(np.uint32)

    #ELC
    # NOTE(review): presumably an empirical line calibration driven by the
    # panel pixels — confirm against pipeline.ELC.
    img_elc = pipe.ELC(img, roiPanels, y_sz, x_sz, n_band, datatype)

    #Crop Masking
    # Burn the plot 'PLOT' attribute and use it as a per-pixel multiplier on
    # the raw image before computing the crop mask.
    Field_ds = pipe.rasterize(plotFiles[i], y_sz, x_sz, ext, proj, ['ALL_TOUCHED=FALSE', 'ATTRIBUTE=PLOT'])
    roiField = Field_ds.GetRasterBand(1).ReadAsArray().astype(np.uint32)

    img_field = pipe.createEmpty(y_sz, x_sz, n_band, datatype)
    for band in range(n_band):
        img_field[:, :, band] = img[:, :, band]*roiField

    CC = pipe.mask(img_field)

    # Apply the crop mask to the ELC-processed image, band by band.
    img_crop = pipe.createEmpty(y_sz, x_sz, n_band, datatype)

    for band in range(n_band):
        img_crop[:, :, band] = img_elc[:, :, band]*CC

    #VIs
    # NOTE(review): judging by the index names, the band order appears to be
    # 0=blue, 1=green, 2=red, 3=red edge, 4=NIR — confirm with the sensor
    # specification and pipeline.NVI.
    NDRE = pipe.NVI(img_crop[:, :, 4],img_crop[:, :, 3])
    NDVI = pipe.NVI(img_crop[:, :, 4],img_crop[:, :, 2])
    GNDVI = pipe.NVI(img_crop[:, :, 4],img_crop[:, :, 1])
    BNDVI = pipe.NVI(img_crop[:, :, 4],img_crop[:, :, 0])

    ERVI = pipe.NVI(img_crop[:, :, 3],img_crop[:, :, 2])
    EGVI = pipe.NVI(img_crop[:, :, 3],img_crop[:, :, 1])
    EBVI = pipe.NVI(img_crop[:, :, 3],img_crop[:, :, 0])

    GRVI = pipe.NVI(img_crop[:, :, 1],img_crop[:, :, 2])
    GBVI = pipe.NVI(img_crop[:, :, 1],img_crop[:, :, 0])

    #Plot Extraction
    # Indices and Names are parallel lists: Names[k] labels Indices[k].
    Indices = [NDRE,NDVI,GNDVI,BNDVI,ERVI,EGVI,EBVI,GRVI,GBVI]
    Names = ["NDRE","NDVI","GNDVI","BNDVI","ERVI","EGVI","EBVI","GRVI","GBVI"]

    # Same plot shapefile as above, this time burning the plot 'ID' so the
    # statistics can be grouped per plot.
    Plot_ds = pipe.rasterize(plotFiles[i], y_sz, x_sz, ext, proj, ['ALL_TOUCHED=FALSE', 'ATTRIBUTE=ID'])
    roiPlot = Plot_ds.GetRasterBand(1).ReadAsArray().astype(np.uint32)
    Plotdata = gpd.read_file(plotFiles[i]).sort_values(by=['ID'])

    local_stats = pipe.Stats(roiPlot, Indices, Names)
    local_stats['SEASON'] = season
    local_stats['DATASET'] = dataset
    # Inner join: only plots present in both the shapefile attributes and
    # the computed statistics are kept.
    join = Plotdata[["ID","TYPE","CLASS","SCORE"]].merge(local_stats, on='ID', how='inner')

    stats_frames.append(join)

# pd.concat raises on an empty list, so fall back to an empty table when no
# orthomosaic was found (the original also produced an empty table then).
if stats_frames:
    global_stats = pd.concat(stats_frames, ignore_index=True)
else:
    global_stats = pd.DataFrame()

# Drop every row that has a missing value in any column before exporting.
global_stats = global_stats.dropna()
global_stats.to_csv('FEATURES.csv', index = True)

0it [00:00, ?it/s]

Processing dataset:2017B-1


1it [00:29, 29.55s/it]

Processing dataset:2017B-2


2it [00:55, 28.36s/it]

Processing dataset:2017B-3


3it [01:22, 28.05s/it]

Processing dataset:2017B-4


4it [01:48, 27.47s/it]

Processing dataset:2018B-1


5it [02:17, 27.76s/it]

Processing dataset:2019A-1


6it [03:02, 33.06s/it]

Processing dataset:2019A-2


7it [03:42, 35.22s/it]

Processing dataset:2019A-3


8it [04:28, 38.38s/it]

Processing dataset:2019A-4


9it [05:06, 38.26s/it]

Processing dataset:2019B-1


10it [05:53, 40.76s/it]

Processing dataset:2019B-2


11it [06:40, 42.72s/it]

Processing dataset:2019B-3


12it [07:24, 37.04s/it]
