## UTILITY SCRIPT TO CONVERT IMAGES INTO AGGREGATED DATA

In [1]:
import pandas as pd
import numpy as np
import json
import itertools
import copy

In [2]:
# Side length, in pixels, of the centered square window averaged for each plot
# (the aggregation selects DIM_AGREGATION x DIM_AGREGATION pixels).
# NOTE(review): the original comment gave the size as "25 x 25 meters", which
# presumes 1 pixel = 1 meter — confirm against the image resolution.
DIM_AGREGATION = 25

In [3]:
# Load the plots workbook; only its 'PARCELLE' and 'LFI' columns are used below.
df = pd.read_excel('data/data_parcelles_with_gps.xlsx')

In [4]:
# Keep only the plot identifier ('PARCELLE') and the campaign label ('LFI');
# the aggregated index columns are appended to filtered copies of this table.
df_base = df[['PARCELLE','LFI']]

In [10]:
LFI = 2  # campaign number; interpolated into the JSON file name to load

one_plot = '114309'  # key of one plot in the JSON dict, used as a reference sample

### Convert from one JSON file

In [6]:
# Read the per-plot satellite images of the selected campaign into a dict.
json_path = f"data/Images_LANDSAT_LFI{LFI}_pb1.json"
with open(json_path) as json_file:
    data = json.load(json_file)

In [7]:
# Number of top-level entries in the loaded JSON (entries are looked up by plot id).
len(data)

29

In [11]:
# Campaign label stored alongside the reference plot.
data[one_plot]['LFI']

'LFI2'

In [12]:
# Dimensions of the reference plot's NDVI image.
np.shape(data[one_plot]['IMAGES_SAT']['NDVI'])

(200, 200)

In [13]:
# From here on LFI holds the campaign label stored in the JSON (a string),
# no longer the campaign number defined earlier.
LFI = data[one_plot]['LFI']

# Restrict the base table to the rows of that campaign, with a fresh 0..n-1 index.
campaign_mask = df_base['LFI'] == LFI
df_result = df_base[campaign_mask].reset_index(drop=True)

In [14]:
# Aggregate every satellite index of each plot into a single value: the mean of
# the centered DIM_AGREGATION x DIM_AGREGATION pixel window, rounded to 4 decimals.
# Plots whose images are missing or unusable get NaN for all indices and are
# recorded in problematic_plots.
nb_parc = len(df_result)
problematic_plots = []
list_plots = df_result['PARCELLE'].tolist()

# Image dimensions are read from the reference plot; the same window is then
# applied to every plot.
width = np.shape(data[one_plot]['IMAGES_SAT']['NDVI'])[0]
height = np.shape(data[one_plot]['IMAGES_SAT']['NDVI'])[1]
range_w = np.arange(int((width - DIM_AGREGATION)/2), int((width + DIM_AGREGATION)/2))
range_h = np.arange(int((height - DIM_AGREGATION)/2), int((height + DIM_AGREGATION)/2))

# The (first index, second index) coordinates of the window do not depend on
# the plot, so they are built once instead of once per plot and per index.
window_pixels = [(int(w), int(h)) for w, h in itertools.product(range_w, range_h)]

ndvi_arr = np.empty(nb_parc)
evi_arr = np.empty(nb_parc)
ndmi_arr = np.empty(nb_parc)
ndwi_arr = np.empty(nb_parc)
dswi_arr = np.empty(nb_parc)
# Index name (key inside 'IMAGES_SAT') -> output array filled position by position.
MAPPING = {
    'NDVI': ndvi_arr,
    'EVI': evi_arr,
    'NDMI': ndmi_arr,
    'NDWI': ndwi_arr,
    'DSWI': dswi_arr,
}

for ind, parc in enumerate(list_plots):
    try:
        for func_name, arr in MAPPING.items():
            pixel_values = data[str(parc)]['IMAGES_SAT'][func_name]
            window_values = [pixel_values[w][h] for w, h in window_pixels]
            arr[ind] = round(np.mean(window_values), 4)
    except (KeyError, IndexError, TypeError, ValueError):
        # KeyError: plot or index absent from the JSON; IndexError: image smaller
        # than the window; TypeError/ValueError: missing or non-numeric pixels.
        # Anything else (e.g. KeyboardInterrupt, NameError) is a real problem
        # and is deliberately not swallowed.
        problematic_plots.append(parc)
        # Reset every index so a plot is never left partially filled.
        for arr in MAPPING.values():
            arr[ind] = np.nan

df_result['NDVI'] = ndvi_arr
df_result['EVI'] = evi_arr
df_result['NDMI'] = ndmi_arr
df_result['NDWI'] = ndwi_arr
df_result['DSWI'] = dswi_arr

In [15]:
# Number of plots without an aggregated DSWI value.
df_result['DSWI'].isnull().sum()

2374

In [16]:
# Number of plots whose aggregation failed; a failed plot gets NaN for every
# index, so this should equal the NaN count of any index column.
len(problematic_plots)

2374

In [17]:
# Export the aggregated table to Excel.
# NOTE(review): LFI was rebound to the campaign label read from the JSON (a
# string such as 'LFI2') before this point, so the file name embeds that label
# rather than the bare campaign number — confirm this is the intended name.
df_result.to_excel(f'./data/Data_from_satellites_images_{LFI}_pb1.xlsx')

### Convert from multiple JSON files

In [18]:
nb_parts = 8  # number of JSON part files to read (part1 .. part{nb_parts})

LFI_name = 'LFI2'  # campaign label: filters the plot table and names the output file

LFI = 2  # campaign number: interpolated into the JSON part file names
# NOTE(review): LFI_name and LFI are set independently and presumably must
# describe the same campaign — keep them in sync.

width = 200  # first dimension of the initial images

height = 200  # second dimension of the initial images

In [None]:
# Aggregate the satellite images spread over nb_parts JSON files.
# Each readable file produces one dataframe (a copy of df_result_base plus the
# five index columns) appended to dataframes_results; a plot that cannot be
# aggregated from a given file gets NaN for every index in that file's dataframe.
df_result_base = df_base.loc[df_base['LFI']==LFI_name,:].reset_index(drop=True)
dataframes_results = []
nb_parc = len(df_result_base)
list_plots = df_result_base['PARCELLE'].tolist()
range_w = np.arange(int((width - DIM_AGREGATION)/2), int((width + DIM_AGREGATION)/2))
range_h = np.arange(int((height - DIM_AGREGATION)/2), int((height + DIM_AGREGATION)/2))

# Coordinates of the centered aggregation window; identical for every plot and
# every file, so they are built once.
window_pixels = [(int(w), int(h)) for w, h in itertools.product(range_w, range_h)]

for i in range(nb_parts):
    part = i + 1  # files are numbered from 1
    try:
        with open(f"data/Images_LANDSAT_LFI{LFI}_part{part}.json", 'r') as openfile:
            data = json.load(openfile)
    except (OSError, ValueError) as err:
        # OSError: missing/unreadable file. ValueError: malformed content
        # (json.JSONDecodeError and UnicodeDecodeError are both subclasses).
        # The part is skipped and contributes no dataframe.
        print(f'Opening problem with file part {part} ... ({err!r})')
        print('Done...')
        continue
    print(f'Opening file part {part} with length {len(data)} ...')

    empty_plots = []
    df_result = df_result_base.copy()
    ndvi_arr = np.empty(nb_parc)
    evi_arr = np.empty(nb_parc)
    ndmi_arr = np.empty(nb_parc)
    ndwi_arr = np.empty(nb_parc)
    dswi_arr = np.empty(nb_parc)
    # Index name (key inside 'IMAGES_SAT') -> output array for this file.
    MAPPING = {
        'NDVI': ndvi_arr,
        'EVI': evi_arr,
        'NDMI': ndmi_arr,
        'NDWI': ndwi_arr,
        'DSWI': dswi_arr,
    }

    print('Conversion and aggregation...')
    for ind, parc in enumerate(list_plots):
        try:
            for func_name, arr in MAPPING.items():
                pixel_values = data[str(parc)]['IMAGES_SAT'][func_name]
                window_values = [pixel_values[w][h] for w, h in window_pixels]
                arr[ind] = round(np.mean(window_values), 4)
        except (KeyError, IndexError, TypeError, ValueError):
            # Plot absent from this file, image too small, or unusable pixels.
            # Unexpected errors are not swallowed: they would otherwise leave
            # uninitialised values in the output arrays.
            empty_plots.append(parc)
            for arr in MAPPING.values():
                arr[ind] = np.nan

    print('Creation of a dataframe...')
    for func_name, arr in MAPPING.items():
        df_result[func_name] = arr

    dataframes_results.append(df_result)

    # Sanity check: each printed difference is expected to be 0, since a plot
    # listed in empty_plots has NaN in every index column.
    for func_name in MAPPING:
        print(f"Difference of empty data for {func_name} : {len(empty_plots) - df_result[func_name].isnull().sum()}")

    print('Done...')
print('Finish...')

In [None]:
# Merge the per-file dataframes into one (nb_lines, 5) array: for each plot and
# index, take the non-NaN value found in a part (later parts overwrite earlier
# ones if several provide a value).
nb_lines = len(df_result_base)

# Start from NaN rather than np.empty: cells that no part fills must stay NaN
# instead of exposing uninitialised memory.
result_arr = np.full((nb_lines, 5), np.nan)

# Iterate over the dataframes actually produced, which may be fewer than
# nb_parts if some files could not be processed.
for i, part_df in enumerate(dataframes_results):
    # Columns from position 2 onward are the five aggregated indices
    # (positions 0 and 1 are 'PARCELLE' and 'LFI').
    arr = part_df.iloc[:, 2:].to_numpy(dtype=float)
    valid = ~np.isnan(arr)
    result_arr[valid] = arr[valid]
    c = int(valid.sum())
    print(f'Join n°{i} : {c} data copied ...')
    

In [None]:
# Build the final table: base plot rows plus the five merged index columns.
index_columns = ['NDVI', 'EVI', 'NDMI', 'NDWI', 'DSWI']
df_result = df_result_base.copy()
df_result[index_columns] = result_arr
df_result

In [None]:
# Spot-check the merged values of a single plot.
df_result.loc[df_result['PARCELLE']==12298,:]

In [None]:
# Export the merged table to an Excel workbook named after the campaign label.
df_result.to_excel(f'./data/Data_from_satellites_images_{LFI_name}.xlsx')