# Create Product report for MACS processing data

## 1. Data loading

## 2. File checks
* check completeness of input dirs and base files
* count tiles
* control for

## Imports

In [1]:
import pandas as pd
from pathlib import Path
import itertools

from utils_report import *

## Setup 
* paths

In [8]:
# setup basepaths
# Root of the processing share; the raw string keeps the backslash literal.
DIR_BASE = Path(r'S:\p_macsprocessing')
# Parent directory whose direct children are listed below to build the report.
DIR_DATA_PRODUCTS = DIR_BASE / 'data_products'
# AOI directory; in the visible part of this notebook only its existence is checked.
DIR_AOI = DIR_BASE / 'aoi'

In [7]:
# Check that all base directories exist; fail early and name the one that is missing.
for d in [DIR_BASE, DIR_DATA_PRODUCTS, DIR_AOI]:
    assert d.exists(), f'Required directory does not exist: {d}'

## Calculate Statistics 
* Files
* File Count
* file count across types
* aoi (size?)


#### Subdirectory files

In [90]:
# Time the row-wise file check (file_check is called once per DataFrame row).
# NOTE: relies on `df` and `file_check`, which are defined in cells further down.
%time file_check_output = df.iloc[:].apply(file_check, axis=1)

CPU times: total: 2.97 s
Wall time: 59 s


#### Setup basic Dataframe and split input name

In [230]:
# Start from an empty frame that fixes the order of the two base columns.
df = pd.DataFrame(columns=['project_name', 'products_dir'])
# create pathlist of output products
# NOTE: glob('*') yields every direct child of DIR_DATA_PRODUCTS (files as well as
# directories) and the result is not sorted here.
dir_list = list(DIR_DATA_PRODUCTS.glob('*'))
df['products_dir'] = dir_list
# get project name
# The project name is the last path component of each products directory.
df['project_name'] = df['products_dir'].apply(lambda x: x.name)
# add site specific details
# split_name_details is not defined in this notebook (presumably it comes from
# utils_report); judging by the displayed output it adds the region/site/date/
# spatial_resolution/subset columns -- TODO confirm.
df = split_name_details(df)

In [228]:
def file_check(
    row,
    dirs=('DSM', 'Ortho', 'processing_info'),
    extensions=('*.tif', '*.tif.ovr', '*.log', '*_nav.txt', '*_report.pdf'),
):
    """Report, per product subdirectory, whether it exists and how many files match.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide ``row['products_dir']`` as a ``pathlib.Path``.
    dirs : iterable of str
        Names of the subdirectories of ``products_dir`` to inspect.
    extensions : iterable of str
        Glob patterns; a file is counted once for every pattern it matches.

    Returns
    -------
    list
        Flat list ``[exists, n_files, exists, n_files, ...]`` with one
        ``(bool, int)`` pair per entry of ``dirs``, in the order of ``dirs``.
        A missing directory is reported as ``False`` with a count of 0.
    """
    outcols = []
    for d in dirs:
        data_dir = row['products_dir'] / d
        outcols.append(data_dir.exists())
        # Count matches lazily instead of building one list per pattern and
        # flattening them; only the total number is needed.
        n_files = sum(1 for pattern in extensions for _ in data_dir.glob(pattern))
        outcols.append(n_files)
    return outcols

In [229]:
def file_check_PC(row, dirs=('PointClouds',), extensions=('PointCloudRGB', 'PointCloudNIR')):
    """Count point-cloud files per type in the first directory of ``dirs``.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide ``row['products_dir']`` as a ``pathlib.Path``.
    dirs : sequence of str
        Subdirectory names; only the first entry is evaluated, because only
        the first directory's counts are returned.
    extensions : iterable of str
        Type tags; files are matched with the glob pattern ``*_<tag>_*``.

    Returns
    -------
    list of int
        One count per entry of ``extensions``, in the same order. A missing
        directory yields zeros.

    Raises
    ------
    IndexError
        If ``dirs`` is empty.
    """
    # Only the first directory's counts are ever returned, so only that one is scanned.
    data_dir = row['products_dir'] / dirs[0]
    return [sum(1 for _ in data_dir.glob(f'*_{e}_*')) for e in extensions]

In [237]:
#check ortho, dsm and processing_info
file_check_columns = ['DSM', 'Ortho','processing_info']
# Two column names per directory, in the order file_check emits its values:
# existence flag first, then file count.
# NOTE: `flatten` is not defined in this notebook (presumably provided by utils_report).
cols_file_check = flatten([[f"{item}_dir_exists", f"{item}_n_files"] for item in file_check_columns])

# The row-wise apply gives one list per project; turn those lists into named
# columns and attach them to df.
file_check_output = df.apply(file_check, dirs=file_check_columns, axis=1)
df = df.join(pd.DataFrame(file_check_output.to_list(), columns=cols_file_check))

In [239]:
# check point cloud files
# file_check_PC returns one count per point-cloud type (with its default
# `extensions`: RGB first, then NIR); these become the two new columns below.
PC_files = df.iloc[:].apply(file_check_PC, dirs=['PointClouds'], axis=1)
df = df.join(pd.DataFrame(PC_files.to_list(), columns=['PointCloudsRGB_n_files', 'PointCloudsNIR_n_files']))

In [241]:
#check for base files
# NOTE: the check_files_* helpers are not defined in this notebook
# (presumably provided by utils_report); each is applied once per row.
# has vrt files
df['vrt_exists'] = df.apply(check_files_vrt, axis=1)
# has previews
df['previews_exists'] = df.apply(check_files_previews, axis=1)
# has footprints
df['footprints_exists'] = df.apply(check_files_footprints, axis=1)

In [262]:
def check_file_count(df):
    """Add three boolean file-count consistency columns to ``df`` and return it.

    The frame is modified in place; the returned object is the same ``df``.

    Columns added (in this order):
      * ``valid_count_dsm_ortho_equal``   - DSM and Ortho file counts match.
      * ``valid_count_pcrgb_pcnir_equal`` - RGB and NIR point-cloud counts match.
      * ``valid_count_pc_raster_equal``   - twice the RGB point-cloud count
        equals the Ortho file count.
    """
    checks = {
        'valid_count_dsm_ortho_equal':
            lambda frame: frame['DSM_n_files'] == frame['Ortho_n_files'],
        'valid_count_pcrgb_pcnir_equal':
            lambda frame: frame['PointCloudsRGB_n_files'] == frame['PointCloudsNIR_n_files'],
        'valid_count_pc_raster_equal':
            lambda frame: frame['PointCloudsRGB_n_files']*2 == frame['Ortho_n_files'],
    }
    # Evaluate and store one check at a time, in declaration order.
    for column, check in checks.items():
        df[column] = check(df)
    return df

In [264]:
# Append the three valid_count_* consistency columns to df.
df = check_file_count(df)

In [265]:
# Display the first rows as a quick sanity check of the columns built so far.
df.head()

Unnamed: 0,project_name,products_dir,region,site,date,spatial_resolution,subset,DSM_dir_exists,DSM_n_files,Ortho_dir_exists,...,processing_info_dir_exists,processing_info_n_files,PointCloudsRGB_n_files,PointCloudsNIR_n_files,vrt_exists,previews_exists,footprints_exists,valid_count_dsm_ortho_equal,valid_count_pcrgb_pcnir_equal,valid_count_pc_raster_equal
0,NA_AnaktuvukRiverFire_20190722_7cm_01,S:\p_macsprocessing\data_products\NA_Anaktuvuk...,,AnaktuvukRiverFire,20190722,7cm,1,True,134,True,...,True,3,67,67,True,True,True,True,True,True
1,NA_AnaktuvukRiverFire_20190722_7cm_02,S:\p_macsprocessing\data_products\NA_Anaktuvuk...,,AnaktuvukRiverFire,20190722,7cm,2,True,170,True,...,True,3,85,85,True,True,True,True,True,True
2,NA_AnaktuvukRiverFire_20190722_7cm_03,S:\p_macsprocessing\data_products\NA_Anaktuvuk...,,AnaktuvukRiverFire,20190722,7cm,3,True,168,True,...,True,3,82,83,True,True,True,True,False,False
3,NA_AnaktuvukRiverFire_20190722_7cm_04,S:\p_macsprocessing\data_products\NA_Anaktuvuk...,,AnaktuvukRiverFire,20190722,7cm,4,True,170,True,...,True,3,85,85,True,True,True,True,True,True
4,NA_AnaktuvukRiverFire_20190722_7cm_05,S:\p_macsprocessing\data_products\NA_Anaktuvuk...,,AnaktuvukRiverFire,20190722,7cm,5,True,140,True,...,True,3,70,70,True,True,True,True,True,True


## Export
* colored df
* csv
* pdf
* excel?

#### Create styling by column

In [243]:
def color_negative_red(val):
    """Return a background-color CSS rule for float cells, '' for anything else.

    Floats greater than 0 get a red background; all other floats (zero,
    negative, NaN) get a white one.

    NOTE(review): despite the name, it is the *positive* values that are
    coloured red -- confirm whether the name or the comparison is intended.
    """
    if isinstance(val, float):
        shade = 'red' if val > 0 else 'white'
        return f'background-color: {shade}'
    # Non-float cells are left unstyled.
    return ''

In [271]:
def color_orange(val):
    """Return a background-color CSS rule for float cells, '' for anything else.

    A float equal to 0 gets an orange background; every other float gets a
    white one. Non-float values (including the int 0) are left unstyled.
    """
    if isinstance(val, float):
        shade = 'orange' if val == 0 else 'white'
        return f'background-color: {shade}'
    return ''

In [None]:
# NOTE: this cell has no execution count; its two assignments repeat what
# check_file_count() already computes.
df['valid_count_dsm_ortho_equal'] = df['DSM_n_files'] == df['Ortho_n_files']
df['valid_count_pcrgb_pcnir_equal'] = df['PointCloudsRGB_n_files'] == df['PointCloudsNIR_n_files']
# Bare expression: this only displays the existing column, it does not compute it.
df['valid_count_pc_raster_equal']

In [291]:
# Select the flag columns here so that this cell does not depend on names
# that are only defined in a later cell (a top-to-bottom run would otherwise
# fail with a NameError).
subset_exists = [s for s in df.columns if s.endswith('_exists')]
subset_valid_counts = [s for s in df.columns if s.startswith('valid_count_')]
# A project is valid only if every existence flag and every count check is True.
df['all_valid'] = df[subset_exists + subset_valid_counts].all(axis=1)

In [306]:
# Column groups used for the validity flag and for styling the report.
# File-count columns (names ending in 'n_files').
subset_cols = [s for s in df.columns if s.endswith('n_files')]
# Existence flags (names ending in '_exists', which includes the *_dir_exists columns).
subset_exists = [s for s in df.columns if s.endswith('_exists')]
# Count-consistency checks (names starting with 'valid_count_').
subset_valid_counts =  [s for s in df.columns if s.startswith('valid_count_')]
# Columns that receive the per-row highlight for invalid projects.
subset_valid_styler = ['project_name', 'products_dir', 'all_valid']

In [None]:
def highlight_zero(val):
    """Return a red-background CSS rule for zero-valued cells, '' otherwise.

    Intended for element-wise use with a pandas Styler. Cells that do not
    compare equal to 0 get an empty style string, so no invalid
    ``background-color: None`` declaration is emitted for them.

    Note that ``False == 0`` in Python, so boolean ``False`` cells are
    highlighted as well.
    """
    return 'background-color: red' if val == 0 else ''

In [293]:
def highlight_invalid(row):
    """Return one background-color CSS rule per element of ``row``.

    Every element gets an orange (``#FFA500``) background when
    ``row['all_valid']`` compares equal to False, and a white one otherwise.
    """
    # Equality rather than an identity test, so numpy booleans compare as expected.
    if row['all_valid'] == False:
        background = '#FFA500'
    else:
        background = 'white'
    return [f'background-color: {background}'] * len(row)

In [307]:
df_styled = (
    df.style
    # Blue gradient over the *n_files count columns, scaled per column.
    .background_gradient(cmap='Blues', subset=subset_cols[:], axis=0)
    # Green gradient over the *_exists flags, with the colour range fixed to 0..1.
    .background_gradient(cmap='Greens', subset=subset_exists, axis=0, vmin=0, vmax=1)
    # Element-wise over every cell: mark zero values.
    .applymap(highlight_zero)
    # Row-wise, restricted to the identifying columns: mark invalid projects.
    .apply(highlight_invalid, axis=1, subset=subset_valid_styler)
)

In [308]:
# Render the styled table in the notebook.
df_styled

Unnamed: 0,project_name,products_dir,region,site,date,spatial_resolution,subset,DSM_dir_exists,DSM_n_files,Ortho_dir_exists,Ortho_n_files,processing_info_dir_exists,processing_info_n_files,PointCloudsRGB_n_files,PointCloudsNIR_n_files,vrt_exists,previews_exists,footprints_exists,valid_count_dsm_ortho_equal,valid_count_pcrgb_pcnir_equal,valid_count_pc_raster_equal,all_valid
0,NA_AnaktuvukRiverFire_20190722_7cm_01,S:\p_macsprocessing\data_products\NA_AnaktuvukRiverFire_20190722_7cm_01,,AnaktuvukRiverFire,20190722,7cm,1,1,134,1,134,1,3,67,67,1,1,1,1,1,1,1
1,NA_AnaktuvukRiverFire_20190722_7cm_02,S:\p_macsprocessing\data_products\NA_AnaktuvukRiverFire_20190722_7cm_02,,AnaktuvukRiverFire,20190722,7cm,2,1,170,1,170,1,3,85,85,1,1,1,1,1,1,1
2,NA_AnaktuvukRiverFire_20190722_7cm_03,S:\p_macsprocessing\data_products\NA_AnaktuvukRiverFire_20190722_7cm_03,,AnaktuvukRiverFire,20190722,7cm,3,1,168,1,168,1,3,82,83,1,1,1,1,0,0,0
3,NA_AnaktuvukRiverFire_20190722_7cm_04,S:\p_macsprocessing\data_products\NA_AnaktuvukRiverFire_20190722_7cm_04,,AnaktuvukRiverFire,20190722,7cm,4,1,170,1,170,1,3,85,85,1,1,1,1,1,1,1
4,NA_AnaktuvukRiverFire_20190722_7cm_05,S:\p_macsprocessing\data_products\NA_AnaktuvukRiverFire_20190722_7cm_05,,AnaktuvukRiverFire,20190722,7cm,5,1,140,1,140,1,3,70,70,1,1,1,1,1,1,1
5,NA_CapeSimpson_20190719_7cm_01,S:\p_macsprocessing\data_products\NA_CapeSimpson_20190719_7cm_01,,CapeSimpson,20190719,7cm,1,1,118,1,118,1,3,59,59,1,1,1,1,1,1,1
6,NA_CapeSimpson_20190719_7cm_02,S:\p_macsprocessing\data_products\NA_CapeSimpson_20190719_7cm_02,,CapeSimpson,20190719,7cm,2,1,114,1,114,1,3,0,0,1,1,1,1,1,0,0
7,NA_CapeSimpson_20190719_7cm_03,S:\p_macsprocessing\data_products\NA_CapeSimpson_20190719_7cm_03,,CapeSimpson,20190719,7cm,3,1,106,1,106,1,3,0,0,1,1,1,1,1,0,0
8,NA_CapeSimpson_20190719_7cm_04,S:\p_macsprocessing\data_products\NA_CapeSimpson_20190719_7cm_04,,CapeSimpson,20190719,7cm,4,1,100,1,100,1,3,0,0,1,1,1,1,1,0,0
9,NA_CapeSimpson_20190719_7cm_05,S:\p_macsprocessing\data_products\NA_CapeSimpson_20190719_7cm_05,,CapeSimpson,20190719,7cm,5,1,90,1,90,1,3,45,45,1,1,1,1,1,1,1


In [309]:
# Write the styled table to an HTML file in DIR_BASE.
df_styled.to_html(DIR_BASE / 'processing_status_report.html')

In [310]:
# Excel export, currently commented out: the recorded output of this cell shows
# it failed with ModuleNotFoundError because 'openpyxl' is not installed.
#df_styled.to_excel(DIR_BASE / 'processing_status_report.xlsx')

ModuleNotFoundError: No module named 'openpyxl'