# Create Product report for MACS processing data

## 1. Data loading

## 2. File checks
* check completeness of input dirs and base files
* count tiles
* control for

## Imports

In [None]:
import pandas as pd
from pathlib import Path
import itertools

from utils_report import *

## Setup 
* paths

In [None]:
# Base locations used throughout the report.
# Root of the processing workspace.
DIR_BASE = Path(r'S:\p_macsprocessing')
# Output products; every entry directly below this folder is treated as a project.
DIR_DATA_PRODUCTS = DIR_BASE.joinpath('data_products')
# Folder that is searched for '<project_name>.geojson' AOI files.
DIR_AOI = DIR_BASE.joinpath('aoi')

In [None]:
# Check that all required directories exist before anything else runs.
# An explicit exception is raised instead of using `assert`, so the check is
# not stripped under `python -O` and the error names the missing paths.
missing_dirs = [d for d in (DIR_BASE, DIR_DATA_PRODUCTS, DIR_AOI) if not d.exists()]
if missing_dirs:
    raise FileNotFoundError(
        'Required directories are missing: '
        + ', '.join(str(d) for d in missing_dirs)
    )

## Calculate Statistics 
* Files
* File Count
* file count across types
* aoi (size?)


#### Setup basic Dataframe and split input name

In [None]:
# Build the base table: one row per project directory below DIR_DATA_PRODUCTS.
# The listing is sorted because glob() yields entries in an OS-dependent order,
# which would make the row order of the report non-reproducible. Non-directory
# entries are skipped so stray files do not show up as projects.
dir_list = sorted(p for p in DIR_DATA_PRODUCTS.glob('*') if p.is_dir())
df = pd.DataFrame({
    # project name is the name of the product directory
    'project_name': [p.name for p in dir_list],
    'products_dir': dir_list,
})
# add site specific details
df = split_name_details(df)

In [None]:
# Check the DSM, Ortho and processing_info sub-directories of every project.
file_check_columns = ['DSM', 'Ortho', 'processing_info']
# Two result columns per checked directory: "<dir>_dir_exists" and "<dir>_n_files".
cols_file_check = flatten(
    [[f"{item}_dir_exists", f"{item}_n_files"] for item in file_check_columns]
)

# Run the check row by row, then expand the per-row results into columns.
file_check_output = df.apply(file_check, dirs=file_check_columns, axis=1)
file_check_frame = pd.DataFrame(file_check_output.to_list(), columns=cols_file_check)
df = df.join(file_check_frame)

In [None]:
# Flag projects that have an AOI file named '<project_name>.geojson' in DIR_AOI.
def _aoi_file_exists(row):
    """Return True if the AOI GeoJSON for this row's project exists."""
    aoi_file = DIR_AOI / f'{row["project_name"]}.geojson'
    return aoi_file.exists()


df['aoi_exists'] = df.apply(_aoi_file_exists, axis=1)

In [None]:
# Check the point cloud files of every project (RGB and NIR counted separately).
PC_files = df.apply(file_check_PC, dirs=['PointClouds'], axis=1)
pc_count_columns = ['PointCloudsRGB_n_files', 'PointCloudsNIR_n_files']
# Expand the per-row results into the two count columns and attach them.
df = df.join(pd.DataFrame(PC_files.to_list(), columns=pc_count_columns))

In [None]:
# Check for base files: one column per check, evaluated row by row.
base_file_checks = (
    ('vrt_exists', check_files_vrt),                # has vrt files
    ('previews_exists', check_files_previews),      # has previews
    ('footprints_exists', check_files_footprints),  # has footprints
)
for column_name, check_function in base_file_checks:
    df[column_name] = df.apply(check_function, axis=1)

In [None]:
# Pass the table through check_file_count and keep the returned frame.
# NOTE(review): check_file_count is not defined in this notebook (presumably
# provided by utils_report); confirm which columns it adds or changes.
df = check_file_count(df)

In [None]:
# Preview the first rows of the assembled table.
df.head()

## Export
* colored df
* csv
* pdf
* excel?

#### Create styling by column

In [None]:
# Consistency checks between the file counts; each result is stored in a
# boolean 'valid_count_*' column.
# DSM and Ortho must contain the same number of files.
df['valid_count_dsm_ortho_equal'] = df['DSM_n_files'] == df['Ortho_n_files']
# RGB and NIR point clouds must contain the same number of files.
df['valid_count_pcrgb_pcnir_equal'] = df['PointCloudsRGB_n_files'] == df['PointCloudsNIR_n_files']
# Point cloud count must match the raster count. Previously this column was
# only read and never assigned, which raised a KeyError.
# NOTE(review): compares RGB point clouds against DSM rasters; together with
# the two checks above this ties all four counts together - confirm that this
# is the intended rule.
df['valid_count_pc_raster_equal'] = df['PointCloudsRGB_n_files'] == df['DSM_n_files']

In [None]:
# Group the columns by naming convention for styling and validation.
column_names = list(df.columns)
# file count columns
subset_cols = [name for name in column_names if name.endswith('n_files')]
# existence flag columns
subset_exists = [name for name in column_names if name.endswith('_exists')]
# count consistency columns
subset_valid_counts = [name for name in column_names if name.startswith('valid_count_')]
# columns handed to the invalid-row highlighter
subset_valid_styler = ['project_name', 'products_dir', 'all_valid']

In [None]:
# A project is fully valid only if every existence flag and every count
# consistency check in its row is truthy.
validity_columns = subset_exists + subset_valid_counts
df['all_valid'] = df[validity_columns].all(axis=1)

In [None]:
# Build the styled report table:
#   * blue gradient over the file count columns (scaled per column)
#   * green gradient over the existence flags, fixed to the range 0..1
#   * highlight_zero applied to every cell
#   * highlight_invalid applied per row to the name/dir/all_valid columns
# NOTE(review): Styler.applymap is deprecated since pandas 2.1 in favour of
# Styler.map; switch once the minimum supported pandas version allows it.
df_styled = (
    df.style
    .background_gradient(cmap='Blues', subset=subset_cols, axis=0)
    .background_gradient(cmap='Greens', subset=subset_exists, axis=0, vmin=0, vmax=1)
    .applymap(highlight_zero)
    .apply(highlight_invalid, axis=1, subset=subset_valid_styler)
)

In [None]:
# Display the styled table in the notebook.
df_styled

In [None]:
# Write the styled table as an HTML report into the base directory.
report_html_path = DIR_BASE / 'processing_status_report.html'
df_styled.to_html(report_html_path)

In [None]:
#df_styled.to_excel(DIR_BASE / 'processing_status_report.xlsx')