In [38]:
import os
import sys
from matplotlib import pyplot
import time
import geopandas
import pandas as pd

# Enable importing scripts from ../scripts folder
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path+"/scripts")
    
from destruction_utilities import *

In [39]:
DEBUG = False
CITY = 'aleppo'
TILE_SIZE = [128,128]
REFRESH_SAMPLE = True
ZERO_DAMAGE_BEFORE_YEAR = 2012
PRE_IMAGE_INDEX = [0]

WINDOW=True
WINDOW_SIZE=(20,20)

DATA_DIR = "../../data"

In [40]:
image      = search_data(pattern(city=CITY, type='image'), directory=DATA_DIR)[0]
settlement = search_data(f'{CITY}_settlement.*gpkg$', directory=DATA_DIR)
noanalysis = search_data(f'{CITY}_noanalysis.*gpkg$', directory=DATA_DIR)

In [41]:
profile    = tiled_profile(image, tile_size=(*TILE_SIZE, 1))
settlement = rasterise(settlement, profile, dtype='bool')
noanalysis = rasterise(noanalysis, profile, dtype='bool')
analysis   = np.logical_and(settlement, np.invert(noanalysis))

In [42]:
# Debug: display settlement, analysis, and no analysis zones
if DEBUG:
    image = read_raster(image)
    print("Original image size:", image.shape)
    display_multiple([settlement, noanalysis, analysis], labels=["settlements", "noanalysis", "analysis"])

In [43]:
del image, settlement, noanalysis

In [44]:
if REFRESH_SAMPLE:
    # Splits samples
    np.random.seed(1)
    index   = dict(training=0.70, validation=0.15, test=0.15)
    index   = np.random.choice(np.arange(len(index)) + 1, np.sum(analysis), p=list(index.values()))
    samples = analysis.astype(int)
    np.place(samples, analysis, index)
    write_raster(samples, profile, f'{DATA_DIR}/{CITY}/others/{CITY}_samples.tif', nodata=-1, dtype='int8')
    del index, samples, analysis

In [45]:
# DEBUG Visualize Sample
if DEBUG:
    samples = read_raster(f'{DATA_DIR}/{CITY}/others/{CITY}_samples.tif')
    display(samples)

In [46]:
# Calculate labels for each tile..
# Reads damage reports
damage = search_data(f'{CITY}_damage.*gpkg$', directory=DATA_DIR)
damage = geopandas.read_file(damage)

last_annotation_date = sorted(damage.columns)[-2]

In [47]:
# DEBUG: Plot damage annotations and print last annotation date
if DEBUG:
    damage.plot()
    print(last_annotation_date)
    damage.sample(3)

In [48]:
# Extract report dates
dates = search_data(pattern(city=CITY, type='image'), directory=DATA_DIR)
dates = extract(dates, '\d{4}_\d{2}_\d{2}')
dates= list(map(lambda x: x.replace("_", "-"), dates))


In [49]:
# DEBUG: Print out dates
if DEBUG:
    dates

In [50]:
known_dates = sorted(damage.drop('geometry', axis =1).columns)
damage[list(set(dates) - set(damage.columns))] = np.nan
damage = damage.reindex(sorted(damage.columns), axis=1)

In [51]:
# DEBUG
if DEBUG:
    damage.sample(3)

In [52]:
pre_cols = [col for col in sorted(damage.drop('geometry', axis=1)).columns if int(col.split("-")[0]) < ZERO_DAMAGE_BEFORE_YEAR]

for i, col in sorted(damage.drop('geometry', axis=1)).columns:
    if i in PRE_IMAGE_INDEX:
        pre_cols.append(i)

In [53]:
# DEBUG
if DEBUG:
    pre_cols

In [17]:
damage[pre_cols] = 0.0

In [18]:
# DEBUG
if DEBUG:
    damage.sample(3)

In [19]:
# BUG FOUND
## Replace:
post_cols = [col for col in damage.drop('geometry', axis=1).columns if time.strptime(col, "%Y-%m-%d") > time.strptime(last_annotation_date, "%Y-%m-%d")]
## With:
post_cols = sorted([col for col in damage.drop('geometry', axis=1).columns if int(col.split("-")[0]) > int(ZERO_DAMAGE_BEFORE_YEAR)])

In [20]:
# DEBUG
if DEBUG:
    post_cols

In [21]:
print(known_dates)
last_known_date = known_dates[0]
for col in post_cols:
    print(col)
    if col in known_dates and time.strptime(col, "%Y-%m-%d") >= time.strptime(last_known_date, "%Y-%m-%d"):
        last_known_date = col
        if(known_dates.index(col) < len(known_dates)-1):
            next_known_date = known_dates[known_dates.index(col)+1]
            print("\tHit known date", {col})
            print("\tNext known date", {next_known_date})
            dates_between = post_cols[post_cols.index(last_known_date)+1:post_cols.index(next_known_date)]
            print("\tInbetweeners", dates_between)
            zeros = list(*np.where(damage[next_known_date] == 0.0))
            not_equal = list(*np.where(damage[last_known_date] != damage[next_known_date]))
#             uncertains = list(set(zeros).intersection(set(not_equal)))
            for date in dates_between:
                print(f"\tShould I uncertain {date} ({len(not_equal)}) using {last_known_date} and {next_known_date}")
                damage.loc[not_equal, date] = -1
            
            

['2013-09-23', '2014-05-23', '2015-04-26', '2015-05-01', '2016-09-18']
2013-05-26
2013-09-23
	Hit known date {'2013-09-23'}
	Next known date {'2014-05-23'}
	Inbetweeners []
2014-05-23
	Hit known date {'2014-05-23'}
	Next known date {'2015-04-26'}
	Inbetweeners ['2014-07-14']
	Should I uncertain 2014-07-14 (5198) using 2014-05-23 and 2015-04-26
2014-07-14
2015-04-26
	Hit known date {'2015-04-26'}
	Next known date {'2015-05-01'}
	Inbetweeners []
2015-05-01
	Hit known date {'2015-05-01'}
	Next known date {'2016-09-18'}
	Inbetweeners ['2016-03-29']
	Should I uncertain 2016-03-29 (35737) using 2015-05-01 and 2016-09-18
2016-03-29
2016-09-18


In [22]:
if DEBUG:
    damage

In [23]:
if DEBUG:
    uncertains = list(*np.where(np.logical_and(damage['2014-05-23'] == 2, damage['2015-04-26'] == 0)))
    damage.iloc[uncertains, :]

In [24]:
if DEBUG:
    for col in post_cols:
        zeros = list(*np.where(damage[col] == 0.0))
        nulls = list(*np.where(damage[col].isnull()))
        others = list(*np.where(np.logical_and(damage[col].notnull(), damage[col] != 0.0)))
        print(f'{col}')
        print(f"{col}, Zeros: ", len(zeros))
        print(f"{col}, Nulls: ", len(nulls))
        print(f"{col}, Others: ", len(others))
        cols_before_date = [c for c in post_cols if time.strptime(c, "%Y-%m-%d")  < time.strptime(col, "%Y-%m-%d") ]
        print(f"Backfill Candidates: {cols_before_date}")
    #     for date in cols_before_date:
    #         print(damage[date].isnull().sum())
    #         damage.loc[nulls, date] = 99
    print(known_dates)




In [25]:
filled = []
last_known_date = None
for j, col in enumerate(post_cols):
    zeros = list(*np.where(damage[col] == 0.0))
    cols_before_date = [c for c in post_cols if time.strptime(c, "%Y-%m-%d")  < time.strptime(col, "%Y-%m-%d") ]
    for i, date in enumerate(cols_before_date):       
        if date not in filled and date not in known_dates:
            print(f"Backfilling {date} using {col}")
            zeros = list(*np.where(damage[col] == 0.0))
            uncertains = list(*np.where(damage[date] != -1))
            n_uncertains = list(set(zeros).intersection(set(uncertains)))
            damage.loc[n_uncertains, date] = 0.0
            filled.append(date)    

Backfilling 2013-05-26 using 2013-09-23
Backfilling 2014-07-14 using 2015-04-26
Backfilling 2016-03-29 using 2016-09-18


In [26]:
# DEBUG
if DEBUG:
    for col in post_cols:
        zeros = list(*np.where(damage[col] == 0.0))
        nulls = list(*np.where(damage[col].isnull()))
        others = list(*np.where(np.logical_and(damage[col].notnull(), damage[col] != 0.0)))
        print(f'{col}')
        print(f"{col}, Zeros: ", len(zeros))
        print(f"{col}, Nulls: ", len(nulls))
        print(f"{col}, Others: ", len(others), "\n\n")


In [27]:
if DEBUG:
    geometry = damage.geometry
    damage_ = damage.drop('geometry', axis=1)
    damage_ = damage_.T
    damage_.fillna(method = 'ffill') != damage_.fillna(method = 'bfill')

In [28]:
# Label the uncertain class
geometry = damage.geometry
damage_ = damage.drop('geometry', axis=1)
damage_ = damage_.T
for col in damage_.columns:
    uncertains = np.where(damage_[col].fillna(method='ffill') != damage_[col].fillna(method='bfill'))
    damage_.iloc[uncertains, col] = -1
damage = damage_.T
damage['geometry'] = geometry
damage = geopandas.GeoDataFrame(damage)

In [29]:
if DEBUG:
    geometry = damage.geometry
    damage_ = damage.drop('geometry', axis=1)
    damage_ = damage_.T
    damage_ = damage_.fillna(method='ffill')
    damage_

In [30]:
# Forward fill the rest
geometry = damage.geometry
damage_ = damage.drop('geometry', axis=1)
damage_ = damage_.T
damage_ = damage_.fillna(method='ffill')
damage = damage_.T
damage['geometry'] = geometry
damage = geopandas.GeoDataFrame(damage)

In [31]:
if DEBUG:
    for col in damage.columns:
        print(f"{col}:")
        print(f"NAs: {damage[col].isnull().sum()}")
        print(f"Value counts")
        if col != 'geometry':
            print(damage[col].value_counts(), "\n\n")

In [32]:
if DEBUG:
    def get_counts(dam, name="combos.csv"):
        geometry = dam.geometry
        damage_ = dam.drop('geometry', axis=1)
    #     print(known_dates)
        counts = damage_.reset_index().groupby(list(damage_.columns)).count().reset_index().sort_values('index', ascending=False)
        counts.to_csv(name)
        return counts

    get_counts(damage)

In [33]:
# Writes damage labels
for date in damage.drop('geometry', axis=1).columns:
    print(f'------ {date}')
    subset = damage[[date, 'geometry']].sort_values(by=date) # Sorting takes the max per pixel
    subset = rasterise(subset, profile, date)
    write_raster(subset, profile, f'{DATA_DIR}/{CITY}/labels/label_{date}.tif', nodata=-1, dtype='int8')
del date, subset

------ 2011-01-01
------ 2013-05-26
------ 2013-09-23
------ 2014-05-23
------ 2014-07-14
------ 2015-04-26
------ 2015-05-01
------ 2016-03-29
------ 2016-09-18


In [34]:
if DEBUG:
    subsets = []
    for date in damage.drop('geometry', axis=1).columns:
        print(f'------ {date}')
        subset = damage[[date, 'geometry']].sort_values(by=date) # Sorting takes the max per pixel
#         subset = rasterise(subset, profile, date)
        subsets.append(subset)

#     display_multiple([*subsets], cmap="Reds")

In [35]:
samples = read_raster(f'{DATA_DIR}/{CITY}/others/{CITY}_samples.tif')
images  = search_data(pattern(city=CITY, type='image'), directory=DATA_DIR)
labels  = search_data(pattern(city=CITY, type='label'), directory=DATA_DIR)

In [36]:
read_raster(images[2])

array([[[123., 105.,  90.],
        [123., 101.,  82.],
        [115.,  97.,  82.],
        ...,
        [ 82.,  69.,  74.],
        [ 99.,  85.,  99.],
        [132., 117., 123.]],

       [[115.,  97.,  82.],
        [107.,  89.,  74.],
        [107.,  85.,  66.],
        ...,
        [ 74.,  57.,  66.],
        [ 99.,  81.,  90.],
        [123., 109., 115.]],

       [[115.,  93.,  82.],
        [115.,  93.,  74.],
        [107.,  89.,  74.],
        ...,
        [ 74.,  65.,  74.],
        [ 99.,  85.,  90.],
        [115., 101., 107.]],

       ...,

       [[ 33.,  28.,  16.],
        [ 58.,  49.,  41.],
        [ 74.,  65.,  58.],
        ...,
        [115.,  73.,  66.],
        [115.,  73.,  66.],
        [115.,  73.,  66.]],

       [[ 49.,  40.,  33.],
        [ 58.,  53.,  41.],
        [ 74.,  65.,  49.],
        ...,
        [115.,  73.,  66.],
        [115.,  73.,  66.],
        [115.,  77.,  74.]],

       [[ 66.,  61.,  49.],
        [ 66.,  57.,  49.],
        [ 66.,  

In [37]:
center_window(images[2], size=WIN)

SyntaxError: invalid syntax (2684372231.py, line 1)