In [1]:
import os
import pandas as pd
import rasterio

In [2]:
def is_x_constant(raster):
    """Tell whether the raster reports the same x at two opposite corners.

    Compares the first element of ``raster.xy`` for the pixel at
    (row 0, col 0) with the one for the pixel at
    (row ``height - 1``, col ``width - 1``).

    Args:
        raster: object exposing ``height``, ``width`` and ``xy(row, col)``.

    Returns:
        True when both corner x values compare equal, False otherwise.
    """
    first_corner = raster.xy(0, 0)
    last_corner = raster.xy(raster.height - 1, raster.width - 1)
    return first_corner[0] == last_corner[0]

def is_y_constant(raster):
    """Tell whether the raster reports the same y at two opposite corners.

    Compares the second element of ``raster.xy`` for the pixel at
    (row 0, col 0) with the one for the pixel at
    (row ``height - 1``, col ``width - 1``).

    Args:
        raster: object exposing ``height``, ``width`` and ``xy(row, col)``.

    Returns:
        True when both corner y values compare equal, False otherwise.
    """
    first_corner = raster.xy(0, 0)
    last_corner = raster.xy(raster.height - 1, raster.width - 1)
    return first_corner[1] == last_corner[1]

# Sort every city raster into `good_files` (corner coordinates differ along
# both axes) or `bad_files` (x and/or y identical at opposite corners).
raster_dir = '../data/city_rasters/'
files = os.listdir(raster_dir)
cities = ['aleppo', 'damascus', 'raqqa', 'daraa', 'homs', 'idlib', 'hama', 'deir']
good_files, bad_files = [], []
for city in cities:
    # A file is attributed to a city when the city name occurs in its file name.
    city_files = [file for file in files if city in file]
    for file in city_files:
        # The context manager closes the dataset handle as soon as the two
        # corner checks are done, so handles do not pile up across the loop.
        with rasterio.open(os.path.join(raster_dir, file)) as raster:
            x_is_constant, y_is_constant = is_x_constant(raster), is_y_constant(raster)

        if x_is_constant or y_is_constant:
            # Keep both flags so the bad list shows which axis is degenerate.
            bad_files.append((file, x_is_constant, y_is_constant))
        else:
            good_files.append(file)

On these files, the latitude, the longitude, or both are identical at opposite corners of the image (we assume the value is constant across the entire image). Hence, we cannot use these files to link the images with the annotations.

In [3]:
bad_files

[('raqqa_2016_07_01_zoom_19.tif', False, True),
 ('raqqa_2014_03_21_zoom_19.tif', False, True),
 ('daraa_2014_05_01_zoom_19.tif', True, True),
 ('daraa_2016_02_25_zoom_19.tif', True, True),
 ('daraa_2013_11_10_zoom_19.tif', True, True),
 ('daraa_2016_04_19_zoom_19.tif', True, True),
 ('homs_2016_05_30_zoom_19.tif', True, True),
 ('homs_2013_10_31_zoom_19.tif', True, True),
 ('idlib_2014_05_31_zoom_19.tif', True, True),
 ('idlib_2014_02_07_zoom_19.tif', True, True),
 ('hama_2016_07_29_zoom_19.tif', True, True),
 ('hama_2013_10_31_zoom_19.tif', True, True),
 ('hama_2014_04_03_zoom_19.tif', True, True),
 ('deir_2014_09_16_zoom_19.tif', True, True),
 ('deir_2013_10_24_zoom_19.tif', True, True)]

These files do not have that problem.

In [4]:
good_files

['aleppo_2013_05_26_zoom_19.tif',
 'aleppo_2011_06_26_zoom_19.tif',
 'aleppo_2014_07_14_zoom_19.tif',
 'aleppo_2014_05_23_zoom_19.tif',
 'aleppo_2013_09_23_zoom_19.tif',
 'aleppo_2016_09_18_zoom_19.tif',
 'aleppo_2015_10_26_zoom_19.tif',
 'aleppo_2016_10_19_zoom_19.tif',
 'aleppo_2015_11_22_zoom_19.tif',
 'aleppo_2013_10_31_zoom_19.tif',
 'damascus_2011_08_22_zoom_19.tif',
 'damascus_2017_01_22_zoom_19.tif',
 'raqqa_2013_01_17_zoom_19.tif',
 'raqqa_2015_02_02_zoom_19.tif',
 'daraa_2011_10_17_zoom_19.tif',
 'daraa_2017_02_07_zoom_19.tif',
 'homs_2014_04_03_zoom_19.tif',
 'homs_2011_05_21_zoom_19.tif',
 'idlib_2011_07_31_zoom_19.tif',
 'idlib_2016_08_01_zoom_19.tif',
 'hama_2016_06_30_zoom_19.tif',
 'hama_2012_02_22_zoom_19.tif',
 'deir_2016_05_25_zoom_19.tif',
 'deir_2012_12_05_zoom_19.tif']

If all images of a city have the same dimensions, we could try to infer the coordinates of the problematic images from the images of that city that work.

In [7]:
# Collect width, height and the two corner-check flags of every city raster,
# one record per file, so they can be compared per city.
raster_dir = '../data/city_rasters/'
files = os.listdir(raster_dir)
cities = ['aleppo', 'damascus', 'raqqa', 'daraa', 'homs', 'idlib', 'hama', 'deir']
dimensions = []
for city in cities:
    # A file is attributed to a city when the city name occurs in its file name.
    city_files = [file for file in files if city in file]
    for file in city_files:
        # The context manager closes the dataset handle once its metadata
        # has been read, so handles do not pile up across the loop.
        with rasterio.open(os.path.join(raster_dir, file)) as raster:
            dimensions.append({
                'city': city,
                'width': raster.width,
                'height': raster.height,
                'x_is_constant': is_x_constant(raster),
                'y_is_constant': is_y_constant(raster)
            })

In [11]:
# Distinct raster heights recorded for each city.
(
    pd.DataFrame(dimensions)
    .groupby('city')['height']
    .unique()
)

city
aleppo      [25088]
damascus     [8192]
daraa       [12032]
deir        [11264]
hama        [15616]
homs        [18176]
idlib        [8704]
raqqa        [8960]
Name: height, dtype: object

In [12]:
# Distinct raster widths recorded for each city.
(
    pd.DataFrame(dimensions)
    .groupby('city')['width']
    .unique()
)

city
aleppo      [37376]
damascus    [11776]
daraa       [13312]
deir        [12032]
hama        [15616]
homs        [20736]
idlib       [10752]
raqqa       [23552]
Name: width, dtype: object

In [5]:
# Run the same corner check on a single raster from the test set.
test_raster_path = '../data/test/homs_2016_05_30_v3/homs_2016_05_30_v3_zoom_19.tif'
# Record the result under this raster's own name rather than under whatever
# `file` was left holding by an earlier loop.
file = os.path.basename(test_raster_path)
# Left open on purpose: a later cell still queries `raster`.
raster = rasterio.open(test_raster_path)
x_is_constant, y_is_constant = is_x_constant(raster), is_y_constant(raster)

if x_is_constant or y_is_constant:
    bad_entry = (file, x_is_constant, y_is_constant)
    # Guard against duplicates so re-running this cell is harmless.
    if bad_entry not in bad_files:
        bad_files.append(bad_entry)
elif file not in good_files:
    good_files.append(file)

In [6]:
x_is_constant

True

In [8]:
# Spot-check the coordinates reported for the pixel at row 10000, col 10000.
# NOTE(review): presumably meant to confirm that the coordinates are constant
# away from the corners too — confirm against the corner values.
raster.xy(10000, 10000)

(36.0, 34.0)