In [1]:
import rasterio
import rasterio.plot
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
def get_lon_lat_coordinates(filepath="../../data/features/pop.tif"):
    """Return per-pixel longitude and latitude grids for a raster file.

    Adapted from
    http://xarray.pydata.org/en/v0.10.0/auto_gallery/plot_rasterio.html

    Parameters
    ----------
    filepath : str
        Path of the raster file, opened with ``xarray.open_rasterio``.

    Returns
    -------
    lon, lat : numpy.ndarray
        Two arrays of shape ``(ny, nx)``, where ``ny`` and ``nx`` are the
        lengths of the raster's ``y`` and ``x`` coordinates, holding those
        coordinates transformed to EPSG:4326.
    """
    import numpy as np
    import xarray as xr
    from rasterio.crs import CRS
    from rasterio.warp import transform

    # NOTE(review): xr.open_rasterio is deprecated in recent xarray releases;
    # it is kept here because the file has no other xarray-based reader.
    # The context manager releases the file handle even if something below fails.
    with xr.open_rasterio(filepath) as da:
        source_crs = da.crs
        ny, nx = len(da['y']), len(da['x'])
        # meshgrid returns plain in-memory arrays, so they outlive the open file.
        x, y = np.meshgrid(da['x'], da['y'])

    # rasterio.warp.transform works with 1D sequences, so flatten the grid.
    # An explicit CRS object replaces the deprecated {'init': 'EPSG:4326'} dict.
    lon, lat = transform(source_crs, CRS.from_epsg(4326),
                         x.flatten(), y.flatten())

    lon = np.asarray(lon).reshape((ny, nx))
    lat = np.asarray(lat).reshape((ny, nx))
    return lon, lat

In [3]:
def get_featureData():
    """Load every feature raster into one flat table with one row per pixel.

    Returns
    -------
    featureData : pandas.DataFrame
        ``longitude`` and ``latitude`` columns followed by one column per
        feature raster, each holding band 1 flattened with ``reshape(-1)``.
    raster_na_value : numpy.float32
        First no-data value of the last raster read, cast to float32.
    shape : tuple
        ``(rows, columns)`` of the coordinate grid; reshaping a column of
        ``featureData`` to this shape restores the 2-D raster layout.
    """
    featureTifs = ["access", "pet", "pop", "urban", "W_access"]
    path_template = "../../data/features/{}.tif"

    # Coordinates are taken from the first raster only.
    # presumably all rasters share one grid; verify against the data
    lon, lat = get_lon_lat_coordinates(path_template.format(featureTifs[0]))

    # Derive the grid shape from the data instead of hard-coding (849, 1469).
    shape = lon.shape

    columns = {
        "longitude": lon.reshape(-1),
        "latitude": lat.reshape(-1),
    }

    raster_na_value = None
    for feature in featureTifs:
        # The context manager closes the dataset even if reading raises.
        with rasterio.open(path_template.format(feature)) as raster:
            # Band 1 holds the feature values.
            columns[feature] = raster.read(1).reshape(-1)
            # Overwritten on every iteration, so the last raster's value wins.
            raster_na_value = np.float32(raster.nodatavals[0])

    featureData = pd.DataFrame(columns)
    return featureData, raster_na_value, shape

In [4]:
# Load the flattened feature table, the rasters' no-data marker and the grid shape.
featureData, raster_na_value, shape = get_featureData()

In [5]:
#featureData.to_csv("../../data/features/features.csv", index=True, index_label="index", na_rep="NA")

In [6]:
# Turn the raster no-data marker into NaN and keep only rows where every column is present.
# np.nan is used because the np.NaN alias was removed in NumPy 2.0.
cleanedFeatureData = featureData.replace(raster_na_value, np.nan).dropna()

In [7]:
#cleanedFeatureData.to_csv("../../data/features/cleaned-features.csv", index=True, index_label="index", na_rep="NA")

In [8]:
def get_predictions(data, model_path="../../best-models/two-hidden-layers"):
    """Run the saved Keras model on a feature table.

    Parameters
    ----------
    data : pandas.DataFrame
        Feature rows containing at least ``longitude`` and ``latitude``
        columns. The frame is copied and never modified.
    model_path : str, optional
        Location handed to ``tf.keras.models.load_model``.

    Returns
    -------
    The value returned by ``model.predict`` for the scaled feature values.
    """
    import tensorflow as tf

    model = tf.keras.models.load_model(model_path)

    # Copy the argument itself. The previous version copied the global
    # ``cleanedFeatureData`` here, which silently ignored the ``data`` argument.
    features = data.copy()

    # Scale the coordinates: longitude is divided by 180, latitude by 90.
    features["longitude"] = features["longitude"] / 180
    features["latitude"] = features["latitude"] / 90

    return model.predict(features.values, batch_size=32)

In [9]:
# Predict for the rows that survived the no-data filtering.
predictions = get_predictions(cleanedFeatureData)

In [10]:
# Display the raw model outputs (one value per row).
predictions

array([[0.6315159 ],
       [0.64335734],
       [0.68598884],
       ...,
       [0.9166327 ],
       [0.69203144],
       [0.8945213 ]], dtype=float32)

In [11]:
# Map the flattened predictions through exp(x) - 1.
# np.expm1 computes this without the precision loss that np.exp(x) - 1 has near zero.
# presumably the model was trained on log1p-transformed targets; confirm against training code
adj_cases = np.expm1(predictions.reshape(-1))

In [12]:
# Smallest back-transformed prediction; a negative value means clipping is needed.
adj_cases.min()

-0.55169773

In [13]:
# Build a new array in which every negative prediction becomes zero;
# the zero is created in the array's own dtype so the result dtype is unchanged.
adj_cases_non_negative = np.where(adj_cases < 0, adj_cases.dtype.type(0), adj_cases)

In [203]:
# Attach the clipped predictions to the filtered rows (adds a column in place).
cleanedFeatureData["adj_cases"] = adj_cases_non_negative

In [17]:
# Sanity check: after clipping the minimum should no longer be negative.
adj_cases_non_negative.min()

0.0

In [205]:
# Preview the first filtered rows together with their predictions.
cleanedFeatureData.head()

Unnamed: 0,longitude,latitude,access,pet,pop,urban,W_access,adj_cases
1192,123.270833,53.5625,0.20218,-1.555513,-0.626523,-0.482502,0.567165,0.880459
1193,123.3125,53.5625,0.179225,-1.547939,-0.682925,-0.482502,0.564734,0.902859
1194,123.354167,53.5625,-0.046382,-1.541605,-0.708398,-0.482502,0.563129,0.985734
2656,123.0625,53.520833,0.31596,-1.572818,-0.57757,0.778462,0.571596,0.0
2657,123.104167,53.520833,0.297046,-1.566935,-0.683404,-0.482502,0.570844,0.862802


In [206]:
# Start with every pixel set to the no-data marker.
featureData["adj_cases"] = raster_na_value

In [208]:
# Write the predictions back into the full table, matching rows by index label;
# rows absent from cleanedFeatureData keep whatever value they already had.
featureData.loc[cleanedFeatureData.index, "adj_cases"] = cleanedFeatureData["adj_cases"]
#featureData = featureData.fillna(raster_na_value)

In [210]:
# Summary statistics of the full table. No-data pixels still carry the
# -3.4e+38 marker, which presumably is why mean/std show as -inf/inf below.
featureData.describe()

Unnamed: 0,longitude,latitude,access,pet,pop,urban,W_access,adj_cases
count,1247181.0,1247181.0,1247181.0,1247181.0,1247181.0,1247181.0,1247181.0,1247181.0
mean,104.1875,35.89583,-inf,-inf,-inf,-inf,-inf,-inf
std,17.66933,10.21188,inf,inf,inf,inf,inf,inf
min,73.60417,18.22917,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38
25%,88.89583,27.0625,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38
50%,104.1875,35.89583,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38
75%,119.4792,44.72917,-0.1701949,-0.170228,-0.1730415,-0.4825017,-0.1699138,0.9473325
max,134.7708,53.5625,4.630908,4.63091,4.6305,4.630875,4.630948,50.02286


In [213]:
# Spot-check one row (index 2656) whose prediction was clipped to zero.
featureData.loc[2656]

longitude    123.062500
latitude      53.520833
access         0.315960
pet           -1.572818
pop           -0.577570
urban          0.778462
W_access       0.571596
adj_cases      0.000000
Name: 2656, dtype: float64

In [217]:
# Fold the flat prediction column back into the 2-D raster grid.
adj_cases_raster_values = np.reshape(featureData["adj_cases"].values, shape)

In [218]:
# Display the gridded predictions (NumPy abbreviates the array repr).
adj_cases_raster_values

array([[-3.4e+38, -3.4e+38, -3.4e+38, ..., -3.4e+38, -3.4e+38, -3.4e+38],
       [-3.4e+38, -3.4e+38, -3.4e+38, ..., -3.4e+38, -3.4e+38, -3.4e+38],
       [-3.4e+38, -3.4e+38, -3.4e+38, ..., -3.4e+38, -3.4e+38, -3.4e+38],
       ...,
       [-3.4e+38, -3.4e+38, -3.4e+38, ..., -3.4e+38, -3.4e+38, -3.4e+38],
       [-3.4e+38, -3.4e+38, -3.4e+38, ..., -3.4e+38, -3.4e+38, -3.4e+38],
       [-3.4e+38, -3.4e+38, -3.4e+38, ..., -3.4e+38, -3.4e+38, -3.4e+38]],
      dtype=float32)

In [219]:
# Largest value in the grid, ignoring NaNs.
np.nanmax(adj_cases_raster_values)

50.02286

In [266]:
def save_raster():
    """Write the gridded predictions to ``../../data/predictions/adj_cases.tif``.

    Reads the module-level ``adj_cases_raster_values`` array and writes it as
    band 1 of a single-band GeoTIFF. The CRS, affine transform and no-data
    value are copied from the ``access`` feature raster.
    """
    template_path = "../../data/features/{}.tif".format("access")
    output_path = "../../data/predictions/adj_cases.tif"

    # Both datasets are opened with context managers so the handles are
    # released (and the output flushed) even if the write raises.
    with rasterio.open(template_path) as template:
        # NOTE(review): the array is filled with the no-data value returned by
        # get_featureData, while the tag written here comes from the "access"
        # raster; confirm the two agree.
        with rasterio.open(
            output_path,
            'w',
            driver='GTiff',
            height=adj_cases_raster_values.shape[0],
            width=adj_cases_raster_values.shape[1],
            count=1,
            dtype=adj_cases_raster_values.dtype,
            crs=template.crs,
            transform=template.transform,
            nodata=template.nodata,
        ) as destination:
            destination.write(adj_cases_raster_values, 1)

In [267]:
# Write the prediction raster to disk.
save_raster()