In [1]:
# Install the GDAL Python bindings; the notebook later imports them as `gdal`.
# NOTE(review): no version is pinned, so the installed release depends on when this cell runs.
!pip install gdal



In [2]:
# Install rasterio, used later for `rasterio.transform.from_bounds` and `rasterio.features.rasterize`.
# NOTE(review): no version is pinned, so the installed release depends on when this cell runs.
!pip install rasterio

Collecting rasterio
[?25l  Downloading https://files.pythonhosted.org/packages/e1/bf/d3c5e7df3828db144a6797269bf3aec31db96c20f13e75b93179eb059955/rasterio-1.2.3-cp37-cp37m-manylinux1_x86_64.whl (19.1MB)
[K     |████████████████████████████████| 19.1MB 227kB/s 
Collecting snuggs>=1.4.1
  Downloading https://files.pythonhosted.org/packages/cc/0e/d27d6e806d6c0d1a2cfdc5d1f088e42339a0a54a09c3343f7f81ec8947ea/snuggs-1.4.7-py3-none-any.whl
Collecting click<8,>=4.0
[?25l  Downloading https://files.pythonhosted.org/packages/d2/3d/fa76db83bf75c4f8d338c2fd15c8d33fdd7ad23a9b5e57eb6c5de26b430e/click-7.1.2-py2.py3-none-any.whl (82kB)
[K     |████████████████████████████████| 92kB 6.7MB/s 
Collecting click-plugins
  Downloading https://files.pythonhosted.org/packages/e9/da/824b92d9942f4e472702488857914bdd50f73021efea15b4cad9aca8ecef/click_plugins-1.1.1-py2.py3-none-any.whl
Collecting cligj>=0.5
  Downloading https://files.pythonhosted.org/packages/42/1e/947eadf10d6804bf276eb8a038bd5307996dceaaa41

In [3]:
# Install rasterstats.
# NOTE(review): rasterstats is not imported in the cells shown here — confirm it is still needed.
!pip install rasterstats

Collecting rasterstats
  Downloading https://files.pythonhosted.org/packages/9f/52/055b2b736e4aa1126c4619a561b44c3bc30fbe48025e6f3275b92928a0a0/rasterstats-0.15.0-py3-none-any.whl
Collecting simplejson
[?25l  Downloading https://files.pythonhosted.org/packages/a8/04/377418ac1e530ce2a196b54c6552c018fdf1fe776718053efb1f216bffcd/simplejson-3.17.2-cp37-cp37m-manylinux2010_x86_64.whl (128kB)
[K     |████████████████████████████████| 133kB 12.4MB/s 
Collecting fiona
[?25l  Downloading https://files.pythonhosted.org/packages/ea/2a/404b22883298a3efe9c6ef8d67acbf2c38443fa366ee9cd4cd34e17626ea/Fiona-1.8.19-cp37-cp37m-manylinux1_x86_64.whl (15.3MB)
[K     |████████████████████████████████| 15.3MB 174kB/s 
Collecting munch
  Downloading https://files.pythonhosted.org/packages/cc/ab/85d8da5c9a45e072301beb37ad7f833cd344e04c817d97e0cc75681d248f/munch-2.5.0-py2.py3-none-any.whl
Installing collected packages: simplejson, munch, fiona, rasterstats
Successfully installed fiona-1.8.19 munch-2.5.0 rast

In [4]:
# Mount Google Drive in the Colab runtime; every data path used below lives under
# '/content/drive/My Drive/'.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
# -*- coding: utf-8 -*-

############################ Import libraries ############################

import numpy as np
import gdal
import json
import rasterio
from rasterio.features import rasterize
import zipfile
import pandas as pd
import re
import os
import os.path
from pathlib import Path
import errno
from shapely.geometry import Polygon
from scipy.special import expit

############################ Auxiliary functions ############################


def get_list_of_data_files(folder_name, extension):
    """Recursively list the files under ``folder_name`` whose path ends with ``extension``.

    Args:
        folder_name: Directory to walk. A relative path is resolved against the
            current working directory; an absolute path is used as is
            (``os.path.join`` discards the prefix in that case).
        extension: Literal suffix to keep, e.g. ``".tif"``. It is compared
            character for character, not interpreted as a regular expression.

    Returns:
        list[str]: Full paths of the matching files, in ``os.walk`` order.
    """
    current_directory = str(Path().absolute())
    root_folder = os.path.join(current_directory, folder_name)

    data_files = []
    for path, _subdirs, files in os.walk(root_folder):
        for name in files:
            full_path = os.path.join(path, name)
            # Literal suffix test: the former regex (extension + '$') treated
            # the dot of ".tif" as "any character" and also kept e.g. "x_tif".
            if full_path.endswith(extension):
                data_files.append(full_path)

    return data_files

#Load the commune ids, name-to-id mapping and polygons from Communes.geojson
def open_Communes(path='/content/drive/My Drive/Data for Good (saison 9) - projet GeoWatch Labs/Communes.geojson'):
    """Load the communes GeoJSON and extract ids, a name-to-id mapping and polygons.

    Args:
        path: Location of the GeoJSON file. Defaults to the project's
            ``Communes.geojson`` on the mounted Google Drive.

    Returns:
        tuple: ``(commune_dict, commune_id, geometry)`` where

        * ``commune_dict`` maps each feature's ``ADM3_REFNA`` property to its
          ``ID_3`` property,
        * ``commune_id`` is the list of ``ID_3`` values in file order,
        * ``geometry`` is the list of ``Polygon`` objects in the same order.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If a feature lacks one of the expected properties.

    Other failures are no longer re-labelled as ``FileNotFoundError``; they
    surface with their real type so the actual problem is visible.
    """
    # GeoJSON is UTF-8 by specification; open() raises FileNotFoundError
    # (with errno and filename) by itself when the file is missing.
    with open(path, encoding='utf-8') as json_file:
        data = json.load(json_file)

    features = data['features']

    commune_id = [commune['properties']['ID_3'] for commune in features]
    commune_dict = {commune['properties']['ADM3_REFNA'] : commune['properties']['ID_3'] for commune in features}
    # Only coordinates[0][0] is used for each feature.
    # NOTE(review): if the features are MultiPolygons this keeps just the outer
    # ring of the first polygon (other parts and holes are dropped) — confirm
    # this is acceptable for the communes layer.
    geometry = [Polygon(commune['geometry']['coordinates'][0][0]) for commune in features]

    return commune_dict, commune_id, geometry

#Get bounding box of tiff image
def GetExtent(ds):
    """Return the bounding box of a raster dataset as ``(xmin, xmax, ymin, ymax)``.

    ``ds`` must expose ``GetGeoTransform()``, ``RasterXSize`` and
    ``RasterYSize``. Only the origin (elements 0 and 3) and the pixel sizes
    (elements 1 and 5) of the geotransform are used; elements 2 and 4 are
    ignored. Taking min/max makes the result independent of the sign of the
    pixel sizes.
    """
    transform = ds.GetGeoTransform()
    width, height = ds.RasterXSize, ds.RasterYSize

    x_origin, y_origin = transform[0], transform[3]
    x_opposite = x_origin + width * transform[1]
    y_opposite = y_origin + height * transform[5]

    x_edges = (x_origin, x_opposite)
    y_edges = (y_origin, y_opposite)
    return min(x_edges), max(x_edges), min(y_edges), max(y_edges)

#Convert tiff to numpy matrix with bounding box
def convert_one_band_raster_to_mappable(path_raster):
    """Read band 1 of a raster file and return it with the raster's bounding box.

    Args:
        path_raster: Path of the raster file, passed to ``gdal.Open``.

    Returns:
        tuple: ``(dataset, xmin, xmax, ymin, ymax)`` where ``dataset`` is the
        first band read as an array and passed through ``scipy.special.expit``,
        and the four bounds come from ``GetExtent``.

    Raises:
        OSError: If ``gdal.Open`` returns ``None`` for ``path_raster``.
            Previously this case printed a message and then failed with an
            unrelated ``AttributeError``.
    """
    # Open raster file
    ds = gdal.Open(path_raster)
    if ds is None:
        raise OSError('Could not open raster file: ' + str(path_raster))

    # Raster size in pixels
    cols = ds.RasterXSize
    rows = ds.RasterYSize

    xmin, xmax, ymin, ymax = GetExtent(ds)

    # Only the first band is read, whatever the band count.
    band = ds.GetRasterBand(1)
    # expit is the logistic sigmoid, so every returned value lies in [0, 1].
    # NOTE(review): confirm this squashing is intended for the raw raster
    # values (and for nodata cells, which are not masked here).
    dataset = expit(band.ReadAsArray(0, 0, cols, rows))

    return dataset, xmin, xmax, ymin, ymax


In [6]:
############################ code ############################
# For every yield raster, compute one value per commune and store it in
# commune_to_yield_avg_by_year_by_crop[year][commune id][crop].
# The year and crop are taken from the file path: .../<year>/*/<crop>/<file>.tif

print("------------- data unzipped ---------------")

data_files_list = get_list_of_data_files("/content/drive/My Drive/Data for Good (saison 9) - projet GeoWatch Labs/GeoWatch Labs Agricultural Maps/Historical Yields", ".tif")
nb_valid_files = 0

print("------------- initializing ----------------")

commune_to_yield_avg_by_year_by_crop = {"2010" : {}, "2011" : {}, "2012" : {}, "2013" : {}, "2014" : {}}
commune_dict, commune_id, geometry = open_Communes()
crops = {}

print("------------- data processing --------------")

# No population raster is loaded yet. None (rather than a real year such as
# "2009") guarantees the raster is loaded for the first file, whatever its year.
year = None

for data_file_index, data_file_name in enumerate(data_files_list):

    print(round(data_file_index/len(data_files_list)*100), " %")

    path_parts = data_file_name.split("/")
    file_year = path_parts[-4]
    crop = path_parts[-2]

    if year != file_year:
        year = file_year
        dataset_pop, xmin_pop, xmax_pop, ymin_pop, ymax_pop= convert_one_band_raster_to_mappable("/content/drive/My Drive/Data for Good (saison 9) - projet GeoWatch Labs/Groupe 3 - Marchés Alimentaires/images/population images/" + year + "_population.tif")
        # from_bounds expects (west, south, east, north, width, height):
        # width is the number of columns (shape[1]), height the number of rows (shape[0]).
        affine_pop = rasterio.transform.from_bounds(xmin_pop, ymin_pop, xmax_pop, ymax_pop, dataset_pop.shape[1], dataset_pop.shape[0])
        # The population masks depend only on the year's population raster,
        # so they are rasterized once per year instead of once per yield file.
        pop_mask_sums = [
            np.sum(rasterize(shapes=[commune_geometry],
                             out_shape=dataset_pop.shape,
                             transform=affine_pop))
            for commune_geometry in geometry
        ]

    dataset, xmin, xmax, ymin, ymax= convert_one_band_raster_to_mappable(data_file_name)
    affine = rasterio.transform.from_bounds(xmin, ymin, xmax, ymax, dataset.shape[1], dataset.shape[0])

    # Years outside the pre-declared 2010-2014 range get their own entry
    # instead of raising KeyError.
    yields_by_commune = commune_to_yield_avg_by_year_by_crop.setdefault(file_year, {})

    for commune_index in range(len(geometry)):

        mask = rasterize(shapes=[geometry[commune_index]],
                 out_shape=dataset.shape,
                 transform= affine)

        mask_sum = np.sum(mask)
        pop_mask_sum = pop_mask_sums[commune_index]

        # NOTE(review): the divisor is the number of population-raster pixels
        # covered by the commune; the population values in dataset_pop are
        # never used. Confirm whether np.sum(dataset_pop * mask_pop) was intended.
        if mask_sum > 0 and pop_mask_sum > 0:
            mean = np.sum(dataset * mask) / mask_sum / pop_mask_sum
        else:
            # Commune not covered by one of the rasters: no value rather than a division by zero.
            mean = 0

        crop_yields = yields_by_commune.setdefault(commune_id[commune_index], {})
        if crop in crop_yields:
            # Several rasters for the same year/commune/crop: keep the largest value.
            crop_yields[crop] = max(mean, crop_yields[crop])
        else:
            crop_yields[crop] = mean
          


------------- data unzipped ---------------
------------- initializing ----------------
------------- data processing --------------
0  %
0  %
1  %
1  %
1  %
1  %
2  %
2  %
2  %
3  %
3  %
3  %
3  %
4  %
4  %
4  %
5  %
5  %
5  %
6  %
6  %
6  %
6  %
7  %
7  %
7  %
8  %
8  %
8  %
8  %
9  %
9  %
9  %
10  %
10  %
10  %
10  %
11  %
11  %
11  %
12  %
12  %
12  %
12  %
13  %
13  %
13  %
14  %
14  %
14  %
14  %
15  %
15  %
15  %
16  %
16  %
16  %
17  %
17  %
17  %
17  %
18  %
18  %
18  %
19  %
19  %
19  %
19  %
20  %
20  %
20  %
21  %
21  %
21  %
21  %
22  %
22  %
22  %
23  %
23  %
23  %
23  %
24  %
24  %
24  %
25  %
25  %
25  %
26  %
26  %
26  %
26  %
27  %
27  %
27  %
28  %
28  %
28  %
28  %
29  %
29  %
29  %
30  %
30  %
30  %
30  %
31  %
31  %
31  %
32  %
32  %
32  %
32  %
33  %
33  %
33  %
34  %
34  %
34  %
34  %
35  %
35  %
35  %
36  %
36  %
36  %
37  %
37  %
37  %
37  %
38  %
38  %
38  %
39  %
39  %
39  %
39  %
40  %
40  %
40  %
41  %
41  %
41  %
41  %
42  %
42  %
42  %
43  %
43  %
43  %


In [7]:
# Add one column per crop to the FSMS table: for each row, the yield of the row's
# commune ("moughataa" column) in the year before the row's "year".
data = pd.read_csv("/content/drive/My Drive/Data for Good (saison 9) - projet GeoWatch Labs/Groupe 3 - Marchés Alimentaires/aggregated_match_for_FSMS_files.csv")

# Only the crops listed here are exported; each list receives exactly one value per row.
new_columns = {"groundnut" : [], "millet" : [], "sorghum" : [], "maize" : [], "cowpea" : []}

# Index the yields by the string form of the commune id, so that ids read from
# the CSV (strings such as "123" obtained from "123.0") match whatever type the
# GeoJSON uses for ID_3.
yields_by_year = {
    str(yield_year): {str(commune): crop_yields for commune, crop_yields in communes.items()}
    for yield_year, communes in commune_to_yield_avg_by_year_by_crop.items()
}

for row in data.index:
    moughataa = data.loc[row, "moughataa"]
    survey_year = data.loc[row, "year"]

    # Default: no yields known for this row -> every crop column gets "".
    yields = {}

    if not pd.isna(moughataa) and not pd.isna(survey_year):
        # int() avoids keys such as "2010.0" when the year column is read as float.
        previous_year = str(int(survey_year) - 1)
        label = str(moughataa)

        if label.endswith('.0'):
            # Numeric commune id that was read as a float: drop the ".0".
            village = label[:-2]
        elif moughataa in commune_dict:
            # Commune name: translate it to its id.
            village = str(commune_dict[moughataa])
        else:
            village = None

        yields = yields_by_year.get(previous_year, {}).get(village, {})

    # Append (not assign) so that each column keeps one entry per row; crops
    # missing for this commune/year are left blank.
    for culture in new_columns:
        new_columns[culture].append(yields.get(culture, ""))

for culture in new_columns.keys():
    data[culture] = new_columns[culture]

  interactivity=interactivity, compiler=compiler, result=result)


In [9]:
# Write the table, including the crop yield columns added above, next to the input CSV.
# NOTE(review): to_csv is called without index=False, so the DataFrame index is
# written as an extra first column — confirm downstream readers expect it.
data.to_csv("/content/drive/My Drive/Data for Good (saison 9) - projet GeoWatch Labs/Groupe 3 - Marchés Alimentaires/aggregated_match_for_FSMS_files_with_yields.csv")