In [1]:
import geopandas as gpd
import pandas as pd
from requests.auth import HTTPBasicAuth
import requests
import json
import plotly.express as px
import os
import rtree
import numpy as np
from shapely.geometry import Polygon

In [3]:
def make_grid(bounds, size):
    xmin, ymin, xmax, ymax = bounds

    length = size
    wide = size

    cols = list(np.arange(xmin, xmax + size, size))
    rows = list(np.arange(ymin, ymax + size, size))

    polygons = []
    for x in cols[:-1]:
        for y in rows[:-1]:
            polygons.append(Polygon([(x,y), (x+size, y), (x+size, y+length), (x, y+size)]))

    grid = gpd.GeoDataFrame({'geometry':polygons})
    grid = grid.set_crs(epsg=4326)
    grid['centroid'] = grid.centroid
    grid = grid.to_crs(epsg=4326)
    return grid

In [4]:
#function that joins fewsnet data to grid with list of local files
dates_list = []
unmerged_files = []
def join_to_grid(grid_gdf, polygon_file_list):
    grid_gdf = grid_gdf.set_geometry('centroid')
    grid_gdf = grid_gdf.to_crs(epsg=4326)
    filenum = 1
    for file in polygon_file_list:
        try:
            print ("joining file " + str(filenum) + " " + str(file))
            tempgdf = gpd.read_file(file)
            tempgdf = tempgdf.to_crs(epsg=4326)
            month = file[-9:-7]
            year = file[-13:-9]
            datestring = year + "-" + month
            tempgdf = tempgdf.add_suffix("-"+datestring)
            tempgdf = tempgdf.rename(columns={"geometry-"+datestring: "geometry"})
            tempgdf = tempgdf.set_geometry('geometry')
            # print column name
            print ("CS-"+year+'-'+month)
            # the actual spatial join
            grid_gdf = grid_gdf.sjoin(tempgdf, how="left")
            print ("with duplicates dataframe is " + str(grid_gdf.shape))
            grid_gdf = grid_gdf.drop(columns=['index_right'])
            # print column names of grid_gdf
            # print (grid_gdf.columns)
            # this part deals with the merging of columns when reports are from the same year and month
            # the result of this is the data is all joined into one column. 
            # There should not be any duplicate geometries measured in these reports as they are from different regions
            # if there are duplicate geometries, the data that was joined first will be kept
            # we handle duplicate geometries from the same reports in the next section keeping the higher measurement
            if "CS-"+datestring+"_left" in grid_gdf.columns:
                try:
                    grid_gdf["CS-"+datestring+"_left"].fillna(grid_gdf["CS-"+datestring+"_right"], inplace=True)
                    grid_gdf = grid_gdf.rename(columns={"CS-"+datestring+"_left": "CS-"+datestring})
                    grid_gdf = grid_gdf.drop(columns=["CS-"+datestring+"_right"])
                except:
                    print ("no column to merge")
                try:
                    grid_gdf["HA0-"+datestring+"_left"] = grid_gdf["HA0-"+datestring+"_left"].fillna(grid_gdf["HA0-"+datestring+"_right"], inplace=True)
                    grid_gdf = grid_gdf.rename(columns={"HA0-"+datestring+"_left": "HA0-"+datestring})
                    grid_gdf = grid_gdf.drop(columns=["HA0-"+datestring+"_right"])
                except:
                    print ("no column to merge")
            
            grid_gdf = grid_gdf.sort_values(by=["CS-"+ datestring], ascending=False)
            grid_gdf = grid_gdf.drop_duplicates(subset=['centroid'], keep='first')
            
        except:
            print ("error with file " + str(file))
            unmerged_files.append(file)
        # print (grid_gdf.columns)
        # for duplicate centroid rows we keep the row with the higher CS value
        print ("duplicates removed " + str(grid_gdf.shape))
        dates_list.append(year + "-" + month)
        filenum += 1
    return grid_gdf

In [5]:
# goes through each folder and creates a dictionary with file name and column names
# there is also a list created with just the file names for use later
# It adds each of the files with more metadata to the large_files list

large_files = []

# Central America files
cam_dict = {}
cam_files = []
for file in os.listdir("./data/ALL_HFIC/Central America and the Caribbean"):
    if file.endswith(".shp"):
        cam_files.append("./data/ALL_HFIC/Central America and the Caribbean/" + file)
for file in cam_files:
    if file.endswith(".shp"):
        tempgdf = gpd.read_file(file)
        cam_dict[file] = tempgdf.columns
        if "ADMIN1" in tempgdf.columns:
            large_files.append(file)

# Central Asia files
cas_dict = {}
cas_files = []
for file in os.listdir("./data/ALL_HFIC/Central Asia"):
    if file.endswith(".shp"):
        cas_files.append("./data/ALL_HFIC/Central Asia/" + file)
for file in cas_files:
    tempgdf = gpd.read_file(file)
    cas_dict[file] = tempgdf.columns
    if "ADMIN1" in tempgdf.columns:
        large_files.append(file)

# East Africa files
ea_dict = {}
ea_files = []
for file in os.listdir("./data/ALL_HFIC/East Africa"):
    if file.endswith(".shp"):
        ea_files.append("./data/ALL_HFIC/East Africa/" + file)
for file in ea_files:
    tempgdf = gpd.read_file(file)
    ea_dict[file] = tempgdf.columns
    if "ADMIN1" in tempgdf.columns:
        large_files.append(file)

# West Africa files
wa_dict = {}
wa_files = []
for file in os.listdir("./data/ALL_HFIC/West Africa"):
    if file.endswith(".shp"):
        wa_files.append("./data/ALL_HFIC/West Africa/" + file)
for file in wa_files:
    tempgdf = gpd.read_file(file)
    wa_dict[file] = tempgdf.columns
    if "ADMIN1" in tempgdf.columns:
        large_files.append(file)

# South Africa files
sa_dict = {}
sa_files = []
for file in os.listdir("./data/ALL_HFIC/Southern Africa"):
    if file.endswith(".shp"):
        sa_files.append("./data/ALL_HFIC/Southern Africa/" + file)
for file in sa_files:
    tempgdf = gpd.read_file(file)
    sa_dict[file] = tempgdf.columns
    if "ADMIN1" in tempgdf.columns:
        large_files.append(file)

all_files = cam_files + cas_files + ea_files + wa_files + sa_files

In [6]:
dateslist = []
for file in all_files:
    month = file[-9:-7]
    year = file[-13:-9]
    dateslist.append(year + "-" + month)

In [14]:
# The script below can be used to generate a list of URLs that link straight to the files on the fewsnet server
# Use this if you do not have the files on your machine and do not want to download them
url_list = []
for file in ea_files:
    if file.endswith(".shp"):
        beginning = "https://fdw.fews.net/api/ipcpackage/?country_group="
        code = "902"
        middle = "&collection_date="
        date = file[-13:-9] + "-" + file[-9:-7] + "-" + "01"
        url_list.append(beginning + code + middle + date)
for file in wa_files:
    if file.endswith(".shp"):
        beginning = "https://fdw.fews.net/api/ipcpackage/?country_group="
        code = "901"
        middle = "&collection_date="
        date = file[-13:-9] + "-" + file[-9:-7] + "-" + "01"
        url_list.append(beginning + code + middle + date)
for file in sa_files:
    if file.endswith(".shp"):
        beginning = "https://fdw.fews.net/api/ipcpackage/?country_group="
        code = "903"
        middle = "&collection_date="
        date = file[-13:-9] + "-" + file[-9:-7] + "-" + "01"
        url_list.append(beginning + code + middle + date)
for file in cam_files:
    if file.endswith(".shp"):
        beginning = "https://fdw.fews.net/api/ipcpackage/?country_group="
        code = "904"
        middle = "&collection_date="
        date = file[-13:-9] + "-" + file[-9:-7] + "-" + "01"
        url_list.append(beginning + code + middle + date)

In [None]:
# this makes one giant geodataframe with all files' data in one place
# we can use this to get bounds for our grid
all_full = gpd.GeoDataFrame()
# dateslist = []
error_files = []
for file in all_files:
    try:
        print (file)
        tempgdf = gpd.read_file(file)
        tempgdf = tempgdf.to_crs(epsg=4326)
        # add a column named year with the value filename[3:7]
        # tempgdf["year"] = url[-10:-6]
        # # last character of file
        # tempgdf["month"] = url[-5:-3]
        # dateslist.append(url[-10:-6]+'-'+url[-5:-3])
        # add tempgdf to ea_full
        all_full = all_full.append(tempgdf)
    except:
        print ("error reading " + file)
        error_files.append(file)

fewsnet_template_grid = make_grid(all_full.total_bounds, 0.1)


In [8]:
# creates the grid template to be used in join
grid_template = gpd.read_file("./data/all_fewsnet_template_grid.geojson")
grid_template.set_geometry("geometry", inplace=True)
grid_template = grid_template.to_crs(epsg=4326)
grid_template["centroid"] = grid_template.centroid
grid_template.set_geometry("centroid", inplace=True)
grid_template = grid_template.to_crs(epsg=4326)
grid_template



  grid_template["centroid"] = grid_template.centroid


Unnamed: 0,geometry,centroid
0,"POLYGON ((-90.84149 14.83132, -90.74149 14.831...",POINT (-90.79149 14.88132)
1,"POLYGON ((-90.44149 14.93132, -90.34149 14.931...",POINT (-90.39149 14.98132)
2,"POLYGON ((-73.34149 18.43132, -73.24149 18.431...",POINT (-73.29149 18.48132)
3,"POLYGON ((-91.74149 15.83132, -91.64149 15.831...",POINT (-91.69149 15.88132)
4,"POLYGON ((-89.74149 14.23132, -89.64149 14.231...",POINT (-89.69149 14.28132)
...,...,...
152039,"POLYGON ((54.15851 12.43132, 54.25851 12.43132...",POINT (54.20851 12.48132)
152040,"POLYGON ((54.15851 12.53132, 54.25851 12.53132...",POINT (54.20851 12.58132)
152041,"POLYGON ((54.25851 12.43132, 54.35851 12.43132...",POINT (54.30851 12.48132)
152042,"POLYGON ((54.25851 12.53132, 54.35851 12.53132...",POINT (54.30851 12.58132)


In [9]:
# Joining the files to the grid
all_fewsnet_grid = join_to_grid(grid_template, all_files)

joining file 1 ./data/ALL_HFIC/Central America and the Caribbean/LAC_201007_CS.shp
CS-2010-07
with duplicates dataframe is (4575124, 28)
duplicates removed (152044, 27)
joining file 2 ./data/ALL_HFIC/Central America and the Caribbean/LAC_201107_CS.shp
CS-2011-07
with duplicates dataframe is (491201, 43)
duplicates removed (152044, 42)
joining file 3 ./data/ALL_HFIC/Central America and the Caribbean/LAC_200910_CS.shp
CS-2009-10
with duplicates dataframe is (466932, 58)
duplicates removed (152044, 57)
joining file 4 ./data/ALL_HFIC/Central America and the Caribbean/LAC_201204_CS.shp
CS-2012-04
with duplicates dataframe is (573342, 74)
duplicates removed (152044, 73)
joining file 5 ./data/ALL_HFIC/Central America and the Caribbean/LAC_201304_CS.shp
CS-2013-04
with duplicates dataframe is (534659, 90)
duplicates removed (152044, 89)
joining file 6 ./data/ALL_HFIC/Central America and the Caribbean/LAC_201410_CS.shp
CS-2014-10
with duplicates dataframe is (152044, 106)
duplicates removed (15

  result = DataFrame.merge(self, *args, **kwargs)


with duplicates dataframe is (152044, 227)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  grid_gdf["CS-"+datestring+"_left"].fillna(grid_gdf["CS-"+datestring+"_right"], inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  grid_gdf["HA0-"+datestring+"_left"] = grid_gdf["HA0-"+datestring+"_left"].fillna(grid_gdf["HA0-"+datestring+"_right"], inplace=True)


duplicates removed (152044, 224)
joining file 218 ./data/ALL_HFIC/Southern Africa/SA_202102_CS.shp
CS-2021-02
with duplicates dataframe is (152044, 227)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  grid_gdf["CS-"+datestring+"_left"].fillna(grid_gdf["CS-"+datestring+"_right"], inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  grid_gdf["HA0-"+datestring+"_left"] = grid_gdf["HA0-"+datestring+"_left"].fillna(grid_gdf["HA0-"+datestring+"_right"], inplace=True)


duplicates removed (152044, 224)
joining file 219 ./data/ALL_HFIC/Southern Africa/SA_202002_CS.shp
CS-2020-02
with duplicates dataframe is (152044, 227)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  grid_gdf["CS-"+datestring+"_left"].fillna(grid_gdf["CS-"+datestring+"_right"], inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  grid_gdf["HA0-"+datestring+"_left"] = grid_gdf["HA0-"+datestring+"_left"].fillna(grid_gdf["HA0-"+datestring+"_right"], inplace=True)


duplicates removed (152044, 224)
joining file 220 ./data/ALL_HFIC/Southern Africa/SA_201410_CS.shp
CS-2014-10
with duplicates dataframe is (152044, 227)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  grid_gdf["CS-"+datestring+"_left"].fillna(grid_gdf["CS-"+datestring+"_right"], inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  grid_gdf["HA0-"+datestring+"_left"] = grid_gdf["HA0-"+datestring+"_left"].fillna(grid_gdf["HA0-"+datestring+"_right"], inplace=True)


duplicates removed (152044, 224)
joining file 221 ./data/ALL_HFIC/Southern Africa/SA_201510_CS.shp
CS-2015-10
with duplicates dataframe is (152044, 227)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  grid_gdf["CS-"+datestring+"_left"].fillna(grid_gdf["CS-"+datestring+"_right"], inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  grid_gdf["HA0-"+datestring+"_left"] = grid_gdf["HA0-"+datestring+"_left"].fillna(grid_gdf["HA0-"+datestring+"_right"], inplace=True)


duplicates removed (152044, 224)


In [11]:
# create a backup of thd df in case I do something stupid
all_fewsnet_grid_proof = all_fewsnet_grid

In [None]:
# Deal with the troubled column names and join
troubled_columns = ['CS200910_P-2009-10', 'CS201010_P-2010-10', 'CS201110_P-2011-10', 'CS201201_P-2012-01']

all_fewsnet_grid["CS-2009-10"].fillna(all_fewsnet_grid['CS200910_P-2009-10'] , inplace=True)
all_fewsnet_grid["CS-2010-10"].fillna(all_fewsnet_grid['CS201010_P-2010-10'] , inplace=True)
all_fewsnet_grid["CS-2011-10"].fillna(all_fewsnet_grid['CS201110_P-2011-10'] , inplace=True)
all_fewsnet_grid["CS-2012-01"].fillna(all_fewsnet_grid['CS201201_P-2012-01'] , inplace=True)

all_fewsnet_grid = all_fewsnet_grid.drop(columns=troubled_columns)

In [12]:
# getting rid of rows that are all nan's. shouldn't be any with the template
all_fewsnet_grid = all_fewsnet_grid.dropna(axis=0, how='all', subset=all_fewsnet_grid.iloc[:,3:224].columns)
all_fewsnet_grid.shape

(152044, 220)

In [13]:
# taking care of duplicate column names
class renamer():
    def __init__(self):
        self.d = dict()
    def __call__(self, x):
        if x not in self.d:
            self.d[x] = 0
            return x
        else:
            self.d[x] += 1
            return "%s_%d" % (x, self.d[x])
            df.rename(columns=renamer())
print (all_fewsnet_grid.columns[all_fewsnet_grid.columns.duplicated()])
all_fewsnet_grid = all_fewsnet_grid.rename(columns=renamer())
print (all_fewsnet_grid.columns[all_fewsnet_grid.columns.duplicated()])


Index(['cov_start-2021-10_left', 'cov_end-2021-10_left',
       'report_mon-2021-10_left', 'country-2021-10_left',
       'unit_name-2021-10_left', 'ADMIN0-2021-10_left', 'ADMIN1-2021-10_left',
       'ADMIN2-2021-10_left', 'ADMIN3-2021-10_left', 'LZCODE-2021-10_left',
       'LZNAME-2021-10_left', 'cov_start-2021-10_right',
       'cov_end-2021-10_right', 'report_mon-2021-10_right',
       'country-2021-10_right', 'unit_name-2021-10_right',
       'ADMIN0-2021-10_right', 'ADMIN1-2021-10_right', 'ADMIN2-2021-10_right',
       'ADMIN3-2021-10_right', 'LZCODE-2021-10_right', 'LZNAME-2021-10_right'],
      dtype='object')
Index([], dtype='object')


In [14]:
#Writing to files
all_fewsnet_grid_write = all_fewsnet_grid.drop(columns=['centroid'])
all_fewsnet_grid_write = all_fewsnet_grid_write.set_geometry('geometry')
all_fewsnet_grid_write.set_crs(epsg=4326, inplace=True)
all_fewsnet_grid_write.to_file("./data/all_fewsnet_grid.geojson", driver='GeoJSON')
print("grid df written to file")

all_fewsnet_centroid = all_fewsnet_grid.drop(columns=['geometry'])
all_fewsnet_centroid = all_fewsnet_centroid.set_geometry('centroid')
all_fewsnet_centroid.set_crs(epsg=4326, inplace=True)
all_fewsnet_centroid.to_file("./data/all_fewsnet_centroid.geojson", driver='GeoJSON')
print("centroid df written to file")

grid df written to file


KeyboardInterrupt: 

In [15]:
# sort columns by name
all_fewsnet_grid = all_fewsnet_grid.reindex(sorted(all_fewsnet_grid.columns), axis=1)
# make a list of all columns
columnlist = all_fewsnet_grid.columns.tolist()
print (columnlist)

['ADMIN0-2009-10', 'ADMIN0-2011-07', 'ADMIN0-2012-04', 'ADMIN0-2013-04', 'ADMIN0-2014-10', 'ADMIN0-2021-10_left', 'ADMIN0-2021-10_left_1', 'ADMIN0-2021-10_right', 'ADMIN0-2021-10_right_1', 'ADMIN0_lef-2010-07', 'ADMIN0_rig-2010-07', 'ADMIN1-2009-10', 'ADMIN1-2011-07', 'ADMIN1-2012-04', 'ADMIN1-2013-04', 'ADMIN1-2014-10', 'ADMIN1-2021-10_left', 'ADMIN1-2021-10_left_1', 'ADMIN1-2021-10_right', 'ADMIN1-2021-10_right_1', 'ADMIN1FR-2009-10', 'ADMIN1FR-2011-07', 'ADMIN1FR-2012-04', 'ADMIN1FR-2013-04', 'ADMIN1FR-2014-10', 'ADMIN1FR_l-2010-07', 'ADMIN1FR_r-2010-07', 'ADMIN1PT-2009-10', 'ADMIN1PT-2011-07', 'ADMIN1PT-2012-04', 'ADMIN1PT-2013-04', 'ADMIN1PT-2014-10', 'ADMIN1PT_l-2010-07', 'ADMIN1PT_r-2010-07', 'ADMIN1SP-2009-10', 'ADMIN1SP-2011-07', 'ADMIN1SP-2012-04', 'ADMIN1SP-2013-04', 'ADMIN1SP-2014-10', 'ADMIN1SP_l-2010-07', 'ADMIN1SP_r-2010-07', 'ADMIN1_lef-2010-07', 'ADMIN1_rig-2010-07', 'ADMIN2-2009-10', 'ADMIN2-2010-07', 'ADMIN2-2011-07', 'ADMIN2-2012-04', 'ADMIN2-2013-04', 'ADMIN2-2014-

In [16]:
# forward fill all CS columns
startcol = columnlist.index('CS-2009-07')
endcol = columnlist.index('CS-2021-10') + 1
all_fewsnet_grid.iloc[:,startcol:endcol] = all_fewsnet_grid.iloc[:,startcol:endcol].ffill(axis=1)

In [29]:
endcol-startcol

45

In [18]:
# write forward filled dfs to file
all_fewsnet_grid_write = all_fewsnet_grid.drop(columns=['centroid'])
all_fewsnet_grid_write = all_fewsnet_grid_write.set_geometry('geometry')
all_fewsnet_grid_write.set_crs(epsg=4326, inplace=True)
all_fewsnet_grid_write.to_file("./data/all_fewsnet_grid_ffill.geojson", driver='GeoJSON')
print("grid df written to file")

# all_fewsnet_centroid = all_fewsnet_grid.drop(columns=['geometry'])
# all_fewsnet_centroid = all_fewsnet_centroid.set_geometry('centroid')
# all_fewsnet_centroid.set_crs(epsg=4326, inplace=True)
# all_fewsnet_centroid.to_file("./data/all_fewsnet_centroid_ffill.geojson", driver='GeoJSON')
# print("centroid df written to file")

grid df written to file


# stop here

In [142]:

# write testgdf to shp file
testgdf.to_file("./data/testgdf.shp", driver='ESRI Shapefile')
all_fewsnet_grid_write = all_fewsnet_grid.drop(columns=['centroid'])
all_fewsnet_grid_write = all_fewsnet_grid_write.set_geometry('geometry')
all_fewsnet_grid_write.set_crs(epsg=4326, inplace=True)
all_fewsnet_grid_write.to_file("./data/all_fewsnet_grid_ffill.shp", driver='ESRI Shapefile')
print("grid df written to file")


# write centroid df top geojson file
all_fewsnet_centroid = all_fewsnet_grid.drop(columns=['geometry'])
all_fewsnet_centroid = all_fewsnet_centroid.set_geometry('centroid')
all_fewsnet_centroid.set_crs(epsg=4326, inplace=True)
all_fewsnet_centroid.to_file("./data/all_fewsnet_centroid_ffill.shp", driver='ESRI Shapefile')
print("centroid df written to file")

  testgdf.to_file("./data/testgdf.shp", driver='ESRI Shapefile')
  all_fewsnet_grid_write.to_file("./data/all_fewsnet_grid_ffill.shp", driver='ESRI Shapefile')


grid df written to file


  all_fewsnet_centroid.to_file("./data/all_fewsnet_centroid_ffill.shp", driver='ESRI Shapefile')


KeyboardInterrupt: 

In [152]:
# create dataframe from geometry and centroid columns in all_fewsnet_grid
all_fewsnet_template = all_fewsnet_grid[['centroid']]

In [153]:
all_fewsnet_template

Unnamed: 0,centroid
9573,POINT (-90.79149 14.88132)
12190,POINT (-90.39149 14.98132)
124059,POINT (-73.29149 18.48132)
3697,POINT (-91.69149 15.88132)
16761,POINT (-89.69149 14.28132)
...,...
957849,POINT (54.20851 12.48132)
957850,POINT (54.20851 12.58132)
958503,POINT (54.30851 12.48132)
958504,POINT (54.30851 12.58132)


In [154]:
# set geometry of all_fewsnet_template to geometry column
all_fewsnet_template.set_geometry('centroid', inplace=True)
all_fewsnet_template.set_crs(epsg=4326, inplace=True)
all_fewsnet_template

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)


Unnamed: 0,centroid
9573,POINT (-90.79149 14.88132)
12190,POINT (-90.39149 14.98132)
124059,POINT (-73.29149 18.48132)
3697,POINT (-91.69149 15.88132)
16761,POINT (-89.69149 14.28132)
...,...
957849,POINT (54.20851 12.48132)
957850,POINT (54.20851 12.58132)
958503,POINT (54.30851 12.48132)
958504,POINT (54.30851 12.58132)


In [149]:
# write all_fewsnet_template to geojson file
all_fewsnet_template.to_file("./data/all_fewsnet_template_grid.geojson", driver='GeoJSON')

In [None]:
# write all_fewsnet_template to geojson file
all_fewsnet_template.to_file("./data/all_fewsnet_template_centroid.geojson", driver='GeoJSON')

In [24]:
# cleaning up the grid
# for columns in ea_grid that end in cs if value is 88 or 99 delete that value
# from what I can gather 88 and 99 are used to indicate missing data in FEWSNET
# for col in ea_grid.columns:
#     if col.endswith('-CS'):
#         ea_grid[col] = ea_grid[col].replace(88, np.nan)
#         ea_grid[col] = ea_grid[col].replace(99, np.nan)
# deleting rows that are just NaN values
ea_grid = ea_grid.dropna(axis=0, how='all', subset=ea_grid.iloc[:,3:93].columns)
# switching geometry from centroid column to geometry column. geometry is the column containing our tile squares. 
ea_grid = ea_grid.set_geometry('geometry')
ea_grid_nocentroid = ea_grid.drop(columns=['centroid'])


In [47]:
# replace columns in ea_grid with csgrid
ea_grid = ea_grid.drop(columns=ea_grid.columns[ea_grid.columns.str.endswith('-CS')])
ea_grid = ea_grid.join(csgrid)


In [49]:
ea_grid_nocentroid = ea_grid.drop(columns=['centroid'])
ea_grid_nocentroid.to_file("./data/join_experiments/ea_grid_1_bfill.geojson", driver='GeoJSON')

In [28]:
# sort dateslist
dateslist.sort()
# remove duplicates from list
dateslist = list(dict.fromkeys(dateslist))
dateslist

['2009-07',
 '2009-10',
 '2010-01',
 '2010-04',
 '2010-07',
 '2010-10',
 '2011-01',
 '2011-04',
 '2011-07',
 '2011-10',
 '2012-01',
 '2012-04',
 '2012-07',
 '2012-10',
 '2013-01',
 '2013-04',
 '2013-07',
 '2013-10',
 '2014-01',
 '2014-04',
 '2014-07',
 '2014-10',
 '2015-01',
 '2015-04',
 '2015-07',
 '2015-10',
 '2016-02',
 '2016-06',
 '2016-10',
 '2017-02',
 '2017-06',
 '2017-10',
 '2018-02',
 '2018-06',
 '2018-10',
 '2018-12',
 '2019-02',
 '2019-06',
 '2019-10',
 '2020-02',
 '2020-06',
 '2020-10',
 '2021-02',
 '2021-06',
 '2021-10']