### Irrigation model input file prep

This code prepares the final input file for the irrigation (agrodem) model. It extracts all necessary attributes at the crop locations and applies some name fixes needed for the model to run smoothly. The output dataframe is exported as a csv file, ready to be used in the irrigation model.

**Original code:** [Alexandros Korkovelos](https://github.com/akorkovelos) & [Konstantinos Pegios](https://github.com/kopegios)<br />
**Conceptualization & Methodological review :** [Alexandros Korkovelos](https://github.com/akorkovelos)<br />
**Updates, Modifications:** [Alexandros Korkovelos](https://github.com/akorkovelos)<br />
**Funding:** The World Bank (contract number: 7190531), [KTH](https://www.kth.se/en/itm/inst/energiteknik/forskning/desa/welcome-to-the-unit-of-energy-systems-analysis-kth-desa-1.197296)

In [1]:
#Import modules and libraries
import os
import geopandas as gpd
from rasterstats import point_query
import logging
import pandas as pd
from shapely.geometry import Point, Polygon
import gdal
import rasterio as rio
import fiona
import gdal
import osr
import ogr
import rasterio.mask
import time
import numpy as np
import itertools
import re
from osgeo import gdal,ogr
import struct
import csv
import glob
from rasterio.warp import calculate_default_transform, reproject
from rasterio.enums import Resampling
from rasterstats import point_query
from pyproj import Proj
from shapely.geometry import Point, Polygon

In [118]:
# Location and name of the crop csv to be prepared

path = r"N:\Agrodem\Irrigation_model\Output_data\agrodem_input"
name_of_file = "Cassava_Moz_1km_2030_SG_downscaled_SW.csv"

# Load the csv into a pandas dataframe
crop_csv_path = "\\".join((path, name_of_file))
crop_df = pd.read_csv(crop_csv_path)

In [119]:
# Fill in NaN values with the placeholder 99999 (note: not 0), modifying crop_df in place
crop_df.fillna(99999,inplace=True)
# Preview the first two rows
crop_df.head(2)

Unnamed: 0,Pixel,State,lon,lat,Crop,Fraction,MaizeArea,elevation,sw_dist,sw_suit
0,0,Massinga,34.43319,-22.28009,Cassava,4.2e-07,4.2e-05,148,18.669125,9999
1,1,Massinga,34.44287,-22.28036,Cassava,4e-08,4e-06,148,17.677906,9999


#### Converting dataframe to geo-dataframe

In [120]:
# Build one point per row from the lon/lat columns and convert the table to a
# spatial dataframe in the source CRS (EPSG:4326, geographic WGS84).
# NOTE(review): the {'init': 'epsg:...'} CRS form is deprecated in recent
# pyproj/geopandas releases; it is kept because the installed versions are not
# visible from here -- confirm before switching to the "EPSG:4326" string form.
crop_df['geometry'] = [Point(lon, lat) for lon, lat in zip(crop_df['lon'], crop_df['lat'])]
crop_df = gpd.GeoDataFrame(crop_df, geometry='geometry', crs={'init': 'epsg:4326'})

# Reproject the points to EPSG:32737 (WGS 84 / UTM zone 37S, units in metres)
crop_gdf = crop_df.to_crs({'init': 'epsg:32737'})

In [121]:
# List the column names of the reprojected geodataframe
crop_gdf.columns

Index(['Pixel', 'State', 'lon', 'lat', 'Crop', 'Fraction', 'MaizeArea',
       'elevation', 'sw_dist', 'sw_suit', 'geometry'],
      dtype='object')

In [122]:
## Dropping unnecessary columns

droping_cols = ["Pixel"]
crop_gdf.drop(columns=droping_cols, inplace=True)

# New for whole Moz: rename columns to the names used further down
renamed_cols = {
    'elevation': 'sw_depth',
    'MaizeArea': 'harv_area',
}
crop_gdf.rename(columns=renamed_cols, inplace=True)

# Adding the missing columns, each holding one constant value for all rows
# (the yields are stored as text, exactly as given here)
#maize_gdf["admin_1"] = "Zambezia"
for col_name, col_value in (("country", "moz"),
                            ("curr_yield", "4500"),
                            ("max_yield", "6000")):
    crop_gdf[col_name] = col_value

In [123]:
# Preview the first five rows after the column changes
crop_gdf.head(5)

Unnamed: 0,State,lon,lat,Crop,Fraction,harv_area,sw_depth,sw_dist,sw_suit,geometry,country,curr_yield,max_yield
0,Massinga,34.43319,-22.28009,Cassava,4.2e-07,4.2e-05,148,18.669125,9999,POINT (29183.669 7529046.250),moz,4500,6000
1,Massinga,34.44287,-22.28036,Cassava,4e-08,4e-06,148,17.677906,9999,POINT (30184.048 7529046.532),moz,4500,6000
2,Massinga,34.90539,-22.36485,Cassava,1.6e-07,1.6e-05,115,19.756705,9999,POINT (78183.723 7521046.544),moz,4500,6000
3,Massinga,34.91508,-22.3651,Cassava,0.0,0.0,117,20.506695,9999,POINT (79183.927 7521046.013),moz,4500,6000
4,Massinga,34.9046,-22.39189,Cassava,2.1e-05,0.002096,83,19.540019,9999,POINT (78183.982 7518045.765),moz,4500,6000


<div class="alert alert-block alert-warning">
<b>Note:</b> The geodataframe contains crop points with the needed attributes, including a geometry column. It needs to be exported as a shapefile. However, when this is done with geopandas, the geometry column (Point(x,y)) is not transferred to the attribute table. It is, however, needed for the extraction process below; otherwise "None" values are returned. To overcome this issue, the csv is imported into QGIS and then exported as a feature layer in the same folder. This solves the issue.
</div>

In [124]:
# Output location: same folder and file name as the csv that was read at the
# start, so that file is overwritten
path = r"N:\Agrodem\Irrigation_model\Output_data\agrodem_input"
shpname = "Cassava_Moz_1km_2030_SG_downscaled_SW.csv"

# Write the geodataframe to csv (the dataframe index is written as well)
crop_csv_out = os.path.join(path, shpname)
crop_gdf.to_csv(crop_csv_out)

#### >>> Please open Qgis and act as suggested above<<<

In [125]:
# Extract, for every GeoTIFF in raster_path, the band-1 value under each point
# of the crop shapefile and write one csv per raster (rows: feature id, value).
raster_path = r"N:\Agrodem\Irrigation_model\Input_data\Supporting_Layers"
csvoutpath = r"N:\Agrodem\Irrigation_model\Output_data\agrodem_input\1km_2030_Cassava\SG"

print ("Reading independent variables...")

# Every .tif file in the folder is treated as one variable
raster_files = [i for i in os.listdir(raster_path) if i.endswith('.tif')]

shp_filename = r"N:\Agrodem\Irrigation_model\Output_data\agrodem_input\shapefiles\Cassava_Moz_1km_2030_SG_downscaled_SW.shp"

print ("Extracting raster values to points...")

for i in raster_files:
    print("Extracting " + i + " values...")
    src_filename = os.path.join(raster_path, i)
    li_values = list()

    # gdal.Open / ogr.Open return None (instead of raising) when exceptions
    # are not enabled, so fail early with a readable message
    src_ds = gdal.Open(src_filename)
    if src_ds is None:
        raise IOError("Could not open raster: " + src_filename)
    gt = src_ds.GetGeoTransform()
    rb = src_ds.GetRasterBand(1)

    # The layer is re-opened for every raster so iteration starts at the first feature
    ds = ogr.Open(shp_filename)
    if ds is None:
        raise IOError("Could not open shapefile: " + shp_filename)
    lyr = ds.GetLayer()

    for feat in lyr:
        geom = feat.GetGeometryRef()
        feat_id = feat.GetField('field_1')
        mx, my = geom.GetX(), geom.GetY()  # coord in map units

        # Convert from map to pixel coordinates.
        # Only works for geotransforms with no rotation.
        px = int((mx - gt[0]) / gt[1])  # x pixel
        py = int((my - gt[3]) / gt[5])  # y pixel

        intval = rb.ReadAsArray(px, py, 1, 1)
        if intval is None:
            # The 1x1 window could not be read, e.g. the point lies outside the raster
            raise ValueError("No raster value for feature " + str(feat_id) +
                             " at pixel (" + str(px) + ", " + str(py) + ") in " + i)
        # intval[0] is a one-element array; it is written as e.g. "[148]" and the
        # brackets are stripped again when the csv files are merged
        li_values.append([feat_id, intval[0]])

    print ("Writing " + i + " values to csv...")

    # Output name = first two dot-separated parts of the raster name joined
    # together, e.g. "elevation.tif" -> "elevationtif.csv"
    name_parts = i.split('.')
    out_csv = os.path.join(csvoutpath, name_parts[0] + name_parts[1] + '.csv')

    # newline='' is required by the csv module; without it every row is
    # followed by an extra blank line on Windows
    with open(out_csv, 'w', newline='') as csvfile:
        wr = csv.writer(csvfile)
        wr.writerows(li_values)

Reading independent variables...
Extracting raster values to points...
Extracting elevation.tif values...
Writing elevation.tif values to csv...
Extracting gyga_af_agg_erzd_tawcpf23mm__m_1km.tif values...
Writing gyga_af_agg_erzd_tawcpf23mm__m_1km.tif values to csv...
Extracting wc2.0_30s_prec_01.tif values...
Writing wc2.0_30s_prec_01.tif values to csv...
Extracting wc2.0_30s_prec_02.tif values...
Writing wc2.0_30s_prec_02.tif values to csv...
Extracting wc2.0_30s_prec_03.tif values...
Writing wc2.0_30s_prec_03.tif values to csv...
Extracting wc2.0_30s_prec_04.tif values...
Writing wc2.0_30s_prec_04.tif values to csv...
Extracting wc2.0_30s_prec_05.tif values...
Writing wc2.0_30s_prec_05.tif values to csv...
Extracting wc2.0_30s_prec_06.tif values...
Writing wc2.0_30s_prec_06.tif values to csv...
Extracting wc2.0_30s_prec_07.tif values...
Writing wc2.0_30s_prec_07.tif values to csv...
Extracting wc2.0_30s_prec_08.tif values...
Writing wc2.0_30s_prec_08.tif values to csv...
Extracting 

## Merge csv files with crop

In [126]:
# Read the crop csv again; the extracted raster values are added to this table

path = r"N:\Agrodem\Irrigation_model\Output_data\agrodem_input"
name_of_file = "Cassava_Moz_1km_2030_SG_downscaled_SW.csv"

agrodem_csv_path = "\\".join((path, name_of_file))
agrodem_input = pd.read_csv(agrodem_csv_path)

In [127]:
# Attach the values of every per-raster csv in csvoutpath to agrodem_input,
# one new column per csv, named after the csv file (text before the first dot).
print ("Reading csv files...")

# sorted() keeps the order of the added columns deterministic
csv_files = sorted(i for i in os.listdir(csvoutpath) if i.endswith('.csv'))

for i in csv_files:
    print('Reading...'+ i)
    df_csv = pd.read_csv(os.path.join(csvoutpath, i), index_col=None, header=None)

    # Second column = extracted value, stored as text such as "[148]".
    # Strip the brackets literally (regex=False); a bare '[' is not a valid
    # regular expression and the default of `regex` differs between pandas versions.
    values = df_csv.iloc[:, 1].astype(str)
    values = values.str.replace('[', '', regex=False).str.replace(']', '', regex=False)

    columnName = i.split('.')[0]
    print("Merging..." + columnName)

    # NOTE(review): values are attached by row position; the feature id in the
    # first csv column is not used. This assumes the shapefile features are in
    # the same order as the rows of agrodem_input -- confirm.
    agrodem_input[columnName] = values

Reading csv files...
Reading...elevationtif.csv
Merging...elevationtif
Reading...gyga_af_agg_erzd_tawcpf23mm__m_1kmtif.csv
Merging...gyga_af_agg_erzd_tawcpf23mm__m_1kmtif
Reading...wc20_30s_prec_01.csv
Merging...wc20_30s_prec_01
Reading...wc20_30s_prec_02.csv
Merging...wc20_30s_prec_02
Reading...wc20_30s_prec_03.csv
Merging...wc20_30s_prec_03
Reading...wc20_30s_prec_04.csv
Merging...wc20_30s_prec_04
Reading...wc20_30s_prec_05.csv
Merging...wc20_30s_prec_05
Reading...wc20_30s_prec_06.csv
Merging...wc20_30s_prec_06
Reading...wc20_30s_prec_07.csv
Merging...wc20_30s_prec_07
Reading...wc20_30s_prec_08.csv
Merging...wc20_30s_prec_08
Reading...wc20_30s_prec_09.csv
Merging...wc20_30s_prec_09
Reading...wc20_30s_prec_10.csv
Merging...wc20_30s_prec_10
Reading...wc20_30s_prec_11.csv
Merging...wc20_30s_prec_11
Reading...wc20_30s_prec_12.csv
Merging...wc20_30s_prec_12
Reading...wc20_30s_srad_01.csv
Merging...wc20_30s_srad_01
Reading...wc20_30s_srad_02.csv
Merging...wc20_30s_srad_02
Reading...wc20_30

### Alternative way of extracting raster values to points (long-running)

In [128]:
# Setting rasters path (this whole cell is disabled; kept as a reference for
# extracting values with rasterstats.point_query instead of the GDAL loop)
#set_path_4rasters = r"N:\Agrodem\Irrigation_model\Input_data\Supporting_Layers"

#for i in os.listdir(set_path_4rasters):
#   if i.endswith('.tif'):
#    #Check if this keeps the raster name as found with the .tif extension
#    columName = i[:-4]
#    print (columName)
#    print ("Extracting " + columName + " values to points...")
#    maize_gdf[columName] = point_query(maize_gdf, set_path_4rasters + "\\" + i)

In [129]:
# List the columns of agrodem_input, including the attached raster columns
agrodem_input.columns

Index(['Unnamed: 0', 'State', 'lon', 'lat', 'Crop', 'Fraction', 'harv_area',
       'sw_depth', 'sw_dist', 'sw_suit', 'geometry', 'country', 'curr_yield',
       'max_yield', 'elevationtif', 'gyga_af_agg_erzd_tawcpf23mm__m_1kmtif',
       'wc20_30s_prec_01', 'wc20_30s_prec_02', 'wc20_30s_prec_03',
       'wc20_30s_prec_04', 'wc20_30s_prec_05', 'wc20_30s_prec_06',
       'wc20_30s_prec_07', 'wc20_30s_prec_08', 'wc20_30s_prec_09',
       'wc20_30s_prec_10', 'wc20_30s_prec_11', 'wc20_30s_prec_12',
       'wc20_30s_srad_01', 'wc20_30s_srad_02', 'wc20_30s_srad_03',
       'wc20_30s_srad_04', 'wc20_30s_srad_05', 'wc20_30s_srad_06',
       'wc20_30s_srad_07', 'wc20_30s_srad_08', 'wc20_30s_srad_09',
       'wc20_30s_srad_10', 'wc20_30s_srad_11', 'wc20_30s_srad_12',
       'wc20_30s_tavg_01', 'wc20_30s_tavg_02', 'wc20_30s_tavg_03',
       'wc20_30s_tavg_04', 'wc20_30s_tavg_05', 'wc20_30s_tavg_06',
       'wc20_30s_tavg_07', 'wc20_30s_tavg_08', 'wc20_30s_tavg_09',
       'wc20_30s_tavg_10', 'wc2

### Updated names of input files for 30s rasters

In [130]:
# Renaming columns as the input file requires

# Monthly layers: 'wc20_30s_<var>_<MM>' becomes '<var>_<M>', i.e. the prefix is
# dropped and the month number loses its leading zero (01 -> 1, ..., 12 -> 12)
new_names = {}
for var in ('prec', 'srad', 'tavg', 'tmax', 'tmin', 'wind'):
    for month in range(1, 13):
        old_name = 'wc20_30s_{v}_{m:02d}'.format(v=var, m=month)
        new_names[old_name] = '{v}_{m}'.format(v=var, m=month)

# Layers without a monthly series
new_names.update({
    'gyga_af_agg_erzd_tawcpf23mm__m_1kmtif': 'awsc',
    'Surface_Water_Suitability_Moz': 'sw_suit',
    'elevationtif': 'elevation',
    'WTDtif': 'gw_depth',
})

# Names that are not present in the dataframe are simply ignored by rename
agrodem_input.rename(columns=new_names, inplace=True)



In [131]:
# Check the column names after renaming
agrodem_input.columns

Index(['Unnamed: 0', 'State', 'lon', 'lat', 'Crop', 'Fraction', 'harv_area',
       'sw_depth', 'sw_dist', 'sw_suit', 'geometry', 'country', 'curr_yield',
       'max_yield', 'elevation', 'awsc', 'prec_1', 'prec_2', 'prec_3',
       'prec_4', 'prec_5', 'prec_6', 'prec_7', 'prec_8', 'prec_9', 'prec_10',
       'prec_11', 'prec_12', 'srad_1', 'srad_2', 'srad_3', 'srad_4', 'srad_5',
       'srad_6', 'srad_7', 'srad_8', 'srad_9', 'srad_10', 'srad_11', 'srad_12',
       'tavg_1', 'tavg_2', 'tavg_3', 'tavg_4', 'tavg_5', 'tavg_6', 'tavg_7',
       'tavg_8', 'tavg_9', 'tavg_10', 'tavg_11', 'tavg_12', 'tmax_1', 'tmax_2',
       'tmax_3', 'tmax_4', 'tmax_5', 'tmax_6', 'tmax_7', 'tmax_8', 'tmax_9',
       'tmax_10', 'tmax_11', 'tmax_12', 'tmin_1', 'tmin_2', 'tmin_3', 'tmin_4',
       'tmin_5', 'tmin_6', 'tmin_7', 'tmin_8', 'tmin_9', 'tmin_10', 'tmin_11',
       'tmin_12', 'wind_1', 'wind_2', 'wind_3', 'wind_4', 'wind_5', 'wind_6',
       'wind_7', 'wind_8', 'wind_9', 'wind_10', 'wind_11', 'wind_1

In [132]:
# Remove the leftover csv index column and the geometry column
droping_cols = ["Unnamed: 0","geometry"]
agrodem_input.drop(columns=droping_cols, inplace=True)

## Exporting gdf into csv (or shapefile, gpkg as needed)

In [133]:
# Other export formats (disabled):
#gpkg
#agrodem_input.to_file("Zambezia_1km.gpkg", layer='Maize_Inputfile', driver="GPKG")

#shp
#agrodem_input.to_file("Moz_250m_Maize_190920.shp")

# Define output path
path = r"N:\Agrodem\Irrigation_model\Output_data\agrodem_input\Final_input_files"
csvname = "agrodem_input_Cassava_2030_SG_1km.csv"

# Write the final input table to csv, without the dataframe index
final_csv = os.path.join(path, csvname)
agrodem_input.to_csv(final_csv, index=False)