# Converting raster to vector

A collection of functions that convert raster (.tif) files, generated as part of the future-scenario pipeline code, into vector (point shapefile) files.

**Original code:** [Konstantinos Pegios](https://github.com/kopegios) <br />
**Conceptualization & Methodological review:** [Alexandros Korkovelos](https://github.com/akorkovelos) & [Konstantinos Pegios](https://github.com/kopegios)<br />
**Updates, Modifications:** [Alexandros Korkovelos](https://github.com/akorkovelos) <br />
**Funding:** The World Bank (contract number: 7190531), [KTH](https://www.kth.se/en/itm/inst/energiteknik/forskning/desa/welcome-to-the-unit-of-energy-systems-analysis-kth-desa-1.197296)

In [12]:
# Importing necessary modules

import geopandas as gpd
import rasterio as rio
import pandas as pd
import fiona
import gdal
import osr
import ogr
import rasterio.mask
import time
import os
import ogr, gdal, osr, os
import numpy as np
import itertools
import re
from rasterio.warp import calculate_default_transform, reproject
from rasterio.enums import Resampling
from rasterstats import point_query
from pyproj import Proj

### Raster (Re)projection to target CRS

This step is not necessary if the raster file is already in the target CRS.

In [13]:
# Define project function

def reproj(input_raster, output_raster, new_crs, factor):
    """Reproject every band of a raster file into a new CRS.

    Parameters
    ----------
    input_raster : path of the raster to read (opened with rasterio).
    output_raster : path of the reprojected raster to write.
    new_crs : target coordinate reference system, e.g. "epsg:4326".
    factor : multiplier applied to the source width and height that are
        handed to ``calculate_default_transform`` when the output grid
        (transform, width, height) is derived.

    The output file reuses the source metadata, with only the CRS,
    transform, width and height replaced. Bands are resampled with
    nearest-neighbour resampling.
    """
    with rio.open(input_raster) as source:
        # Derive the output grid for the target CRS from the source extent.
        target_transform, target_width, target_height = calculate_default_transform(
            source.crs,
            new_crs,
            source.width * factor,
            source.height * factor,
            *source.bounds
        )

        # Start from the source metadata and override the georeferencing.
        profile = source.meta.copy()
        profile.update(
            crs=new_crs,
            transform=target_transform,
            width=target_width,
            height=target_height,
        )

        with rio.open(output_raster, 'w', **profile) as target:
            # rasterio band indexes are 1-based.
            for band_index in range(1, source.count + 1):
                reproject(
                    source=rio.band(source, band_index),
                    destination=rio.band(target, band_index),
                    src_transform=source.transform,
                    src_crs=source.crs,
                    dst_transform=target_transform,
                    dst_crs=new_crs,
                    resampling=Resampling.nearest,
                )

In [14]:
# Set input/output directories
inpath = r"N:\Agrodem\Future_Scenarios\maize_cassava_scenarios\maize_cassava_scenarios"
outpath = r"N:\Agrodem\Future_Scenarios\maize_cassava_scenarios\maize_cassava_scenarios\re_projected"

# Provide the input raster and give a name to the output (reprojected) raster.
# Paths are assembled with os.path.join (as in the export cell) rather than a
# hard-coded "\\" separator.
input_raster = os.path.join(inpath, "cassava_SG.tif")
output_raster = os.path.join(outpath, "cassava_SG_reproj.tif")

# Set target CRS
new_crs = "epsg:4326"

# Provide a factor if you want zoomed in/out results; suggest keeping it to one unless fully understanding the implications
factor = 1

In [15]:
# Reproject the input raster using the settings defined in the previous cell
reproj(
    input_raster=input_raster,
    output_raster=output_raster,
    new_crs=new_crs,
    factor=factor,
)

### Converting raster to shapefile

In [16]:
# Define functions

def pixelOffset2coord(raster, xOffset, yOffset):
    """Convert a pixel (column, row) offset into georeferenced coordinates.

    Applies the full six-term affine geotransform returned by
    ``raster.GetGeoTransform()``::

        X = gt[0] + xOffset * gt[1] + yOffset * gt[2]
        Y = gt[3] + xOffset * gt[4] + yOffset * gt[5]

    The rotation/shear terms ``gt[2]`` and ``gt[4]`` are included so that
    rotated rasters are handled as well; when they are 0 (north-up rasters)
    the result is identical to using origin and pixel size only.

    Parameters
    ----------
    raster : object exposing ``GetGeoTransform()`` (e.g. a GDAL dataset).
    xOffset : column offset in pixels.
    yOffset : row offset in pixels.

    Returns
    -------
    (coordX, coordY) tuple in the raster's coordinate system.

    NOTE: offset (0, 0) maps to the geotransform origin itself, i.e. integer
    offsets address a pixel's origin-side corner, not its centre.
    """
    geotransform = raster.GetGeoTransform()
    originX = geotransform[0]
    pixelWidth = geotransform[1]
    rowRotation = geotransform[2]
    originY = geotransform[3]
    columnRotation = geotransform[4]
    pixelHeight = geotransform[5]
    coordX = originX + pixelWidth*xOffset + rowRotation*yOffset
    coordY = originY + columnRotation*xOffset + pixelHeight*yOffset
    return coordX, coordY

def raster2array(rasterfn):
    """Read the first band of a raster file into an array.

    Parameters
    ----------
    rasterfn : path of the raster file, opened with ``gdal.Open``.

    Returns
    -------
    The array produced by ``ReadAsArray()`` on band 1.

    Raises
    ------
    RuntimeError
        If GDAL cannot open ``rasterfn``. Without GDAL exceptions enabled,
        ``gdal.Open`` returns None on failure, which would otherwise surface
        as an unhelpful AttributeError on the next line.
    """
    raster = gdal.Open(rasterfn)
    if raster is None:
        raise RuntimeError("Could not open raster: {0}".format(rasterfn))
    band = raster.GetRasterBand(1)
    array = band.ReadAsArray()
    return array

def array2shp(array,outSHPfn,rasterfn):
    """Write one point feature per positive cell of ``array`` to a shapefile.

    Parameters
    ----------
    array : 2-D array of raster values (rows x columns), as returned by
        ``raster2array``.
    outSHPfn : path of the ESRI Shapefile to create; an existing file at
        that path is deleted first.
    rasterfn : path of the raster ``array`` was read from; it supplies the
        geotransform (via ``pixelOffset2coord``) and the projection that is
        assigned to the output layer.

    Raises
    ------
    RuntimeError
        If the raster cannot be opened or the shapefile cannot be created.

    Progress is printed every 100 rows.
    """
    raster = gdal.Open(rasterfn)
    if raster is None:
        raise RuntimeError("Could not open raster: {0}".format(rasterfn))

    # The output layer inherits the raster's spatial reference.
    srs = osr.SpatialReference()
    srs.ImportFromWkt(raster.GetProjection())

    shpDriver = ogr.GetDriverByName("ESRI Shapefile")
    if os.path.exists(outSHPfn):
        shpDriver.DeleteDataSource(outSHPfn)
    outDataSource = shpDriver.CreateDataSource(outSHPfn)
    if outDataSource is None:
        raise RuntimeError("Could not create shapefile: {0}".format(outSHPfn))

    outLayer = None
    try:
        outLayer = outDataSource.CreateLayer(outSHPfn, geom_type=ogr.wkbPoint, srs=srs )
        featureDefn = outLayer.GetLayerDefn()
        outLayer.CreateField(ogr.FieldDefn("VALUE", ogr.OFTInteger))

        # A single point geometry is reused; AddPoint overwrites its
        # coordinates and SetGeometry stores a copy on each feature.
        point = ogr.Geometry(ogr.wkbPoint)
        row_count = array.shape[0]
        for ridx, row in enumerate(array):
            if ridx % 100 == 0:
                print ("{0} of {1} rows processed".format(ridx, row_count))
            for cidx, value in enumerate(row):
                # Only positive values become points
                if value > 0:
                    Xcoord, Ycoord = pixelOffset2coord(raster,cidx,ridx)
                    point.AddPoint(Xcoord, Ycoord)
                    outFeature = ogr.Feature(featureDefn)
                    outFeature.SetGeometry(point)
                    # NOTE(review): "VALUE" receives the ROW INDEX, not the
                    # cell value. Kept as-is to preserve existing outputs;
                    # confirm whether int(value) was intended.
                    outFeature.SetField("VALUE", int(ridx))
                    outLayer.CreateFeature(outFeature)
                    outFeature.Destroy()
    finally:
        # Drop the OGR/GDAL handles explicitly so the shapefile is flushed
        # and closed even if an exception interrupts the loop (a traceback
        # kept alive by the notebook would otherwise hold the file open).
        outLayer = None
        outDataSource = None
        raster = None

def main(rasterfn,outSHPfn):
    """Convert the raster at ``rasterfn`` into the point shapefile ``outSHPfn``.

    Reads the raster into an array with ``raster2array`` and hands that
    array, together with both paths, to ``array2shp``.
    """
    array2shp(raster2array(rasterfn), outSHPfn, rasterfn)

In [17]:
# Set input/output directories
inpath = r"N:\Agrodem\Future_Scenarios\maize_cassava_scenarios\maize_cassava_scenarios\re_projected"
outpath = r"N:\Agrodem\Future_Scenarios\maize_cassava_scenarios\maize_cassava_scenarios\vectorfiles"

# Provide the input (reprojected) raster and give a name to the output shapefile.
# Paths are assembled with os.path.join (as in the export cell) rather than a
# hard-coded "\\" separator.
rasterfn = os.path.join(inpath, "cassava_SG_reproj.tif")
outSHPfn = os.path.join(outpath, "cassava_SG.shp")

In [18]:
# Convert the reprojected raster into the point shapefile
main(rasterfn=rasterfn, outSHPfn=outSHPfn)

0 of 3580 rows processed
100 of 3580 rows processed
200 of 3580 rows processed
300 of 3580 rows processed
400 of 3580 rows processed
500 of 3580 rows processed
600 of 3580 rows processed
700 of 3580 rows processed
800 of 3580 rows processed
900 of 3580 rows processed
1000 of 3580 rows processed
1100 of 3580 rows processed
1200 of 3580 rows processed
1300 of 3580 rows processed
1400 of 3580 rows processed
1500 of 3580 rows processed
1600 of 3580 rows processed
1700 of 3580 rows processed
1800 of 3580 rows processed
1900 of 3580 rows processed
2000 of 3580 rows processed
2100 of 3580 rows processed
2200 of 3580 rows processed
2300 of 3580 rows processed
2400 of 3580 rows processed
2500 of 3580 rows processed
2600 of 3580 rows processed
2700 of 3580 rows processed
2800 of 3580 rows processed
2900 of 3580 rows processed
3000 of 3580 rows processed
3100 of 3580 rows processed
3200 of 3580 rows processed
3300 of 3580 rows processed
3400 of 3580 rows processed
3500 of 3580 rows processed


### Assigning lat/long columns to the shapefile

In [19]:
# Import the point shapefile as a GeoDataFrame.
# The path is assembled with os.path.join (as in the export cell) rather than
# a hard-coded "\\" separator.
path_shp = r"N:\Agrodem\Future_Scenarios\maize_cassava_scenarios\maize_cassava_scenarios\vectorfiles"
name_shp = "cassava_SG.shp"
future_crop_gdf = gpd.read_file(os.path.join(path_shp, name_shp))

In [20]:
# Add lon/lat columns taken from the x/y coordinates of each point geometry
point_geometries = future_crop_gdf["geometry"]
future_crop_gdf['lon'] = point_geometries.x
future_crop_gdf['lat'] = point_geometries.y

In [21]:
# Preview the first three rows to check the new lon/lat columns
future_crop_gdf.head(n=3)

Unnamed: 0,VALUE,geometry,lon,lat
0,185,POINT (39.32244 -11.31020),39.322436,-11.310195
1,187,POINT (39.32702 -11.31936),39.32702,-11.319362
2,188,POINT (39.34077 -11.32395),39.34077,-11.323946


#### Exporting file back to shp or gpkg

In [22]:
# Output location and file name
path = r"N:\Agrodem\Future_Scenarios\maize_cassava_scenarios\maize_cassava_scenarios\vectorfiles"
name_shp = "cassava_SG.shp"

# Export as shapefile
out_file = os.path.join(path, name_shp)
future_crop_gdf.to_file(out_file, index=False)

# Alternative: export as GeoPackage
#future_crop_gdf.to_file("maize_BAU.gpkg", layer='Maize_Inputfile_Future', driver="GPKG")