## Get Crop Statistics

**Get statistics for the crop type and area in the set and prepare for machine learning set up.**

This notebook contains two groups of scripts. The first gets the crop type from the parcels table, and the second gets the crop types from a different table.

In the first example the crop types are unique and do not need to be grouped; in the second example the crop types need to be grouped into radiometrically similar crops.

In [None]:
import os

import numpy as np
import pandas as pd
import psycopg2

In [None]:
# Connect to the database.
# The settings are collected in one dict so they can be edited in a single place.
db_params = dict(
    host="localhost",
    database="postgres",
    user="postgres",
    password="",
)
conn = psycopg2.connect(**db_params)
cur = conn.cursor()

In [None]:
# Parcels table and the column in it that holds the crop type
parcels_table = "aoi2020"
crop_type_column = ""  # Set the crop type column

# Names of the extracts and signatures tables
bs_extracts = "aoi2020_bs_extracts"
bs_signatures = "aoi2020_bs_signatures"
s2_signatures = "aoi2020_s2_signatures"
c6_signatures = "aoi2020_c6_signatures"

# Start and end dates
date_start, date_end = "2019-01-01", "2019-10-15"

# Set the folder to store the data
data_folder = ''

## Get crop types from the same table
Use the scripts below if the crop codes are in the same table as the parcels.

**Note:** These scripts do not work if the crop types are in a different table. In that case, use the scripts in the last section of this notebook, which reads the crop types from a separate table.

In [None]:
%%time

# For every parcel whose ogc_fid appears as a pid in the extracts table,
# fetch its id, its crop type and the area of its geometry.
selectSql = f"""
    SELECT ogc_fid, {crop_type_column}, ST_area(wkb_geometry)
    FROM {parcels_table}
    WHERE ogc_fid In (select distinct pid
    FROM {bs_extracts});
"""

df = pd.read_sql_query(sql=selectSql, con=conn)

# Number of parcels returned by the query
print("Total: ", df.shape[0])

df.head(10)

In [None]:
# Close the cursor and the database connection opened by psycopg2.connect above.
# (No `database` object is defined in this notebook; `cur` and `conn` are the
# handles that were actually created.)
cur.close()
conn.close()

In [None]:
# Area statistics per crop type:
#   area        - summed parcel area of the crop
#   relarea     - share of the total area
#   cum_relarea - running share, with crops ordered from largest to smallest
total_area = df['st_area'].sum()

# A single grouped pass replaces re-filtering the whole frame once per crop.
# sort=False keeps the crops in order of first appearance, like .unique().
# Parcels with a null crop type are left out of the table (groupby drops null
# keys); their area still counts towards total_area.
crop_area = (
    df.groupby(crop_type_column, sort=False)['st_area']
    .sum()
    .rename_axis('crop')
    .to_frame('area')
)
crop_area['relarea'] = crop_area['area'] / total_area

sorted_crop_area = crop_area.sort_values(by=['relarea'], ascending=False)
sorted_crop_area['cum_relarea'] = sorted_crop_area['relarea'].cumsum(axis=0)

sorted_crop_area.head(13)

In [None]:

# Crops are ranked by decreasing area share; only those lying within the first
# 98 % of the cumulative area get a class. The remaining crops are labelled -1.
cum_area_threshold = 0.98
taggable = sorted_crop_area[sorted_crop_area['cum_relarea'] < cum_area_threshold].index

# The class id is the position of the crop in `taggable`. get_indexer returns
# -1 for crops that are not in it, so one vectorised lookup replaces the
# per-row np.where scan.
df['class'] = taggable.get_indexer(df[crop_type_column])
# Parcels without a crop type never receive a class.
df.loc[df[crop_type_column].isna(), 'class'] = -1

# Keep only the classified parcels, indexed by parcel id, with the class label.
classes = (
    df.loc[df['class'] > -1]
    .drop(columns=[crop_type_column, 'st_area'])
    .set_index('ogc_fid')
)

classes['class'].unique()


In [None]:
# Export the data to a csv file.
# os.path.join adds the path separator when data_folder has no trailing one
# and returns just the file name when data_folder is empty.
classes.to_csv(os.path.join(data_folder, f'{parcels_table}_cropclasses.csv'))

## Get crop types from a different table
Use the below scripts in case the cropcodes are in a different table from the parcels table.

**Note:** These scripts do not work if the crop types are in the same table as the parcels. In that case, use the scripts above instead.

In [None]:
# Set parameters

# Parcels table: name of the column holding the parcel type code
column_parcels_type_code = ""

# Crop codes table: its name, its crop code column and its crop name column
table_cropcodes = ""
column_crop_code = ""
column_crop_name = ""

In [None]:
# For every parcel whose ogc_fid appears as a pid in the extracts table, fetch
# its id, its type code (cast to int), its area and the crop name found by
# matching the type code against the crop codes table.
selectSql = f"""
    SELECT
        distinct ogc_fid,
        par.{column_parcels_type_code}::int,
        st_area(wkb_geometry),
        cro.{column_crop_name}
    FROM {parcels_table} par, {table_cropcodes} cro
    WHERE ogc_fid in (SELECT distinct pid FROM {bs_extracts})
    And par.{column_parcels_type_code}=cro.{column_crop_code};
    """

df = pd.read_sql_query(sql=selectSql, con=conn)

df.head(10)

In [None]:
# Close the cursor and the database connection opened by psycopg2.connect above.
# (No `database` object is defined in this notebook; `cur` and `conn` are the
# handles that were actually created.)
cur.close()
conn.close()

In [None]:
# This is an example of grouping radiometrically similar crops.
# It is only needed in some cases, depending on the data.

# The crop codes come from the parcel type-code column selected (and cast to
# int) by the query in this section, so that column is the one to match on.
cropcode = column_parcels_type_code

# Group id -> (short label, crop codes that belong to the group).
crop_groups = {
    0: ('gra', [260, 252, 108, 254, 276, 263, 247, 101, 251, 111, 106, 250,
                267, 268, 257, 113, 255, 264]),
    1: ('mai', [216, 5]),
    2: ('pot', [151, 152]),
    3: ('wwh', [11, 13]),
    4: ('sbt', [160, 280]),
    5: ('wba', [10]),
    6: ('wor', [22]),
    7: ('sce', [1, 2]),
    8: ('wce', [15, 3, 14]),
    9: ('veg', [30, 424, 31, 124]),
    # Make sure 308 and 310 are in this group, because they have different labels!
    10: ('fal', [308, 310, 325, 327]),
}

# Parcels whose code is in none of the groups keep the value -1.
df['cropgroup'] = -1
for group_id, (_label, codes) in crop_groups.items():
    df.loc[df[cropcode].isin(codes), 'cropgroup'] = group_id

df.head(10)

In [None]:
# Number of parcels in the frame
df.shape[0]

In [None]:
# Area statistics per crop group (-1 = parcels not assigned to any group):
#   area        - summed parcel area of the group
#   relarea     - share of the total area
#   cum_relarea - running share, with groups ordered from largest to smallest
total_area = df['st_area'].sum()

# A single grouped pass replaces re-filtering the whole frame once per group.
# sort=False keeps the groups in order of first appearance, like .unique().
crop_area = (
    df.groupby('cropgroup', sort=False)['st_area']
    .sum()
    .rename_axis('crop')
    .to_frame('area')
)
crop_area['relarea'] = crop_area['area'] / total_area

sorted_crop_area = crop_area.sort_values(by=['relarea'], ascending=False)
sorted_crop_area['cum_relarea'] = sorted_crop_area['relarea'].cumsum(axis=0)

sorted_crop_area.head(25)

In [None]:
# Parcels that were not assigned to any crop group (cropgroup == -1), shown
# with their type code and crop name. The column names are taken from the
# parameters set for this section rather than being hard-coded.
df.loc[df['cropgroup'] == -1, [column_parcels_type_code, column_crop_name]]


In [None]:
# Keep only the parcels that were assigned to a crop group; the group id in
# 'cropgroup' serves as the label. The type-code column is dropped using the
# name set in this section's parameters rather than a hard-coded one.
classes = (
    df.loc[df['cropgroup'] > -1]
    .drop(columns=[column_parcels_type_code, 'st_area'])
    .set_index('ogc_fid')
)

classes['cropgroup'].unique()

In [None]:
# Export the data to a csv file.
# os.path.join adds the path separator when data_folder has no trailing one
# and returns just the file name when data_folder is empty.
classes.to_csv(os.path.join(data_folder, f'{parcels_table}_cropclasses.csv'))