# Oleaster (Elaeagnus angustifolia) coverage investigation in the South Heves Grasslands Landscape Protection Area in 2023

## Importing libraries

In [None]:
import geopandas

from src.calculate_statistics import AreaStatisticsComparisonWithSampleArea
from src.utils.file_utils import write_csv_from_dict
from src.utils.file_utils.write_excel import write_excel_sheet_from_dict, write_excel_sheet_from_dataframe

## Setting up the input data

In [None]:
# Oleaster polygons: one named layer read from its own GeoPackage.
oleaster_source = '../data/oleasters_dhte_2023/ezustfa_manual_digit.gpkg'
oleasters_basic_data = geopandas.read_file(oleaster_source, layer='ezustfa_manual_digit')

# Sample-area polygons: the 'hatasterulet_singlepart' layer of the second GeoPackage.
sample_area_source = '../data/oleasters_dhte_2023/hatasterulet.gpkg'
sample_area_basic_data = geopandas.read_file(sample_area_source, layer='hatasterulet_singlepart')

## Define Excel file

In [None]:
# Single workbook path passed as `file_name` to every Excel-writing call in this notebook.
# NOTE(review): presumably the helpers add a sheet to this workbook on each call — confirm in write_excel.
excel_file = '../results/oleasters_dhte_2023/area_statistics/oleasters.xlsx'

## Get sample area size in $m^2$

In [None]:
# A sub-part belongs to a sample area according to the prefix of its 'subpart_name':
# 'tarna...' rows are treated as Ludas, 'pely...' rows as Rakottyas.
subpart_names = sample_area_basic_data['subpart_name']
ludas_supbparts = sample_area_basic_data[subpart_names.str.startswith('tarna')]
rakottyas_subparts = sample_area_basic_data[subpart_names.str.startswith('pely')]

# Sum the polygon areas once per sample area and reuse the totals for the grand total.
# NOTE(review): the m² unit of the section title assumes a metre-based projected CRS — confirm.
ludas_total_area = ludas_supbparts.geometry.area.sum()
rakottyas_total_area = rakottyas_subparts.geometry.area.sum()
subpart_areas = {
    'ludas': ludas_total_area,
    'rakottyas': rakottyas_total_area,
    'sum': ludas_total_area + rakottyas_total_area,
}

print('Area sizes')
print(subpart_areas)
write_excel_sheet_from_dict(
    dictionary=subpart_areas,
    file_name=excel_file,
    sheet_name='sub-areas',
)

## Spatial join the oleaster data with the sample areas

In [None]:
# Attach the sample-area attributes to every oleaster polygon that intersects a sample-area polygon.
# With how='left' all oleaster rows are kept; rows without a match get missing values in the
# sample-area columns (they are filtered out via 'subpart_name' in the next cell).
# NOTE(review): a polygon intersecting more than one sample-area polygon would yield one row per
# match and could be counted twice — confirm this cannot happen with the input layers.
study_area = oleasters_basic_data.sjoin(sample_area_basic_data, how='left', predicate='intersects')

## Remove oleasters outside the study area and remove the unnecessary columns

In [None]:
# Rows with no 'subpart_name' matched no sample-area polygon in the join; discard them.
study_area.dropna(subset=['subpart_name'], inplace=True)

# Columns that are not needed by the rest of the analysis. The *_left / *_right and
# 'index_right' entries are presumably by-products of the spatial join (default suffixes).
columns_to_discard = [
    'category',
    'area_left',
    'estimated_individuals',
    'index_right',
    'fejlesztes',
    'area_right',
]
study_area.drop(columns=columns_to_discard, inplace=True)

print('Column names of the cleaned base data frame')
print(study_area.columns)
print('Sample data from the base data frame')
print(study_area.head(5))

## Calculate the area of the oleaster polygons in $m^2$

In [None]:
# Store each polygon's area, taken straight from its geometry, in a new 'area' column.
# NOTE(review): the value is in squared CRS units; the m² label of this section assumes a
# metre-based projected CRS — confirm for the input layer.
study_area['area'] = study_area.geometry.area

## Create dataframe for the two separated sample areas

In [None]:
# Split the joined polygons by the prefix of the sub-part they fell into:
# 'tarna...' -> Ludas, 'pely...' -> Rakottyas.
joined_subpart_names = study_area['subpart_name']
ludas = study_area[joined_subpart_names.str.startswith('tarna')]
rakottyas = study_area[joined_subpart_names.str.startswith('pely')]

print('Sample data from the data frames separated by sub-areas')
for sub_area_frame in (ludas, rakottyas):
    print(sub_area_frame.head(5))

## Calculate statistics

In [None]:
# One statistics object per scope; each receives the polygons of that scope together
# with the total size of the matching sample area.
combined_sample_area = subpart_areas['ludas'] + subpart_areas['rakottyas']
study_area_statistics = AreaStatisticsComparisonWithSampleArea(study_area, combined_sample_area)
ludas_statistics = AreaStatisticsComparisonWithSampleArea(ludas, subpart_areas['ludas'])
rakottyas_statistics = AreaStatisticsComparisonWithSampleArea(rakottyas, subpart_areas['rakottyas'])

# Print the statistics in the order: whole study area, Ludas, Rakottyas.
for heading, area_stats in (
    ('Statistics for all sample areas', study_area_statistics),
    ('Statistics for Ludas sample area', ludas_statistics),
    ('Statistics for Rakottyas sample area', rakottyas_statistics),
):
    print(heading)
    print(area_stats.get_area_statistics())

## Write statistics to csv

In [None]:
# Export the statistics of each scope to its own CSV file.
for area_stats, csv_path in (
    (study_area_statistics, '../results/oleasters_dhte_2023/area_statistics/study_area_statistics.csv'),
    (ludas_statistics, '../results/oleasters_dhte_2023/area_statistics/ludas_statistics.csv'),
    (rakottyas_statistics, '../results/oleasters_dhte_2023/area_statistics/rakottyas_statistics.csv'),
):
    write_csv_from_dict(area_stats.get_area_statistics(), csv_path)

## Write statistics to Excel

In [None]:
# Write the `.data` frame of each statistics object to its own sheet of the shared workbook.
# NOTE(review): the CSV export of the same section writes get_area_statistics(), whereas the
# sheets named '*_statistics' here receive the underlying data frame — confirm this is intended.
for area_stats, sheet in (
    (study_area_statistics, 'study_area_statistics'),
    (ludas_statistics, 'ludas_statistics'),
    (rakottyas_statistics, 'rakottyas_statistics'),
):
    write_excel_sheet_from_dataframe(area_stats.data, file_name=excel_file, sheet_name=sheet)

## Classify polygons by area size based on natural breaks (Fisher-Jenks Algorithm), equal intervals and quartiles

In [None]:
# Add the classification results to the data of every statistics object
# (whole study area first, then Ludas, then Rakottyas).
for area_stats in (study_area_statistics, ludas_statistics, rakottyas_statistics):
    area_stats.add_area_classifications_to_data()

## Write classified data to geopackage

In [None]:
# All three classified frames go into the same GeoPackage, one layer per scope.
geopackage_path = '../results/oleasters_dhte_2023/gis_data/oleasters.gpkg'
for layer_name, area_stats in (
    ('study_area', study_area_statistics),
    ('ludas', ludas_statistics),
    ('rakottyas', rakottyas_statistics),
):
    area_stats.data.to_file(geopackage_path, layer=layer_name, driver='GPKG')

## Write classified data to csv

In [None]:
# Dump each classified frame to CSV without the index column.
for area_stats, csv_path in (
    (study_area_statistics, '../results/oleasters_dhte_2023/area_statistics/study_area_classification_data.csv'),
    (ludas_statistics, '../results/oleasters_dhte_2023/area_statistics/ludas_area_classification.csv'),
    (rakottyas_statistics, '../results/oleasters_dhte_2023/area_statistics/rakottyas_area_classification.csv'),
):
    area_stats.data.to_csv(csv_path, index=False)

## Write classified data to Excel

In [None]:
# Write the classified data of each scope to its own sheet of the shared workbook.
write_excel_sheet_from_dataframe(
    study_area_statistics.data,
    file_name=excel_file,
    sheet_name='study_area_classes'
)
write_excel_sheet_from_dataframe(
    ludas_statistics.data,
    file_name=excel_file,
    sheet_name='ludas_classes'
)
# The Rakottyas sheet must come from the Rakottyas statistics object; it previously
# received the whole-study-area frame, duplicating the 'study_area_classes' sheet.
write_excel_sheet_from_dataframe(
    rakottyas_statistics.data,
    file_name=excel_file,
    sheet_name='rakottyas_classes'
)

## Jenks' classification analysis

### Create statistics by natural break categories

In [None]:
# Per-class statistics for the 'jenks' classification, computed on the 'area' column.
# The third argument is the total size of the sample area belonging to each scope.
jenks_by_area = ('jenks', 'area')
study_area_jenks_statistics = study_area_statistics.get_classification_area_statistics(
    *jenks_by_area, subpart_areas['sum']
)
ludas_jenks_statistics = ludas_statistics.get_classification_area_statistics(
    *jenks_by_area, subpart_areas['ludas']
)
rakottyas_jenks_statistics = rakottyas_statistics.get_classification_area_statistics(
    *jenks_by_area, subpart_areas['rakottyas']
)

print('The jenks classification data for the three areas')
for class_statistics in (
    study_area_jenks_statistics,
    ludas_jenks_statistics,
    rakottyas_jenks_statistics,
):
    print(class_statistics)

### Write natural break statistics to csv

In [None]:
# Save the natural-break statistics of each scope as CSV, without the index column.
for class_statistics, csv_path in (
    (study_area_jenks_statistics, '../results/oleasters_dhte_2023/area_statistics/study_area_jenks_statistics.csv'),
    (ludas_jenks_statistics, '../results/oleasters_dhte_2023/area_statistics/ludas_jenks_statistics.csv'),
    (rakottyas_jenks_statistics, '../results/oleasters_dhte_2023/area_statistics/rakottyas_jenks_statistics.csv'),
):
    class_statistics.to_csv(csv_path, index=False)

### Write natural break statistics to Excel

In [None]:
# One sheet per scope in the shared workbook for the natural-break statistics.
for class_statistics, sheet in (
    (study_area_jenks_statistics, 'study_area_jenks_statistics'),
    (ludas_jenks_statistics, 'ludas_jenks_statistics'),
    (rakottyas_jenks_statistics, 'rakottyas_jenks_statistics'),
):
    write_excel_sheet_from_dataframe(class_statistics, file_name=excel_file, sheet_name=sheet)

### Create diagram for natural break statistics

In [None]:
# Draw one natural-break diagram per scope; each tuple holds the statistics object,
# the key of its sample-area size, the output image path and the diagram title.
for area_stats, area_key, image_path, title in (
    (
        study_area_statistics,
        'sum',
        '../results/oleasters_dhte_2023/diagrams/study_area_jenks_diagram.png',
        'Natural break classification for the entire study area',
    ),
    (
        ludas_statistics,
        'ludas',
        '../results/oleasters_dhte_2023/diagrams/ludas_jenks_diagram.png',
        'Natural break classification for Ludas sample area',
    ),
    (
        rakottyas_statistics,
        'rakottyas',
        '../results/oleasters_dhte_2023/diagrams/rakottyas_jenks_diagram.png',
        'Natural break classification for Rakottyás sample area',
    ),
):
    area_stats.create_classification_diagram(
        classification_column_name='jenks',
        sample_area=subpart_areas[area_key],
        path=image_path,
        diagram_title=title,
    )

### Create pie chart for area distribution in jenks classification

In [None]:
# Pie chart of the area ratio per 'jenks' class, one image per scope.
for area_stats, area_key, image_path in (
    (study_area_statistics, 'sum', '../results/oleasters_dhte_2023/diagrams/study_area_jenks_pie_chart.png'),
    (ludas_statistics, 'ludas', '../results/oleasters_dhte_2023/diagrams/ludas_jenks_pie_chart.png'),
    (rakottyas_statistics, 'rakottyas', '../results/oleasters_dhte_2023/diagrams/rakottyas_jenks_pie_chart.png'),
):
    area_stats.create_classification_area_ratio_pie_chart(
        classification_column_name='jenks',
        sample_area=subpart_areas[area_key],
        path=image_path,
    )

## Equal interval classification analysis

### Create statistics by equal interval categories

In [None]:
# Per-class statistics for the 'equal_interval_breaks' classification, computed on the
# 'area' column. The third argument is the total size of the matching sample area.
equal_interval_by_area = ('equal_interval_breaks', 'area')
study_area_equal_interval_statistics = study_area_statistics.get_classification_area_statistics(
    *equal_interval_by_area, subpart_areas['sum']
)
ludas_equal_interval_statistics = ludas_statistics.get_classification_area_statistics(
    *equal_interval_by_area, subpart_areas['ludas']
)
rakottyas_equal_interval_statistics = rakottyas_statistics.get_classification_area_statistics(
    *equal_interval_by_area, subpart_areas['rakottyas']
)

print('The equal interval classification data for the three areas')
for class_statistics in (
    study_area_equal_interval_statistics,
    ludas_equal_interval_statistics,
    rakottyas_equal_interval_statistics,
):
    print(class_statistics)

### Write equal interval statistics to csv

In [None]:
# Save the equal-interval statistics of each scope as CSV, without the index column.
for class_statistics, csv_path in (
    (
        study_area_equal_interval_statistics,
        '../results/oleasters_dhte_2023/area_statistics/study_area_equal_interval_statistics.csv',
    ),
    (
        ludas_equal_interval_statistics,
        '../results/oleasters_dhte_2023/area_statistics/ludas_equal_interval_statistics.csv',
    ),
    (
        rakottyas_equal_interval_statistics,
        '../results/oleasters_dhte_2023/area_statistics/rakottyas_equal_interval_statistics.csv',
    ),
):
    class_statistics.to_csv(csv_path, index=False)

### Write equal interval statistics to Excel

In [None]:
# One sheet per scope in the shared workbook for the equal-interval statistics.
# NOTE(review): Excel limits worksheet names to 31 characters; the study-area (36) and
# Rakottyas (35) names below exceed that — confirm how the writer helper handles them.
for class_statistics, sheet in (
    (study_area_equal_interval_statistics, 'study_area_equal_interval_statistics'),
    (ludas_equal_interval_statistics, 'ludas_equal_interval_statistics'),
    (rakottyas_equal_interval_statistics, 'rakottyas_equal_interval_statistics'),
):
    write_excel_sheet_from_dataframe(class_statistics, file_name=excel_file, sheet_name=sheet)

### Create diagrams for equal interval statistics

In [None]:
# Draw one equal-interval diagram per scope. Each call passes the classification column,
# the size of the matching sample area, the output image path and the diagram title.
# The titles previously read "classifiication"; the spelling is corrected here because
# the text is passed on as the title of the generated diagram.
study_area_statistics.create_classification_diagram(
    classification_column_name='equal_interval_breaks',
    sample_area=subpart_areas['sum'],
    path='../results/oleasters_dhte_2023/diagrams/study_area_equal_interval_diagram.png',
    diagram_title='Equal interval classification for the entire study area'
)
ludas_statistics.create_classification_diagram(
    classification_column_name='equal_interval_breaks',
    sample_area=subpart_areas['ludas'],
    path='../results/oleasters_dhte_2023/diagrams/ludas_equal_interval_diagram.png',
    diagram_title='Equal interval classification for the Ludas area'
)
rakottyas_statistics.create_classification_diagram(
    classification_column_name='equal_interval_breaks',
    sample_area=subpart_areas['rakottyas'],
    path='../results/oleasters_dhte_2023/diagrams/rakottyas_equal_interval_diagram.png',
    diagram_title='Equal interval classification for the Rakottyás area'
)

### Create pie chart for area distribution in equal interval classification

In [None]:
# Pie chart of the area ratio per 'equal_interval_breaks' class, one image per scope.
for area_stats, area_key, image_path in (
    (
        study_area_statistics,
        'sum',
        '../results/oleasters_dhte_2023/diagrams/study_area_equal_interval_pie_chart.png',
    ),
    (
        ludas_statistics,
        'ludas',
        '../results/oleasters_dhte_2023/diagrams/ludas_equal_interval_pie_chart.png',
    ),
    (
        rakottyas_statistics,
        'rakottyas',
        '../results/oleasters_dhte_2023/diagrams/rakottyas_equal_interval_pie_chart.png',
    ),
):
    area_stats.create_classification_area_ratio_pie_chart(
        classification_column_name='equal_interval_breaks',
        sample_area=subpart_areas[area_key],
        path=image_path,
    )

## Quartile classification analysis

### Calculate quartile statistics

In [None]:
# Per-class statistics for the 'quartiles' classification, computed on the 'area' column.
# The third argument is the total size of the sample area belonging to each scope.
quartiles_by_area = ('quartiles', 'area')
study_area_quartile_statistics = study_area_statistics.get_classification_area_statistics(
    *quartiles_by_area, subpart_areas['sum']
)
ludas_quartile_statistics = ludas_statistics.get_classification_area_statistics(
    *quartiles_by_area, subpart_areas['ludas']
)
rakottyas_quartile_statistics = rakottyas_statistics.get_classification_area_statistics(
    *quartiles_by_area, subpart_areas['rakottyas']
)

print('The quartile classification data for the three areas')
for class_statistics in (
    study_area_quartile_statistics,
    ludas_quartile_statistics,
    rakottyas_quartile_statistics,
):
    print(class_statistics)

### Write quartile statistics to csv

In [None]:
# Save the quartile statistics of each scope as CSV, without the index column.
for class_statistics, csv_path in (
    (study_area_quartile_statistics, '../results/oleasters_dhte_2023/area_statistics/study_area_quartile_statistics.csv'),
    (ludas_quartile_statistics, '../results/oleasters_dhte_2023/area_statistics/ludas_quartile_statistics.csv'),
    (rakottyas_quartile_statistics, '../results/oleasters_dhte_2023/area_statistics/rakottyas_quartile_statistics.csv'),
):
    class_statistics.to_csv(csv_path, index=False)

### Write quartile statistics to Excel

In [None]:
# One sheet per scope in the shared workbook for the quartile statistics.
for class_statistics, sheet in (
    (study_area_quartile_statistics, 'study_area_quartile_statistics'),
    (ludas_quartile_statistics, 'ludas_quartile_statistics'),
    (rakottyas_quartile_statistics, 'rakottyas_quartile_statistics'),
):
    write_excel_sheet_from_dataframe(class_statistics, file_name=excel_file, sheet_name=sheet)

### Create diagrams for quartile statistics

In [None]:
# Draw one quartile diagram per scope; each tuple holds the statistics object,
# the key of its sample-area size, the output image path and the diagram title.
for area_stats, area_key, image_path, title in (
    (
        study_area_statistics,
        'sum',
        '../results/oleasters_dhte_2023/diagrams/study_area_quartile_statistics.png',
        'Classification by quartiles for the entire study area',
    ),
    (
        ludas_statistics,
        'ludas',
        '../results/oleasters_dhte_2023/diagrams/ludas_quartile_statistics.png',
        'Classification by quartiles for Ludas sample area',
    ),
    (
        rakottyas_statistics,
        'rakottyas',
        '../results/oleasters_dhte_2023/diagrams/rakottyas_quartile_statistics.png',
        'Classification by quartiles for Rakottyás sample area',
    ),
):
    area_stats.create_classification_diagram(
        classification_column_name='quartiles',
        sample_area=subpart_areas[area_key],
        path=image_path,
        diagram_title=title,
    )

### Create pie chart for area distribution based on quartiles

In [None]:
# Pie chart of the area ratio per 'quartiles' class, one image per scope.
for area_stats, area_key, image_path in (
    (study_area_statistics, 'sum', '../results/oleasters_dhte_2023/diagrams/study_area_quartile_pie_chart.png'),
    (ludas_statistics, 'ludas', '../results/oleasters_dhte_2023/diagrams/ludas_quartile_pie_chart.png'),
    (rakottyas_statistics, 'rakottyas', '../results/oleasters_dhte_2023/diagrams/rakottyas_quartile_pie_chart.png'),
):
    area_stats.create_classification_area_ratio_pie_chart(
        classification_column_name='quartiles',
        sample_area=subpart_areas[area_key],
        path=image_path,
    )
