# Oleaster (Elaeagnus angustifolia) coverage investigation in the South Heves Grasslands Landscape Protection Area in 2023

## Importing libraries

In [None]:
import geopandas

from src.calculate_statistics.area_statistics import AreaStatisticsComparisonWithSampleArea
from src.utils.file_utils.write_csv import write_csv_from_dict
from src.utils.file_utils.write_excel import write_excel_sheet_from_dict, write_excel_sheet_from_dataframe
from src.utils.file_utils.create_results_folders import create_results_folder, remove_previous_results
from src.utils.languages.languages import languages

## Remove previous results

In [None]:
# Clear the outputs of any earlier run of this project before regenerating them
# (what exactly is removed is decided by remove_previous_results, defined in src.utils).
remove_previous_results('../../../results/oleasters_dhte_2023')

## Setting up results folder structure

In [None]:
# Lay out the output tree for this project: one sub-folder for the figures and
# one for the tabular statistics, both under results/oleasters_dhte_2023.
create_results_folder(
    results_folder='../../../results',
    project_folder='oleasters_dhte_2023',
    figures_folder='diagrams',
    statistics_folder='area_statistics',
)

## Setting up language

In [None]:
# Pick the 'hu' label set; its entries are used below as data-frame column
# names, Excel sheet names and diagram titles.
language = languages['hu']

## Setting up the input data

In [None]:
# Both input layers live in GeoPackages under the project's data folder.
data_folder = '../../../data/oleasters_dhte_2023'

# Manually digitised oleaster polygons.
oleasters_basic_data = geopandas.read_file(
    f'{data_folder}/ezustfa_manual_digit.gpkg',
    layer='ezustfa_manual_digit',
)
# Sample-area polygons (single-part layer).
sample_area_basic_data = geopandas.read_file(
    f'{data_folder}/hatasterulet.gpkg',
    layer='hatasterulet_singlepart',
)

## Define Excel file

In [None]:
# Single workbook that every write_excel_sheet_* call below adds a sheet to.
excel_file = '../../../results/oleasters_dhte_2023/area_statistics/oleasters.xlsx'

## Get sample area size in $m^2$

In [None]:
# Split the sample-area polygons by the prefix of their sub-part name:
# 'tarna…' rows are treated as Ludas, 'pely…' rows as Rakottyás.
subpart_names = sample_area_basic_data['subpart_name']
ludas_supbparts = sample_area_basic_data[subpart_names.str.startswith('tarna')]
rakottyas_subparts = sample_area_basic_data[subpart_names.str.startswith('pely')]

# Total polygon area per sub-area (in the units of the layer's CRS).
ludas_total_area = ludas_supbparts.geometry.area.sum()
rakottyas_total_area = rakottyas_subparts.geometry.area.sum()

subpart_areas = {
    'Ludas': ludas_total_area,
    'Rakottyás': rakottyas_total_area,
    language['sum']: ludas_total_area + rakottyas_total_area,
}
print('Area sizes')
print(subpart_areas)
write_excel_sheet_from_dict(
    dictionary=subpart_areas,
    file_name=excel_file,
    sheet_name=language['sub_areas'],
)

## Spatial join the oleaster data with the sample areas

In [None]:
# Left join: every oleaster polygon is kept and receives the attributes of the
# sample-area polygon(s) it intersects; polygons that intersect none get missing
# values in the joined columns (they are filtered out in the next step).
# NOTE(review): a polygon intersecting several sample-area parts would appear
# once per match — confirm this cannot happen with the input data.
study_area = oleasters_basic_data.sjoin(sample_area_basic_data, how='left', predicate='intersects')

## Remove oleasters outside the study area and remove the unnecessary columns

In [None]:
# Keep only polygons that matched a sample area, discard the attributes that
# are not needed further on, and give the sub-area column its localized name.
unneeded_columns = [
    'category',
    'area_left',
    'estimated_individuals',
    'index_right',
    'fejlesztes',
    'area_right',
]
study_area = (
    study_area
    .dropna(subset=['subpart_name'])
    .drop(columns=unneeded_columns)
    .rename(columns={'subpart_name': language['sub_area_name']})
)

print('Column names of the cleaned base data frame')
print(study_area.columns)
print('Sample data from the base data frame')
print(study_area.head(5))

## Calculate the area of the oleaster polygons in $m^2$

In [None]:
# Store each polygon's area under the localized area column name. The value is
# in the squared units of the layer's CRS — the heading above expects m², i.e.
# a projected metric CRS (TODO confirm).
study_area[language['area']] = study_area.geometry.area

## Create data frames for the two separate sample areas

In [None]:
# Same prefix rule as for the sample-area polygons: 'tarna…' → Ludas,
# 'pely…' → Rakottyás.
sub_area_names = study_area[language['sub_area_name']]
ludas = study_area[sub_area_names.str.startswith('tarna')]
rakottyas = study_area[sub_area_names.str.startswith('pely')]

print('Sample data from the data frames separated by sub-areas')
for sub_area_frame in (ludas, rakottyas):
    print(sub_area_frame.head(5))

## Calculate statistics

In [None]:
# One statistics object per data set, each paired with the size of the sample
# area it is compared against (whole study area = Ludas + Rakottyás).
study_area_statistics = AreaStatisticsComparisonWithSampleArea(
    study_area,
    subpart_areas[language['sum']],
    language='hu',
)
ludas_statistics = AreaStatisticsComparisonWithSampleArea(
    ludas,
    subpart_areas['Ludas'],
    language='hu',
)
rakottyas_statistics = AreaStatisticsComparisonWithSampleArea(
    rakottyas,
    subpart_areas['Rakottyás'],
    language='hu',
)

for heading, stats in (
    ('Statistics for all sample areas', study_area_statistics),
    ('Statistics for Ludas sample area', ludas_statistics),
    ('Statistics for Rakottyas sample area', rakottyas_statistics),
):
    print(heading)
    print(stats.get_area_statistics())

## Write statistics to csv

In [None]:
# One CSV per area, all in the project's area_statistics folder.
statistics_csv_folder = '../../../results/oleasters_dhte_2023/area_statistics'
for area_name, stats in (
    ('study_area', study_area_statistics),
    ('ludas', ludas_statistics),
    ('rakottyas', rakottyas_statistics),
):
    write_csv_from_dict(
        stats.get_area_statistics(),
        f'{statistics_csv_folder}/{area_name}_statistics.csv'
    )

## Write statistics to Excel

In [None]:
# One sheet per area in the shared workbook; the sheet name is the area label
# followed by the language['statistics'] label.
for sheet_prefix, stats in (
    (language['study_area'], study_area_statistics),
    ('Ludas', ludas_statistics),
    ('Rakottyás', rakottyas_statistics),
):
    write_excel_sheet_from_dict(
        stats.get_area_statistics(),
        file_name=excel_file,
        sheet_name=f'{sheet_prefix} {language["statistics"]}'
    )

## Classify polygons by area size based on natural breaks (Fisher-Jenks Algorithm), equal intervals, quartiles and based on estimated number of individuals

In [None]:
# Add the built-in classification columns to all three data sets.
for stats in (study_area_statistics, ludas_statistics, rakottyas_statistics):
    stats.add_area_classifications_to_data()

In [None]:
# Inspect which columns the classification step added to the study-area data.
print(study_area_statistics.data.columns)

### Add the custom classification to the data

In [None]:
# Custom classification: estimate how many individuals a polygon holds from
# its area. Three break points paired with four labels — presumably one label
# per size class (below 100, 100–500, 500–1000, above 1000, in the units of
# the area column); confirm against classify_areas.
individual_classification_breaks = [100, 500, 1000]
estimated_minimum = [1, 2, 5, 10]
estimated_maximum = [1, 4, 9, 100]
estimated_minimum_individuals_label = language['estimated_minimum_individuals']
estimated_maximum_individuals_label = language['estimated_maximum_individuals']

# The area column was created under the localized name language['area'], so
# that name (not the literal 'area') must be used for every data set.
# Call order is unchanged: all minimum columns first, then all maximum columns.
for class_labels, column_label in (
    (estimated_minimum, estimated_minimum_individuals_label),
    (estimated_maximum, estimated_maximum_individuals_label),
):
    for stats in (study_area_statistics, ludas_statistics, rakottyas_statistics):
        stats.classify_areas(
            area_field_name=language['area'],
            breaks=individual_classification_breaks,
            labels=class_labels,
            new_column_name=column_label
        )

### Write classified data to geopackage

In [None]:
from pathlib import Path

# All three classified data sets go into one GeoPackage, one layer each.
gis_data_file = '../../../results/oleasters_dhte_2023/gis_data/oleasters.gpkg'

# The create_results_folder call earlier in this notebook only names the
# 'diagrams' and 'area_statistics' sub-folders, so make sure the target folder
# exists before writing (no-op when it is already there).
Path(gis_data_file).parent.mkdir(parents=True, exist_ok=True)

for layer_name, stats in (
    ('study_area', study_area_statistics),
    ('ludas', ludas_statistics),
    ('rakottyas', rakottyas_statistics),
):
    stats.data.to_file(
        gis_data_file,
        layer=layer_name,
        driver='GPKG'
    )

### Write classified data to csv

In [None]:
# Dump each classified data set to its own CSV (row index omitted).
classification_csv_folder = '../../../results/oleasters_dhte_2023/area_statistics'
for file_stem, stats in (
    ('study_area_classification', study_area_statistics),
    ('ludas_area_classification', ludas_statistics),
    ('rakottyas_area_classification', rakottyas_statistics),
):
    stats.data.to_csv(
        f'{classification_csv_folder}/{file_stem}.csv',
        index=False
    )

### Write classified data to excel

In [None]:
# One sheet per classified data set in the shared workbook.
# Fixed: the Rakottyás sheet previously received the whole study-area data and
# was named 'Rakottys_…'; it now gets the Rakottyás data and follows the same
# naming pattern as the Ludas sheet.
# NOTE(review): the study-area sheet joins its two labels with '_' while the
# other sheets use a space; left unchanged in case the sheet name is relied on.
for sheet_name, stats in (
    (f'{language["study_area"]}_{language["classification"]}', study_area_statistics),
    (f'Ludas {language["classification"]}', ludas_statistics),
    (f'Rakottyás {language["classification"]}', rakottyas_statistics),
):
    write_excel_sheet_from_dataframe(
        stats.data,
        file_name=excel_file,
        sheet_name=sheet_name
    )

## Jenks' classification analysis

### Create statistics by natural break categories

In [None]:
# Per-class statistics for the natural-breaks column, computed for each data
# set against the size of its own sample area (same call order as before:
# study area, Ludas, Rakottyás).
(
    study_area_jenks_statistics,
    ludas_jenks_statistics,
    rakottyas_jenks_statistics,
) = [
    stats.get_classification_area_statistics(
        language['jenks'],
        language['area'],
        subpart_areas[area_key]
    )
    for stats, area_key in (
        (study_area_statistics, language['sum']),
        (ludas_statistics, 'Ludas'),
        (rakottyas_statistics, 'Rakottyás'),
    )
]

print('The jenks classification data for the three areas')
for jenks_table in (
    study_area_jenks_statistics,
    ludas_jenks_statistics,
    rakottyas_jenks_statistics,
):
    print(jenks_table)

### Write natural break statistics to csv

In [None]:
# One CSV per area for the natural-breaks statistics (row index omitted).
jenks_csv_folder = '../../../results/oleasters_dhte_2023/area_statistics'
for area_name, jenks_table in (
    ('study_area', study_area_jenks_statistics),
    ('ludas', ludas_jenks_statistics),
    ('rakottyas', rakottyas_jenks_statistics),
):
    jenks_table.to_csv(
        f'{jenks_csv_folder}/{area_name}_jenks_statistics.csv',
        index=False
    )

### Write natural break statistics to excel

In [None]:
# One sheet per area; sheet name = area label + jenks label + statistics label.
for sheet_prefix, jenks_table in (
    (language['study_area'], study_area_jenks_statistics),
    ('Ludas', ludas_jenks_statistics),
    ('Rakottyás', rakottyas_jenks_statistics),
):
    write_excel_sheet_from_dataframe(
        jenks_table,
        file_name=excel_file,
        sheet_name=f'{sheet_prefix} {language["jenks"]} {language["statistics"]}'
    )

### Create diagram for natural break statistics

In [None]:
# Draw the natural-breaks diagram for each area. File names and title keys
# follow one pattern, parameterised by the area's short name.
jenks_diagram_folder = '../../../results/oleasters_dhte_2023/diagrams'
for area_name, stats, area_key in (
    ('study_area', study_area_statistics, language['sum']),
    ('ludas', ludas_statistics, 'Ludas'),
    ('rakottyas', rakottyas_statistics, 'Rakottyás'),
):
    stats.create_classification_diagram(
        classification_column_name=language['jenks'],
        sample_area=subpart_areas[area_key],
        path=f'{jenks_diagram_folder}/{area_name}_jenks_diagram.png',
        diagram_title=language[f'natural_break_{area_name}_diagram_title']
    )

### Create pie chart for area distribution in jenks classification

In [None]:
# Pie chart of the area ratio per natural-breaks class, one per area.
jenks_pie_folder = '../../../results/oleasters_dhte_2023/diagrams'
for area_name, stats, area_key in (
    ('study_area', study_area_statistics, language['sum']),
    ('ludas', ludas_statistics, 'Ludas'),
    ('rakottyas', rakottyas_statistics, 'Rakottyás'),
):
    stats.create_classification_area_ratio_pie_chart(
        classification_column_name=language['jenks'],
        sample_area=subpart_areas[area_key],
        path=f'{jenks_pie_folder}/{area_name}_jenks_pie_chart.png',
        diagram_title=language[f'natural_break_{area_name}_pie_chart_diagram_title']
    )

## Equal interval classification analysis

In [None]:
# Per-class statistics for the equal-interval column, computed for each data
# set against the size of its own sample area (study area, Ludas, Rakottyás).
(
    study_area_equal_interval_statistics,
    ludas_equal_interval_statistics,
    rakottyas_equal_interval_statistics,
) = [
    stats.get_classification_area_statistics(
        language['equal_interval_breaks'],
        language['area'],
        subpart_areas[area_key]
    )
    for stats, area_key in (
        (study_area_statistics, language['sum']),
        (ludas_statistics, 'Ludas'),
        (rakottyas_statistics, 'Rakottyás'),
    )
]

print('The equal interval classification data for the three areas')
for equal_interval_table in (
    study_area_equal_interval_statistics,
    ludas_equal_interval_statistics,
    rakottyas_equal_interval_statistics,
):
    print(equal_interval_table)

### Write equal interval statistics to csv

In [None]:
# One CSV per area for the equal-interval statistics (row index omitted).
equal_interval_csv_folder = '../../../results/oleasters_dhte_2023/area_statistics'
for area_name, equal_interval_table in (
    ('study_area', study_area_equal_interval_statistics),
    ('ludas', ludas_equal_interval_statistics),
    ('rakottyas', rakottyas_equal_interval_statistics),
):
    equal_interval_table.to_csv(
        f'{equal_interval_csv_folder}/{area_name}_equal_interval_statistics.csv',
        index=False
    )

### Write equal interval statistics to excel

In [None]:
# One sheet per area; sheet name = area label + equal-interval label +
# statistics label.
for sheet_prefix, equal_interval_table in (
    (language['study_area'], study_area_equal_interval_statistics),
    ('Ludas', ludas_equal_interval_statistics),
    ('Rakottyás', rakottyas_equal_interval_statistics),
):
    write_excel_sheet_from_dataframe(
        equal_interval_table,
        file_name=excel_file,
        sheet_name=f'{sheet_prefix} {language["equal_interval"]} {language["statistics"]}'
    )

### Create diagrams for equal interval statistics

In [None]:
# Draw the equal-interval diagram for each area; file names and title keys are
# derived from the area's short name.
equal_interval_diagram_folder = '../../../results/oleasters_dhte_2023/diagrams'
for area_name, stats, area_key in (
    ('study_area', study_area_statistics, language['sum']),
    ('ludas', ludas_statistics, 'Ludas'),
    ('rakottyas', rakottyas_statistics, 'Rakottyás'),
):
    stats.create_classification_diagram(
        classification_column_name=language['equal_interval_breaks'],
        sample_area=subpart_areas[area_key],
        path=f'{equal_interval_diagram_folder}/{area_name}_equal_interval_diagram.png',
        diagram_title=language[f'equal_interval_{area_name}_diagram_title']
    )

### Create pie chart for area distribution in equal interval classification

In [None]:
# Pie chart of the area ratio per equal-interval class, one per area.
equal_interval_pie_folder = '../../../results/oleasters_dhte_2023/diagrams'
for area_name, stats, area_key in (
    ('study_area', study_area_statistics, language['sum']),
    ('ludas', ludas_statistics, 'Ludas'),
    ('rakottyas', rakottyas_statistics, 'Rakottyás'),
):
    stats.create_classification_area_ratio_pie_chart(
        classification_column_name=language['equal_interval_breaks'],
        sample_area=subpart_areas[area_key],
        path=f'{equal_interval_pie_folder}/{area_name}_equal_interval_pie_chart.png',
        diagram_title=language[f'equal_interval_{area_name}_pie_chart_diagram_title']
    )

## Quartile classification analysis

### Calculate quartile statistics

In [None]:
# Per-class statistics for the quartile column, computed for each data set
# against the size of its own sample area (study area, Ludas, Rakottyás).
(
    study_area_quartile_statistics,
    ludas_quartile_statistics,
    rakottyas_quartile_statistics,
) = [
    stats.get_classification_area_statistics(
        language['quartiles'],
        language['area'],
        subpart_areas[area_key]
    )
    for stats, area_key in (
        (study_area_statistics, language['sum']),
        (ludas_statistics, 'Ludas'),
        (rakottyas_statistics, 'Rakottyás'),
    )
]

print('The quartile classification data for the three areas')
for quartile_table in (
    study_area_quartile_statistics,
    ludas_quartile_statistics,
    rakottyas_quartile_statistics,
):
    print(quartile_table)

### Write quartile statistics to csv

In [None]:
# One CSV per area for the quartile statistics (row index omitted).
quartile_csv_folder = '../../../results/oleasters_dhte_2023/area_statistics'
for area_name, quartile_table in (
    ('study_area', study_area_quartile_statistics),
    ('ludas', ludas_quartile_statistics),
    ('rakottyas', rakottyas_quartile_statistics),
):
    quartile_table.to_csv(
        f'{quartile_csv_folder}/{area_name}_quartile_statistics.csv',
        index=False
    )

### Write quartile statistics to excel

In [None]:
# One sheet per area; sheet name = area label + quartile label + statistics
# label.
for sheet_prefix, quartile_table in (
    (language['study_area'], study_area_quartile_statistics),
    ('Ludas', ludas_quartile_statistics),
    ('Rakottyás', rakottyas_quartile_statistics),
):
    write_excel_sheet_from_dataframe(
        quartile_table,
        file_name=excel_file,
        sheet_name=f'{sheet_prefix} {language["quartile"]} {language["statistics"]}'
    )

### Create diagrams for quartile statistics

In [None]:
# Draw the quartile diagram for each area. Note the output files are named
# '<area>_quartile_statistics.png', unlike the '*_diagram.png' names used for
# the other classifications.
quartile_diagram_folder = '../../../results/oleasters_dhte_2023/diagrams'
for area_name, stats, area_key in (
    ('study_area', study_area_statistics, language['sum']),
    ('ludas', ludas_statistics, 'Ludas'),
    ('rakottyas', rakottyas_statistics, 'Rakottyás'),
):
    stats.create_classification_diagram(
        classification_column_name=language['quartiles'],
        sample_area=subpart_areas[area_key],
        path=f'{quartile_diagram_folder}/{area_name}_quartile_statistics.png',
        diagram_title=language[f'quartiles_{area_name}_diagram_title']
    )

### Create pie chart for area distribution based on quartiles

In [None]:
# Pie chart of the area ratio per quartile class, one per area.
quartile_pie_folder = '../../../results/oleasters_dhte_2023/diagrams'
for area_name, stats, area_key in (
    ('study_area', study_area_statistics, language['sum']),
    ('ludas', ludas_statistics, 'Ludas'),
    ('rakottyas', rakottyas_statistics, 'Rakottyás'),
):
    stats.create_classification_area_ratio_pie_chart(
        classification_column_name=language['quartiles'],
        sample_area=subpart_areas[area_key],
        path=f'{quartile_pie_folder}/{area_name}_quartile_pie_chart.png',
        diagram_title=language[f'quartiles_{area_name}_pie_chart_diagram_title']
    )

## Estimate number of individuals

In [None]:
# Column names of the per-polygon minimum / maximum individual estimates.
minimum_individuals_column = language['estimated_minimum_individuals']
maximum_individuals_column = language['estimated_maximum_individuals']


def _total_individuals(stats, column_name):
    """Return the sum of column *column_name* of ``stats.data`` cast to int."""
    return stats.data[column_name].astype(int).sum()


# Totals per data set: lower and upper bound of the estimated individual count.
study_area_minimum_individuals = _total_individuals(study_area_statistics, minimum_individuals_column)
study_area_maximum_individuals = _total_individuals(study_area_statistics, maximum_individuals_column)
ludas_minimum_individuals = _total_individuals(ludas_statistics, minimum_individuals_column)
ludas_maximum_individuals = _total_individuals(ludas_statistics, maximum_individuals_column)
rakottyas_minimum_individuals = _total_individuals(rakottyas_statistics, minimum_individuals_column)
rakottyas_maximum_individuals = _total_individuals(rakottyas_statistics, maximum_individuals_column)

estimated_individuals = {
    area_label: {
        minimum_individuals_column: minimum_total,
        maximum_individuals_column: maximum_total,
    }
    for area_label, minimum_total, maximum_total in (
        ('Ludas', ludas_minimum_individuals, ludas_maximum_individuals),
        ('Rakottyás', rakottyas_minimum_individuals, rakottyas_maximum_individuals),
        (language['study_area'], study_area_minimum_individuals, study_area_maximum_individuals),
    )
}
print(estimated_individuals)

In [None]:
### Export estimated individuals to csv

In [None]:
# Nested dict of estimates (area -> {minimum, maximum}) goes to its own CSV.
estimated_individuals_csv = '../../../results/oleasters_dhte_2023/area_statistics/estimated_individuals.csv'
write_csv_from_dict(estimated_individuals, estimated_individuals_csv)

In [None]:
### Export estimated individuals to Excel
# Same workbook as the other sheets; the sheet is named after the
# language['estimated_individuals'] label.
write_excel_sheet_from_dict(
    dictionary=estimated_individuals,
    file_name=excel_file,
    sheet_name=language['estimated_individuals'],
)
