<a href="https://colab.research.google.com/github/boothmanrylan/accuracyAssessmentTools/blob/main/increasing_sample_size.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Installs and Imports

In [None]:
%cd /content
!git clone https://github.com/boothmanrylan/accuracyAssessmentTools.git
%cd accuracyAssessmentTools
!pip install .

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from acc_assessment.olofsson import Olofsson
from acc_assessment.utils import expand_error_matrix

# Create Assessments

In [None]:
# Smallest sample size for which an assessment is built; with fewer samples
# than this a divide-by-zero error is very likely.
MIN = 10

# Fixed seed for the shuffle below so that Restart & Run All reproduces the
# same ordering of samples, and therefore the same plots.
SEED = 42

# Error matrix counts, one list per class.
# NOTE(review): whether each key is a map class or a reference class depends on
# how expand_error_matrix interprets rows vs. columns -- confirm against it.
data = {
    "Deforestation":     [66, 0, 1, 2],
    "Forest gain":       [0, 55, 0, 1],
    "Stable forest":     [5, 8, 153, 9],
    "Stable non-forest": [4, 12, 11, 313],
}
# Mapped area of each class, passed to every Olofsson assessment.
# NOTE(review): units are presumably pixels -- confirm.
mapped_area = {
    "Deforestation":     200000,
    "Forest gain":       150000,
    "Stable forest":     3200000,
    "Stable non-forest": 6450000,
}

df = pd.DataFrame(data)
# Label the rows with the same class names as the columns.
df.index = df.columns

# sample(frac=1) shuffles the data to ensure they are in a random order;
# random_state makes that order the same on every run.
longform_data = expand_error_matrix(df, "map", "ref").sample(
    frac=1, random_state=SEED
)

# create n assessments where the ith assessment is based on the first i samples
all_assessments = [
    Olofsson(longform_data.iloc[:i, :], mapped_area, "map", "ref")
    for i in range(MIN, longform_data.shape[0] + 1)
]

# Plot results

In [None]:
def make_plot(x, value, std, name, force_ylim=True):
    """Plot an estimate and its uncertainty against sample size.

    `value` is drawn against `x` on the left axis together with a shaded band
    from `value - std` to `value + std`. `std` is additionally drawn on its
    own, in red, on a twin right-hand axis. The figure is shown with
    `plt.show()`.

    Args:
        x: Non-empty 1-D array-like of x coordinates (labelled "Sample Size").
        value: Array-like of estimates, one per entry of `x`.
        std: Array-like of uncertainties, one per entry of `x`.
        name: Label used for the estimate's legend entry and left y-axis.
        force_ylim: If True, fix the left y-axis range to (0.5, 1.0);
            otherwise let matplotlib choose the range.

    Returns:
        None.
    """
    # Coerce to arrays so plain lists (which do not support `value - std`)
    # and pandas Series (whose x[-1] is label-based) behave like ndarrays.
    x = np.asarray(x)
    value = np.asarray(value)
    std = np.asarray(std)

    fig, ax = plt.subplots()

    # Second y-axis sharing the same x-axis, used for the uncertainty alone.
    twin_axis = ax.twinx()

    p1, = ax.plot(x, value, label=name)
    ax.fill_between(x, value - std, value + std, alpha=0.3)
    p2, = twin_axis.plot(x, std, label="Standard Deviation", color="red")

    ax.set(xlim=(x[0], x[-1]), xlabel="Sample Size")
    ax.set(ylabel=name)
    if force_ylim:
        ax.set(ylim=(0.5, 1.0))
    # Colour each y-axis like the line it belongs to.
    ax.yaxis.label.set_color(p1.get_color())
    ax.tick_params(axis="y", colors=p1.get_color())

    twin_axis.set(ylabel="Standard Deviation")
    twin_axis.yaxis.label.set_color(p2.get_color())
    twin_axis.tick_params(axis="y", colors=p2.get_color())

    # One legend holding the lines from both axes.
    ax.legend(handles=[p1, p2])

    plt.show()

In [None]:
# get the users accuracy and standard error for the deforestation classes
# User's accuracy of the "Deforestation" class for every assessment; each
# result is indexed as (estimate, standard error).
deforestation_users_accuracy = [
    assessment.users_accuracy("Deforestation") for assessment in all_assessments
]
ua = np.array([result[0] for result in deforestation_users_accuracy])
se = np.array([result[1] for result in deforestation_users_accuracy])
# x-axis values start at MIN and increase by one per assessment.
x = np.arange(MIN, MIN + ua.shape[0])

make_plot(x, ua, se, "Deforestation User's Accuracy")

In [None]:
# plot the overall accuracy
# Overall accuracy for every assessment; each result is indexed as
# (estimate, standard error).
overall_accuracy = [
    assessment.overall_accuracy() for assessment in all_assessments
]
oa = np.array([result[0] for result in overall_accuracy])
oa_se = np.array([result[1] for result in overall_accuracy])
# Size the x-axis from this cell's own results (oa) rather than from a
# variable left behind by another cell, so the cell does not rely on
# hidden kernel state.
x = np.arange(0, oa.shape[0]) + MIN

make_plot(x, oa, oa_se, "Overall Accuracy")

In [None]:
# Area estimate for the "Deforestation" class for every assessment; element 0
# of each result is plotted as the estimate and element 1 as its error.
deforested_area = [
    assessment.area("Deforestation") for assessment in all_assessments
]
area = np.array([result[0] for result in deforested_area])
error = np.array([result[1] for result in deforested_area])
# x-axis values start at MIN and increase by one per assessment.
x = np.arange(MIN, MIN + area.shape[0])

# The area is not a proportion, so do not force the y-axis range.
make_plot(x, area, error, "Deforested Area (Pixels)", force_ylim=False)