In [188]:
#python3
# Hans D. Escobar H. (hdescobarh@unal.edu.co)

import numpy as np
import pandas as pd
from scipy.stats import t

# Ejercicio sobre patrón de disposición espacial

## Datos

In [189]:
parcelas_dtypes = {'parcela': str, 'subparcela': str, 'individuos_corymbosa': np.uint32, 'individuos_argentea': np.uint32}
parcelas_df = pd.read_csv("./datos_espeletia_verjon.csv", header= 1)
parcelas_df

Unnamed: 0,parcela,subparcela,individuos_corymbosa,individuos_argentea
0,1,1,3,2
1,1,2,1,5
2,1,3,2,9
3,1,4,6,5
4,1,5,3,13
...,...,...,...,...
70,3,21,4,0
71,3,22,8,0
72,3,23,8,0
73,3,24,9,0


In [190]:
def do_analysis_argentea(parcelas_df, specie_index: int):
    specie = ["individuos_argentea", "individuos_corymbosa"][specie_index]
    mean_individuals = parcelas_df[specie].mean()
    total_individuals = parcelas_df[specie].sum()
    all_plots: dict[int, object]= dict(parcelas_df[specie].value_counts())
    for id in range(0, max(all_plots.keys())):
        if id not in all_plots.keys():
            all_plots[id] = 0
    all_plots_df= pd.DataFrame.from_dict(all_plots, orient="index", columns= ["observed_f"]).sort_index().reset_index()
    all_plots_df.rename(columns= {"index": "x"}, inplace= True)
    all_plots_df["P_x"] = all_plots_df.apply(lambda row: (np.power(mean_individuals,row.x) / ( np.exp(mean_individuals) *  np.math.factorial(row.x) )), axis=1) # type: ignore
    all_plots_df["expected_f"] = all_plots_df.apply(lambda row: total_individuals * row.P_x, axis=1) # type: ignore
    observed_variance = all_plots_df.apply(lambda row: row.observed_f* np.power((row.x - mean_individuals), 2), axis=1).sum() / (total_individuals-1)
    coeficiente_dispersion = observed_variance / mean_individuals
    t_statistic = (coeficiente_dispersion - 1)/(np.sqrt(2/(total_individuals-1)))
    t_critical = t.ppf(0.95, total_individuals-1)
    p_value = 1-t.cdf(t_statistic, total_individuals-1)
    output = {
        "mean_x": mean_individuals,
        "total_n": total_individuals,
        "table": all_plots_df,
        "observed_variance": observed_variance,
        "coeficiente_dispersion": coeficiente_dispersion,
        "t_statistic": t_statistic,
        "t_critical": t_critical,
        "p_value": p_value
    }
    return output
    

## Coeficiente de dispersión - Blackman

| disposición | valor del coeficiente |
| ---- | ---- |
| uniforme | < 1 |
| azar | 1 |
| contagiosa | >1 |

## Todas las parcelas - S. argentea

In [191]:
all_plots = do_analysis_argentea(parcelas_df, 0)
all_plots_df = all_plots.pop("table")
all_plots

{'mean_x': 2.12,
 'total_n': 159,
 'observed_variance': 4.885569620253165,
 'coeficiente_dispersion': 2.3045139718175305,
 't_statistic': 11.594773801618759,
 't_critical': 1.6545548754376096,
 'p_value': 0.0}

In [192]:
all_plots_df

Unnamed: 0,x,observed_f,P_x,expected_f
0,0,40,0.1200316,19.085029
1,1,10,0.2544671,40.460261
2,2,2,0.2697351,42.887877
3,3,3,0.1906128,30.307433
4,4,3,0.1010248,16.06294
5,5,6,0.04283451,6.810686
6,6,3,0.01513486,2.406443
7,7,1,0.0045837,0.728808
8,8,2,0.001214681,0.193134
9,9,3,0.0002861247,0.045494


## Solo parcela 1 - S. argentea

In [193]:
plot_one = do_analysis_argentea(parcelas_df.query("parcela == 1"), 0)
plot_one_df = plot_one.pop("table")
plot_one

{'mean_x': 4.92,
 'total_n': 123,
 'observed_variance': 2.9167213114754102,
 'coeficiente_dispersion': 0.5928295348527257,
 't_statistic': -3.1801029934552605,
 't_critical': 1.6574394994001826,
 'p_value': 0.9990666689299641}

In [194]:
plot_one_df

Unnamed: 0,x,observed_f,P_x,expected_f
0,0,4,0.007299,0.897793
1,1,2,0.035912,4.417142
2,2,2,0.088343,10.866169
3,3,1,0.144882,17.820518
4,4,3,0.178205,21.919237
5,5,4,0.175354,21.568529
6,6,1,0.14379,17.686194
7,7,1,0.101064,12.430868
8,8,2,0.062154,7.644984
9,9,3,0.033978,4.179258


## Solo parcela 2 - S. argentea

In [195]:
plot_two = do_analysis_argentea(parcelas_df.query("parcela == 2"), 0)
plot_two_df = plot_two.pop("table")
plot_two

{'mean_x': 0.56,
 'total_n': 14,
 'observed_variance': 2.4738461538461536,
 'coeficiente_dispersion': 4.417582417582417,
 't_statistic': 8.713159718282174,
 't_critical': 1.7709333959867988,
 'p_value': 4.337482845118146e-07}

In [196]:
plot_two_df

Unnamed: 0,x,observed_f,P_x,expected_f
0,0,17,0.571209,7.996927
1,1,6,0.319877,4.478279
2,2,0,0.089566,1.253918
3,3,1,0.016719,0.234065
4,4,0,0.002341,0.032769
5,5,1,0.000262,0.00367


## Solo parcela 3 - S. argentea

In [197]:
plot_three = do_analysis_argentea(parcelas_df.query("parcela == 3"), 0)
plot_three_df = plot_three.pop("table")
plot_three

{'mean_x': 0.88,
 'total_n': 22,
 'observed_variance': 4.220952380952381,
 'coeficiente_dispersion': 4.796536796536797,
 't_statistic': 12.302185265159512,
 't_critical': 1.7207429028118775,
 'p_value': 2.2983281944277678e-11}

In [198]:
plot_three_df

Unnamed: 0,x,observed_f,P_x,expected_f
0,0,19,0.414783,9.125224
1,1,2,0.365009,8.030197
2,2,0,0.160604,3.533287
3,3,1,0.04711,1.036431
4,4,0,0.010364,0.228015
5,5,1,0.001824,0.040131
6,6,2,0.000268,0.005886


## Todas las parcelas - S. corymbosa

In [199]:
all_plots = do_analysis_argentea(parcelas_df, 0)
all_plots_df = all_plots.pop("table")
all_plots

{'mean_x': 2.12,
 'total_n': 159,
 'observed_variance': 4.885569620253165,
 'coeficiente_dispersion': 2.3045139718175305,
 't_statistic': 11.594773801618759,
 't_critical': 1.6545548754376096,
 'p_value': 0.0}

In [200]:
all_plots_df

Unnamed: 0,x,observed_f,P_x,expected_f
0,0,40,0.1200316,19.085029
1,1,10,0.2544671,40.460261
2,2,2,0.2697351,42.887877
3,3,3,0.1906128,30.307433
4,4,3,0.1010248,16.06294
5,5,6,0.04283451,6.810686
6,6,3,0.01513486,2.406443
7,7,1,0.0045837,0.728808
8,8,2,0.001214681,0.193134
9,9,3,0.0002861247,0.045494


## Solo parcela 1 - S. corymbosa

In [201]:
plot_one = do_analysis_argentea(parcelas_df.query("parcela == 1"), 1)
plot_one_df = plot_one.pop("table")
plot_one

{'mean_x': 2.2,
 'total_n': 55,
 'observed_variance': 1.5555555555555558,
 'coeficiente_dispersion': 0.7070707070707071,
 't_statistic': -1.5221052551362861,
 't_critical': 1.6735649059209428,
 'p_value': 0.9330912168615177}

In [202]:
plot_one_df

Unnamed: 0,x,observed_f,P_x,expected_f
0,0,6,0.110803,6.094174
1,1,4,0.243767,13.407182
2,2,5,0.268144,14.7479
3,3,4,0.196639,10.815127
4,4,3,0.108151,5.94832
5,5,1,0.047587,2.617261
6,6,2,0.017448,0.959662


## Solo parcela 2 - S. corymbosa

In [203]:
plot_two = do_analysis_argentea(parcelas_df.query("parcela == 2"), 1)
plot_two_df = plot_two.pop("table")
plot_two

{'mean_x': 2.04,
 'total_n': 51,
 'observed_variance': 2.8592,
 'coeficiente_dispersion': 1.4015686274509804,
 't_statistic': 2.007843137254902,
 't_critical': 1.6759050245283311,
 'p_value': 0.025039317994679178}

In [204]:
plot_two_df

Unnamed: 0,x,observed_f,P_x,expected_f
0,0,9,0.130029,6.631464
1,1,7,0.265259,13.528187
2,2,1,0.270564,13.798751
3,3,1,0.183983,9.383151
4,4,1,0.093832,4.785407
5,5,3,0.038283,1.952446
6,6,2,0.013016,0.663832
7,7,0,0.003793,0.19346
8,8,1,0.000967,0.049332


## Solo parcela 3 - S. corymbosa

In [205]:
plot_three = do_analysis_argentea(parcelas_df.query("parcela == 3"), 1)
plot_three_df = plot_three.pop("table")
plot_three

{'mean_x': 3.68,
 'total_n': 92,
 'observed_variance': 2.4773626373626376,
 'coeficiente_dispersion': 0.6731963688485428,
 't_statistic': -2.204411011287796,
 't_critical': 1.6617711550302645,
 'p_value': 0.9849912957831088}

In [206]:
plot_three_df

Unnamed: 0,x,observed_f,P_x,expected_f
0,0,6,0.025223,2.320514
1,1,1,0.092821,8.53949
2,2,4,0.17079,15.712662
3,3,2,0.209502,19.274199
4,4,2,0.192742,17.732263
5,5,3,0.141858,13.050946
6,6,1,0.087006,8.00458
7,7,1,0.04574,4.208122
8,8,4,0.021041,1.935736
9,9,1,0.008603,0.791501
