In [130]:
#python3
# Hans D. Escobar H. (hdescobarh@unal.edu.co)

import numpy as np
import pandas as pd
from scipy.stats import t
from scipy.stats import chi2

# Ejercicio sobre patrón de disposición espacial

Secciones:
1. Datos
2. Simbolos y ecuaciones<br>
    2.1. Generales<br>
    2.2. Para prueba Chi-cuadrado<br>
    2.3. Para prueba t-Student<br>
3. Resultados<br>
    3.1. S. argentea<br>
    3.2. S. corymbosa<br>

## 1. Datos

In [131]:
parcelas_dtypes = {'parcela': str, 'subparcela': str, 'individuos_corymbosa': np.uint32, 'individuos_argentea': np.uint32}
parcelas_df = pd.read_csv("./datos_espeletia_verjon.csv", header= 1)
parcelas_df

Unnamed: 0,parcela,subparcela,individuos_corymbosa,individuos_argentea
0,1,1,3,2
1,1,2,1,5
2,1,3,2,9
3,1,4,6,5
4,1,5,3,13
...,...,...,...,...
70,3,21,4,0
71,3,22,8,0
72,3,23,8,0
73,3,24,9,0


## 2. Simbolos y ecuaciones

### Generales

- $x$. Numeros de individuos en parcela.
- $f_{obs}(x)$. Frecuencia absoluta *observada*. Numero de parcelas con x individuos *observadas*.
- $k$. Máximo número de individuos observado en una parcela.
- $n = \sum_{x=0}^k f_{obs}(x)$. Número total de subparcelas.
- $\bar x$ media de individuos por parcela:
$$\bar x = \frac{\sum_{x=0}^k(f_{obs}(x) \cdot x)}{n}$$

### 2.1 Para prueba con Chi-cuadrado

- $P(x)$ Frecuencia o masa de probabilidad para una distribución de Poisson. Probabilidad de $x$. Prestar atención que el numero de euler esta elevado es a la media.
$$P(x) = \frac{\bar x^{x}}{e^{\bar x}  \cdot x!}$$
- $f_{esp}(x)$. Frecuencia absoluta *esperada*. Número de parcelas con x individuos *esperados*.
$$f_{esp}(x) = n \cdot P(x)$$
- $χ^2$. Estadístico Chi-cuadrado. Grados de libertad: $(k + 1) - 2$.
$$χ^2 = \sum_{x=0}^k(\frac{(f_{obs}(x)-f_{esp}(x))^2}{f_{esp}(x)})$$

### 2.2. Para prueba con t-Student

- $S_x^2$. Varianza muestral.
$$S_x^2 = \frac{\sum_{x=0}^k(f_{obs}(x) \cdot (x - \bar x)^2)}{n-1}$$
- $C.D.$ Coeficiente de dispersión (Blackman, 1942). Sí se tiene una distribución Poisson, $\bar x = S_x^2$.
$$\text{C.D.} = \frac{S_x^2}{\bar x}$$

| disposición | valor del coeficiente |
| ---- | ---- |
| uniforme | < 1 |
| azar | 1 |
| contagiosa | >1 |

- $S_e$. Error estandar.
$$S = \sqrt{\frac{2}{n-1}}$$
- $t_obs$ Estádistico t-Student. Grados de libertad: $n-1$.
$$t_{obs} = \frac{\text{C.D.}-1}{S_e}$$



In [132]:
def do_analysis(parcelas_df: pd.DataFrame, specie_index: int):
    specie = ["individuos_argentea", "individuos_corymbosa"][specie_index]
    mean_individuals = parcelas_df[specie].mean()
    total_plots = len(parcelas_df.index)
    frequency_table: dict[int, object]= dict(parcelas_df[specie].value_counts())
    for id in range(0, max(frequency_table.keys())):
        if id not in frequency_table.keys():
            frequency_table[id] = 0
    frequency_table_df= pd.DataFrame.from_dict(frequency_table, orient="index", columns= ["observed_f"]).sort_index().reset_index()
    frequency_table_df.rename(columns= {"index": "x"}, inplace= True)

    # chi-square test
    frequency_table_df["P_x"] = frequency_table_df.apply(lambda row: (np.power(mean_individuals,row.x) / ( np.exp(mean_individuals) *  np.math.factorial(row.x) )), axis=1) # type: ignore
    frequency_table_df["expected_f"] = frequency_table_df.apply(lambda row: total_plots * row.P_x, axis=1) # type: ignore
    chi2_statistic = frequency_table_df.apply(lambda row: (np.power(row.observed_f - row.expected_f, 2))/ row.expected_f, axis=1).sum()
    chi2_freedom_degrees = len(frequency_table_df.index) - 2 # total classes - 2
    chi2_critical = chi2.ppf(0.95, chi2_freedom_degrees) 
    chi2_p_value = 1-t.cdf(chi2_statistic, chi2_freedom_degrees)

    # t-Student test
    observed_variance = frequency_table_df.apply(
        lambda row: row.observed_f * np.power((row.x - mean_individuals), 2), axis=1
        ).sum() / (total_plots-1)
    coeficiente_dispersion = observed_variance / mean_individuals
    t_statistic = (coeficiente_dispersion - 1)/(np.sqrt(2/(total_plots-1)))
    t_freedom_degrees = total_plots-1
    t_critical = t.ppf(0.95, t_freedom_degrees)
    t_p_value = 1-t.cdf(t_statistic, t_freedom_degrees)

    # Summarize
    summary = pd.DataFrame.from_dict({
        "mean_x": mean_individuals,
        "total_n": total_plots,
        "chi2_statistic": chi2_statistic,
        "chi2_critical_0dot05": chi2_critical,
        "chi2_p_value": chi2_p_value,
        "observed_variance": observed_variance,
        "coeficiente_dispersion": coeficiente_dispersion,
        "t_statistic": t_statistic,
        "t_critical_0dot05": t_critical,
        "t_p_value": t_p_value
    }, orient = "index", columns = ["value"])
    output = summary, frequency_table_df
    return output
    

## 3. Resultados

### 3.1. S. argentea

#### 3.1.1 Todas las parcelas - S. argentea

In [133]:
all_plots_summary_argentea, all_plots_df_argentea = do_analysis(parcelas_df, 0)
all_plots_summary_argentea

Unnamed: 0,value
mean_x,2.12
total_n,75.0
chi2_statistic,158945.000829
chi2_critical_0dot05,21.02607
chi2_p_value,0.0
observed_variance,10.431351
coeficiente_dispersion,4.920449
t_statistic,23.847159
t_critical_0dot05,1.665707
t_p_value,0.0


In [134]:
all_plots_df_argentea

Unnamed: 0,x,observed_f,P_x,expected_f
0,0,40,0.1200316,9.002372
1,1,10,0.2544671,19.085029
2,2,2,0.2697351,20.230131
3,3,3,0.1906128,14.295959
4,4,3,0.1010248,7.576858
5,5,6,0.04283451,3.212588
6,6,3,0.01513486,1.135114
7,7,1,0.0045837,0.343778
8,8,2,0.001214681,0.091101
9,9,3,0.0002861247,0.021459


#### 3.1.2 Solo parcela 1 - S. argentea

In [135]:
plot_one_summary_argentea, plot_one_df_argentea = do_analysis(parcelas_df.query("parcela == 1"), 0)
plot_one_summary_argentea

Unnamed: 0,value
mean_x,4.92
total_n,25.0
chi2_statistic,226.6285
chi2_critical_0dot05,21.02607
chi2_p_value,0.0
observed_variance,14.82667
coeficiente_dispersion,3.01355
t_statistic,6.975142
t_critical_0dot05,1.710882
t_p_value,1.632926e-07


In [136]:
plot_one_df_argentea

Unnamed: 0,x,observed_f,P_x,expected_f
0,0,4,0.007299,0.182478
1,1,2,0.035912,0.897793
2,2,2,0.088343,2.208571
3,3,1,0.144882,3.622056
4,4,3,0.178205,4.455129
5,5,4,0.175354,4.383847
6,6,1,0.14379,3.594755
7,7,1,0.101064,2.526599
8,8,2,0.062154,1.553858
9,9,3,0.033978,0.849443


#### 3.1.3 Solo parcela 2 - S. argentea

In [137]:
plot_two_summary_argentea, plot_two_df_argentea = do_analysis(parcelas_df.query("parcela == 2"), 0)
plot_two_summary_argentea

Unnamed: 0,value
mean_x,0.56
total_n,25.0
chi2_statistic,154.7143
chi2_critical_0dot05,9.487729
chi2_p_value,5.234541e-09
observed_variance,1.34
coeficiente_dispersion,2.392857
t_statistic,4.824999
t_critical_0dot05,1.710882
t_p_value,3.240344e-05


In [138]:
plot_two_df_argentea

Unnamed: 0,x,observed_f,P_x,expected_f
0,0,17,0.571209,14.280227
1,1,6,0.319877,7.996927
2,2,0,0.089566,2.23914
3,3,1,0.016719,0.417973
4,4,0,0.002341,0.058516
5,5,1,0.000262,0.006554


#### 3.1.4 Solo parcela 3 - S. argentea

In [139]:
plot_three_summary_argentea, plot_three_df_argentea = do_analysis(parcelas_df.query("parcela == 3"), 0)
plot_three_summary_argentea

Unnamed: 0,value
mean_x,0.88
total_n,25.0
chi2_statistic,631.0756
chi2_critical_0dot05,11.0705
chi2_p_value,9.481305e-14
observed_variance,3.693333
coeficiente_dispersion,4.19697
t_statistic,11.07463
t_critical_0dot05,1.710882
t_p_value,3.229372e-11


In [140]:
plot_three_df_argentea

Unnamed: 0,x,observed_f,P_x,expected_f
0,0,19,0.414783,10.369573
1,1,2,0.365009,9.125224
2,2,0,0.160604,4.015099
3,3,1,0.04711,1.177762
4,4,0,0.010364,0.259108
5,5,1,0.001824,0.045603
6,6,2,0.000268,0.006688


### 3.2. S. corymbosa

#### 3.2.1. Todas las parcelas - S. corymbosa

In [141]:
all_plots_summary_corymbosa, all_plots_df_corymbosa = do_analysis(parcelas_df, 1)
all_plots_summary_corymbosa

Unnamed: 0,value
mean_x,2.64
total_n,75.0
chi2_statistic,139.4858
chi2_critical_0dot05,15.50731
chi2_p_value,3.885781e-15
observed_variance,6.665946
coeficiente_dispersion,2.52498
t_statistic,9.276088
t_critical_0dot05,1.665707
t_p_value,2.509104e-14


In [142]:
all_plots_summary_corymbosa

Unnamed: 0,value
mean_x,2.64
total_n,75.0
chi2_statistic,139.4858
chi2_critical_0dot05,15.50731
chi2_p_value,3.885781e-15
observed_variance,6.665946
coeficiente_dispersion,2.52498
t_statistic,9.276088
t_critical_0dot05,1.665707
t_p_value,2.509104e-14


#### 3.2.2.  Solo parcela 1 - S. corymbosa

In [143]:
plot_one_summary_corymbosa, plot_one_df_corymbosa = do_analysis(parcelas_df.query("parcela == 1"), 1)
plot_one_summary_corymbosa

Unnamed: 0,value
mean_x,2.2
total_n,25.0
chi2_statistic,10.758127
chi2_critical_0dot05,11.070498
chi2_p_value,6e-05
observed_variance,3.5
coeficiente_dispersion,1.590909
t_statistic,2.046969
t_critical_0dot05,1.710882
t_p_value,0.025881


In [144]:
plot_one_df_corymbosa

Unnamed: 0,x,observed_f,P_x,expected_f
0,0,6,0.110803,2.770079
1,1,4,0.243767,6.094174
2,2,5,0.268144,6.703591
3,3,4,0.196639,4.915967
4,4,3,0.108151,2.703782
5,5,1,0.047587,1.189664
6,6,2,0.017448,0.43621


#### 3.2.3. Solo parcela 2 - S. corymbosa

In [145]:
plot_two_summary_corymbosa, plot_two_df_corymbosa = do_analysis(parcelas_df.query("parcela == 2"), 1)
plot_two_summary_corymbosa

Unnamed: 0,value
mean_x,2.04
total_n,25.0
chi2_statistic,71.13947
chi2_critical_0dot05,14.06714
chi2_p_value,1.425948e-11
observed_variance,5.956667
coeficiente_dispersion,2.919935
t_statistic,6.650849
t_critical_0dot05,1.710882
t_p_value,3.51788e-07


In [146]:
plot_two_df_corymbosa

Unnamed: 0,x,observed_f,P_x,expected_f
0,0,9,0.130029,3.250718
1,1,7,0.265259,6.631464
2,2,1,0.270564,6.764094
3,3,1,0.183983,4.599584
4,4,1,0.093832,2.345788
5,5,3,0.038283,0.957081
6,6,2,0.013016,0.325408
7,7,0,0.003793,0.094833
8,8,1,0.000967,0.024182


#### 3.2.4.  Solo parcela 3 - S. corymbosa

In [147]:
plot_three_summary_corymbosa, plot_three_df_corymbosa= do_analysis(parcelas_df.query("parcela == 3"), 1)
plot_three_summary_corymbosa

Unnamed: 0,value
mean_x,3.68
total_n,25.0
chi2_statistic,76.68479
chi2_critical_0dot05,15.50731
chi2_p_value,4.660716e-13
observed_variance,9.393333
coeficiente_dispersion,2.552536
t_statistic,5.378143
t_critical_0dot05,1.710882
t_p_value,8.001557e-06


In [148]:
plot_three_df_corymbosa

Unnamed: 0,x,observed_f,P_x,expected_f
0,0,6,0.025223,0.630574
1,1,1,0.092821,2.320514
2,2,4,0.17079,4.269745
3,3,2,0.209502,5.237554
4,4,2,0.192742,4.81855
5,5,3,0.141858,3.546453
6,6,1,0.087006,2.175158
7,7,1,0.04574,1.143511
8,8,4,0.021041,0.526015
9,9,1,0.008603,0.215082


# Exportar

In [149]:
reporte_argentea_df = all_plots_summary_argentea.copy()
reporte_argentea_df.rename(columns = {"value": "todas"}, inplace = True)
reporte_argentea_df = reporte_argentea_df.join(plot_one_summary_argentea.rename(columns = {"value": "parcela_1"}))
reporte_argentea_df = reporte_argentea_df.join(plot_two_summary_argentea.rename(columns = {"value": "parcela_2"}))
reporte_argentea_df = reporte_argentea_df.join(plot_three_summary_argentea.rename(columns = {"value": "parcela_3"}))
reporte_argentea_df.to_csv("./resultados_argentea.csv")
reporte_argentea_df

Unnamed: 0,todas,parcela_1,parcela_2,parcela_3
mean_x,2.12,4.92,0.56,0.88
total_n,75.0,25.0,25.0,25.0
chi2_statistic,158945.000829,226.6285,154.7143,631.0756
chi2_critical_0dot05,21.02607,21.02607,9.487729,11.0705
chi2_p_value,0.0,0.0,5.234541e-09,9.481305e-14
observed_variance,10.431351,14.82667,1.34,3.693333
coeficiente_dispersion,4.920449,3.01355,2.392857,4.19697
t_statistic,23.847159,6.975142,4.824999,11.07463
t_critical_0dot05,1.665707,1.710882,1.710882,1.710882
t_p_value,0.0,1.632926e-07,3.240344e-05,3.229372e-11


In [150]:
reporte_corymbosa_df = all_plots_summary_corymbosa.copy()
reporte_corymbosa_df.rename(columns = {"value": "todas"}, inplace = True)
reporte_corymbosa_df = reporte_corymbosa_df.join(plot_one_summary_corymbosa.rename(columns = {"value": "parcela_1"}))
reporte_corymbosa_df = reporte_corymbosa_df.join(plot_two_summary_corymbosa.rename(columns = {"value": "parcela_2"}))
reporte_corymbosa_df = reporte_corymbosa_df.join(plot_three_summary_corymbosa.rename(columns = {"value": "parcela_3"}))
reporte_corymbosa_df.to_csv("./resultados_corymbosa.csv")
reporte_corymbosa_df

Unnamed: 0,todas,parcela_1,parcela_2,parcela_3
mean_x,2.64,2.2,2.04,3.68
total_n,75.0,25.0,25.0,25.0
chi2_statistic,139.4858,10.758127,71.13947,76.68479
chi2_critical_0dot05,15.50731,11.070498,14.06714,15.50731
chi2_p_value,3.885781e-15,6e-05,1.425948e-11,4.660716e-13
observed_variance,6.665946,3.5,5.956667,9.393333
coeficiente_dispersion,2.52498,1.590909,2.919935,2.552536
t_statistic,9.276088,2.046969,6.650849,5.378143
t_critical_0dot05,1.665707,1.710882,1.710882,1.710882
t_p_value,2.509104e-14,0.025881,3.51788e-07,8.001557e-06
