In [1]:
#python3
# Hans D. Escobar H. (hdescobarh@unal.edu.co)

import numpy as np
import pandas as pd
from scipy.stats import t
from scipy.stats import chi2

# Ejercicio sobre patrón de disposición espacial

Secciones:
1. Datos
2. Simbolos y ecuaciones<br>
    2.1. Generales<br>
    2.2. Para prueba Chi-cuadrado<br>
    2.3. Para prueba t-Student<br>
3. Resultados<br>
    3.1. S. argentea<br>
    3.2. S. corymbosa<br>

## 1. Datos

In [2]:
parcelas_dtypes = {'parcela': str, 'subparcela': str, 'individuos_corymbosa': np.uint32, 'individuos_argentea': np.uint32}
parcelas_df = pd.read_csv("./datos_espeletia_verjon.csv", header= 1)
parcelas_df

Unnamed: 0,parcela,subparcela,individuos_corymbosa,individuos_argentea
0,1,1,3,2
1,1,2,1,5
2,1,3,2,9
3,1,4,6,5
4,1,5,3,13
...,...,...,...,...
70,3,21,4,0
71,3,22,8,0
72,3,23,8,0
73,3,24,9,0


## 2. Simbolos y ecuaciones

### Generales

- $x$. Numeros de individuos en parcela.
- $f_{obs}(x)$. Frecuencia absoluta *observada*. Numero de parcelas con x individuos *observadas*.
- $k$. Máximo número de individuos observado en una parcela.
- $n = \sum_{x=0}^k f_{obs}(x)$. Número total de individuos (incluyendo todas las parcelas)
- $\bar x$ media de individuos por parcela:
$$\bar x = \frac{\sum_{x=0}^k(f_{obs}(x) \cdot x)}{n}$$

### 2.1 Para prueba con Chi-cuadrado

- $P(x)$ Frecuencia o masa de probabilidad para una distribución de Poisson. Probabilidad de $x$. Prestar atención que el numero de euler esta elevado es a la media.
$$P(x) = \frac{\bar x^{x}}{e^{\bar x}  \cdot x!}$$
- $f_{esp}(x)$. Frecuencia absoluta *esperada*. Número de parcelas con x individuos *esperados*.
$$f_{esp}(x) = n \cdot P(x)$$
- $χ^2$. Estadístico Chi-cuadrado. Grados de libertad: $(k + 1) - 2$.
$$χ^2 = \sum_{x=0}^k(\frac{(f_{obs}(x)-f_{esp}(x))^2}{f_{esp}(x)})$$

### 2.2. Para prueba con t-Student

- $S_x^2$. Varianza muestral.
$$S_x^2 = \frac{\sum_{x=0}^k(f_{obs}(x) \cdot (x - \bar x)^2)}{n-1}$$
- $C.D.$ Coeficiente de dispersión (Blackman, 1942). Sí se tiene una distribución Poisson, $\bar x = S_x^2$.
$$\text{C.D.} = \frac{S_x^2}{\bar x}$$

| disposición | valor del coeficiente |
| ---- | ---- |
| uniforme | < 1 |
| azar | 1 |
| contagiosa | >1 |

- $S_e$. Error estandar.
$$S = \sqrt{\frac{2}{n-1}}$$
- $t_obs$ Estádistico t-Student. Grados de libertad: $n-1$.
$$t_{obs} = \frac{\text{C.D.}-1}{S_e}$$



In [3]:
def do_analysis_argentea(parcelas_df, specie_index: int):
    specie = ["individuos_argentea", "individuos_corymbosa"][specie_index]
    mean_individuals = parcelas_df[specie].mean()
    total_individuals = parcelas_df[specie].sum()
    all_plots: dict[int, object]= dict(parcelas_df[specie].value_counts())
    for id in range(0, max(all_plots.keys())):
        if id not in all_plots.keys():
            all_plots[id] = 0
    all_plots_df= pd.DataFrame.from_dict(all_plots, orient="index", columns= ["observed_f"]).sort_index().reset_index()
    all_plots_df.rename(columns= {"index": "x"}, inplace= True)
    # chi-square test
    all_plots_df["P_x"] = all_plots_df.apply(lambda row: (np.power(mean_individuals,row.x) / ( np.exp(mean_individuals) *  np.math.factorial(row.x) )), axis=1) # type: ignore
    all_plots_df["expected_f"] = all_plots_df.apply(lambda row: total_individuals * row.P_x, axis=1) # type: ignore
    chi2_statistic = all_plots_df.apply(lambda row: (np.power(row.observed_f - row.expected_f, 2))/ row.expected_f, axis=1).sum()/(total_individuals-1)
    chi2_freedom_degrees = len(all_plots_df.index) - 2 # total categories - 2
    chi2_critical = chi2.ppf(0.95, chi2_freedom_degrees) 
    chi2_p_value = 1-t.cdf(chi2_statistic, chi2_freedom_degrees)

    # t-Student test
    observed_variance = all_plots_df.apply(lambda row: row.observed_f* np.power((row.x - mean_individuals), 2), axis=1).sum() / (total_individuals-1)
    coeficiente_dispersion = observed_variance / mean_individuals
    t_statistic = (coeficiente_dispersion - 1)/(np.sqrt(2/(total_individuals-1)))
    t_freedom_degrees = total_individuals-1
    t_critical = t.ppf(0.95, t_freedom_degrees)
    t_p_value = 1-t.cdf(t_statistic, t_freedom_degrees)
    summary = pd.DataFrame.from_dict({
        "mean_x": mean_individuals,
        "total_n": total_individuals,
        "chi2_statistic": chi2_statistic,
        "chi2_critical_0dot05": chi2_critical,
        "chi2_p_value": chi2_p_value,
        "observed_variance": observed_variance,
        "coeficiente_dispersion": coeficiente_dispersion,
        "t_statistic": t_statistic,
        "t_critical_0dot05": t_critical,
        "t_p_value": t_p_value
    }, orient = "index", columns = ["value"])
    output = summary, all_plots_df
    return output
    

## 3. Resultados

### 3.1. S. argentea

#### 3.1.1 Todas las parcelas - S. argentea

In [4]:
all_plots_summary_argentea, all_plots_df_argentea = do_analysis_argentea(parcelas_df, 0)
all_plots_summary_argentea

Unnamed: 0,value
mean_x,2.12
total_n,159.0
chi2_statistic,474.800217
chi2_critical_0dot05,21.02607
chi2_p_value,0.0
observed_variance,4.88557
coeficiente_dispersion,2.304514
t_statistic,11.594774
t_critical_0dot05,1.654555
t_p_value,0.0


In [5]:
all_plots_df_argentea

Unnamed: 0,x,observed_f,P_x,expected_f
0,0,40,0.1200316,19.085029
1,1,10,0.2544671,40.460261
2,2,2,0.2697351,42.887877
3,3,3,0.1906128,30.307433
4,4,3,0.1010248,16.06294
5,5,6,0.04283451,6.810686
6,6,3,0.01513486,2.406443
7,7,1,0.0045837,0.728808
8,8,2,0.001214681,0.193134
9,9,3,0.0002861247,0.045494


#### 3.1.2 Solo parcela 1 - S. argentea

In [6]:
plot_one_summary_argentea, plot_one_df_argentea = do_analysis_argentea(parcelas_df.query("parcela == 1"), 0)
plot_one_summary_argentea

Unnamed: 0,value
mean_x,4.92
total_n,123.0
chi2_statistic,1.016995
chi2_critical_0dot05,21.02607
chi2_p_value,0.164611
observed_variance,2.916721
coeficiente_dispersion,0.59283
t_statistic,-3.180103
t_critical_0dot05,1.657439
t_p_value,0.999067


In [7]:
plot_one_df_argentea

Unnamed: 0,x,observed_f,P_x,expected_f
0,0,4,0.007299,0.897793
1,1,2,0.035912,4.417142
2,2,2,0.088343,10.866169
3,3,1,0.144882,17.820518
4,4,3,0.178205,21.919237
5,5,4,0.175354,21.568529
6,6,1,0.14379,17.686194
7,7,1,0.101064,12.430868
8,8,2,0.062154,7.644984
9,9,3,0.033978,4.179258


#### 3.1.3 Solo parcela 2 - S. argentea

In [8]:
plot_two_summary_argentea, plot_two_df_argentea = do_analysis_argentea(parcelas_df.query("parcela == 2"), 0)
plot_two_summary_argentea

Unnamed: 0,value
mean_x,0.56
total_n,14.0
chi2_statistic,21.91686
chi2_critical_0dot05,9.487729
chi2_p_value,1.282343e-05
observed_variance,2.473846
coeficiente_dispersion,4.417582
t_statistic,8.71316
t_critical_0dot05,1.770933
t_p_value,4.337483e-07


In [9]:
plot_two_df_argentea

Unnamed: 0,x,observed_f,P_x,expected_f
0,0,17,0.571209,7.996927
1,1,6,0.319877,4.478279
2,2,0,0.089566,1.253918
3,3,1,0.016719,0.234065
4,4,0,0.002341,0.032769
5,5,1,0.000262,0.00367


#### 3.1.4 Solo parcela 3 - S. argentea

In [10]:
plot_three_summary_argentea, plot_three_df_argentea = do_analysis_argentea(parcelas_df.query("parcela == 3"), 0)
plot_three_summary_argentea

Unnamed: 0,value
mean_x,0.88
total_n,22.0
chi2_statistic,34.16861
chi2_critical_0dot05,11.0705
chi2_p_value,2.019111e-07
observed_variance,4.220952
coeficiente_dispersion,4.796537
t_statistic,12.30219
t_critical_0dot05,1.720743
t_p_value,2.298328e-11


In [11]:
plot_three_df_argentea

Unnamed: 0,x,observed_f,P_x,expected_f
0,0,19,0.414783,9.125224
1,1,2,0.365009,8.030197
2,2,0,0.160604,3.533287
3,3,1,0.04711,1.036431
4,4,0,0.010364,0.228015
5,5,1,0.001824,0.040131
6,6,2,0.000268,0.005886


### 3.2. S. corymbosa

#### 3.2.1. Todas las parcelas - S. corymbosa

In [12]:
all_plots_summary_corymbosa, all_plots_df_corymbosa = do_analysis_argentea(parcelas_df, 1)
all_plots_summary_corymbosa

Unnamed: 0,value
mean_x,2.64
total_n,198.0
chi2_statistic,0.6557
chi2_critical_0dot05,15.507313
chi2_p_value,0.265209
observed_variance,2.503959
coeficiente_dispersion,0.948469
t_statistic,-0.511426
t_critical_0dot05,1.652625
t_p_value,0.695187


In [13]:
all_plots_summary_corymbosa

Unnamed: 0,value
mean_x,2.64
total_n,198.0
chi2_statistic,0.6557
chi2_critical_0dot05,15.507313
chi2_p_value,0.265209
observed_variance,2.503959
coeficiente_dispersion,0.948469
t_statistic,-0.511426
t_critical_0dot05,1.652625
t_p_value,0.695187


#### 3.2.2.  Solo parcela 1 - S. corymbosa

In [14]:
plot_one_summary_corymbosa, plot_one_df_corymbosa = do_analysis_argentea(parcelas_df.query("parcela == 1"), 1)
plot_one_summary_corymbosa

Unnamed: 0,value
mean_x,2.2
total_n,55.0
chi2_statistic,0.387558
chi2_critical_0dot05,11.070498
chi2_p_value,0.357146
observed_variance,1.555556
coeficiente_dispersion,0.707071
t_statistic,-1.522105
t_critical_0dot05,1.673565
t_p_value,0.933091


In [15]:
plot_one_df_corymbosa

Unnamed: 0,x,observed_f,P_x,expected_f
0,0,6,0.110803,6.094174
1,1,4,0.243767,13.407182
2,2,5,0.268144,14.7479
3,3,4,0.196639,10.815127
4,4,3,0.108151,5.94832
5,5,1,0.047587,2.617261
6,6,2,0.017448,0.959662


#### 3.2.3. Solo parcela 2 - S. corymbosa

In [16]:
plot_two_summary_corymbosa, plot_two_df_corymbosa = do_analysis_argentea(parcelas_df.query("parcela == 2"), 1)
plot_two_summary_corymbosa

Unnamed: 0,value
mean_x,2.04
total_n,51.0
chi2_statistic,0.962332
chi2_critical_0dot05,14.06714
chi2_p_value,0.18397
observed_variance,2.8592
coeficiente_dispersion,1.401569
t_statistic,2.007843
t_critical_0dot05,1.675905
t_p_value,0.025039


In [17]:
plot_two_df_corymbosa

Unnamed: 0,x,observed_f,P_x,expected_f
0,0,9,0.130029,6.631464
1,1,7,0.265259,13.528187
2,2,1,0.270564,13.798751
3,3,1,0.183983,9.383151
4,4,1,0.093832,4.785407
5,5,3,0.038283,1.952446
6,6,2,0.013016,0.663832
7,7,0,0.003793,0.19346
8,8,1,0.000967,0.049332


#### 3.2.4.  Solo parcela 3 - S. corymbosa

In [18]:
plot_three_summary_corymbosa, plot_three_df_corymbosa= do_analysis_argentea(parcelas_df.query("parcela == 3"), 1)
plot_three_summary_corymbosa

Unnamed: 0,value
mean_x,3.68
total_n,92.0
chi2_statistic,0.760808
chi2_critical_0dot05,15.507313
chi2_p_value,0.2343
observed_variance,2.477363
coeficiente_dispersion,0.673196
t_statistic,-2.204411
t_critical_0dot05,1.661771
t_p_value,0.984991


In [19]:
plot_three_df_corymbosa

Unnamed: 0,x,observed_f,P_x,expected_f
0,0,6,0.025223,2.320514
1,1,1,0.092821,8.53949
2,2,4,0.17079,15.712662
3,3,2,0.209502,19.274199
4,4,2,0.192742,17.732263
5,5,3,0.141858,13.050946
6,6,1,0.087006,8.00458
7,7,1,0.04574,4.208122
8,8,4,0.021041,1.935736
9,9,1,0.008603,0.791501


# Exportar

In [20]:
reporte_argentea_df = all_plots_summary_argentea.copy()
reporte_argentea_df.rename(columns = {"value": "todas"}, inplace = True)
reporte_argentea_df = reporte_argentea_df.join(plot_one_summary_argentea.rename(columns = {"value": "parcela_1"}))
reporte_argentea_df = reporte_argentea_df.join(plot_two_summary_argentea.rename(columns = {"value": "parcela_2"}))
reporte_argentea_df = reporte_argentea_df.join(plot_three_summary_argentea.rename(columns = {"value": "parcela_3"}))
reporte_argentea_df.to_csv("./resultados_argentea.csv")
reporte_argentea_df

Unnamed: 0,todas,parcela_1,parcela_2,parcela_3
mean_x,2.12,4.92,0.56,0.88
total_n,159.0,123.0,14.0,22.0
chi2_statistic,474.800217,1.016995,21.91686,34.16861
chi2_critical_0dot05,21.02607,21.02607,9.487729,11.0705
chi2_p_value,0.0,0.164611,1.282343e-05,2.019111e-07
observed_variance,4.88557,2.916721,2.473846,4.220952
coeficiente_dispersion,2.304514,0.59283,4.417582,4.796537
t_statistic,11.594774,-3.180103,8.71316,12.30219
t_critical_0dot05,1.654555,1.657439,1.770933,1.720743
t_p_value,0.0,0.999067,4.337483e-07,2.298328e-11


In [21]:
reporte_corymbosa_df = all_plots_summary_corymbosa.copy()
reporte_corymbosa_df.rename(columns = {"value": "todas"}, inplace = True)
reporte_corymbosa_df = reporte_corymbosa_df.join(plot_one_summary_corymbosa.rename(columns = {"value": "parcela_1"}))
reporte_corymbosa_df = reporte_corymbosa_df.join(plot_two_summary_corymbosa.rename(columns = {"value": "parcela_2"}))
reporte_corymbosa_df = reporte_corymbosa_df.join(plot_three_summary_corymbosa.rename(columns = {"value": "parcela_3"}))
reporte_corymbosa_df.to_csv("./resultados_corymbosa.csv")
reporte_corymbosa_df

Unnamed: 0,todas,parcela_1,parcela_2,parcela_3
mean_x,2.64,2.2,2.04,3.68
total_n,198.0,55.0,51.0,92.0
chi2_statistic,0.6557,0.387558,0.962332,0.760808
chi2_critical_0dot05,15.507313,11.070498,14.06714,15.507313
chi2_p_value,0.265209,0.357146,0.18397,0.2343
observed_variance,2.503959,1.555556,2.8592,2.477363
coeficiente_dispersion,0.948469,0.707071,1.401569,0.673196
t_statistic,-0.511426,-1.522105,2.007843,-2.204411
t_critical_0dot05,1.652625,1.673565,1.675905,1.661771
t_p_value,0.695187,0.933091,0.025039,0.984991
