# Importando librerias

In [70]:
import pandas as pd
import numpy as np
from bokeh.io import output_notebook,show
from bokeh.plotting import figure
from bokeh.layouts import layout
import sqlite3

In [71]:
from bokeh.models import (ColumnDataSource, DataRange1d, Legend, Line, LinearAxis,
                          PanTool, Plot, SaveTool, Scatter, WheelZoomTool)

In [72]:
# Librerias para el analisis estadistico
import statsmodels.api as sm
from scipy import stats

In [73]:
import math

# Realizando conexiones

In [74]:
conection=sqlite3.connect('E:/DB SQL3/DATA_BASE_FLOWERS_SIZE/DB_FLOWERS_SIZE.db')
query_flowers="""
    SELECT *
    FROM Flower_Size
    """

In [75]:
DataFrameFlowers=pd.read_sql_query(query_flowers,conection)

# Analisis de datos

In [76]:
DataFrameFlowers.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1741 entries, 0 to 1740
Data columns (total 3 columns):
 #   Column        Non-Null Count  Dtype
---  ------        --------------  -----
 0   Id_Event      1741 non-null   int64
 1   Id_scientist  1741 non-null   int64
 2   Size_flower   1741 non-null   int64
dtypes: int64(3)
memory usage: 40.9 KB


In [77]:
size_flowers=DataFrameFlowers[['Size_flower']].copy()
size_flowers[['Size_flower']]=size_flowers[['Size_flower']]/1000000

In [78]:
# Valores de la media (mu) y desviación típica (sigma) de los datos
mu, sigma = stats.norm.fit(size_flowers)


In [79]:
#Calculando el numero de intervalos del histograma
N=size_flowers.Size_flower.count()
Number_intervals=int(round(1+3.322*math.log10(N),0))
print(N,Number_intervals)

1741 12


In [80]:
# Valores teóricos de la normal en el rango observado
x_hat = np.linspace(min(size_flowers.Size_flower), max(size_flowers.Size_flower), num=N)
y_hat = stats.norm.pdf(x_hat, mu, sigma)

In [81]:
print("Mean with pandas",size_flowers.Size_flower.mean()," Stats Mu",mu)
print("StdDev with pandas",size_flowers.Size_flower.std()," Stats Sigma",sigma)
print("The mean and STD Dev is ecual with the two methods")

Mean with pandas 3669.9197450580127  Stats Mu 3669.9197450580127
StdDev with pandas 2675.218036883959  Stats Sigma 2674.4496272639476
The mean and STD Dev is ecual with the two methods


## Datos de la grafica Q-Q

In [82]:
# Grafica Q-Q
Values_order=size_flowers.sort_values(by='Size_flower')  

In [83]:
N_Values_order=Values_order.Size_flower.count()
N_Values_order

1741

In [84]:
Values_order=Values_order.reset_index(drop=True)
Values_order=Values_order.reset_index()


In [85]:
Values_order['i']=Values_order['index']+1
Values_order['j']=(Values_order['i']-(1/2))/N_Values_order
Values_order['Z']=stats.norm().ppf(Values_order['j'])


## Curtosis y asimetria

In [86]:
print('Kursotis:', stats.kurtosis(size_flowers.Size_flower))
print('Skewness:', stats.skew(size_flowers.Size_flower))

Kursotis: -0.5557086667017597
Skewness: 0.9097228938537244


## Test Shapiro -Wilk

### H0 : Los datos tienen normalidad
###  H1: Los datos no tienen normalidad

In [87]:
# Shapiro-Wilk test
# ==============================================================================
shapiro_test = stats.shapiro(size_flowers.Size_flower)
shapiro_test

ShapiroResult(statistic=0.8186993598937988, pvalue=1.671538873169858e-40)

## Test  K-square

### H0 : Los datos tienen normalidad
###  H1: Los datos no tienen normalidad

In [88]:
# D'Agostino's K-squared test
# ==============================================================================
k2, p_value = stats.normaltest(size_flowers.Size_flower)
print(f"Estadístico = {k2}, p-value = {p_value}")

Estadístico = 224.06785933579874, p-value = 2.2094417654625057e-49


# Generación de graficas

In [89]:
figure_1=figure(title='Histogram of size flowers',plot_width=1600,plot_height=400)
hist,edges=np.histogram(size_flowers.Size_flower,density=True,bins=Number_intervals)
figure_1.xgrid.grid_line_color=None
figure_1.ygrid.grid_line_alpha=0.7
figure_1.xaxis.axis_label='Size'
figure_1.yaxis.axis_label='Probability desity'
figure_1.y_range.start = 0
figure_1.x_range.start = 0
figure_1.quad(top=hist,bottom=0,left=edges[:-1],right=edges[1:],
      fill_color='#008080',line_color='black',legend_label='Size Flower')
figure_1.line(x_hat,y_hat,color='#181515',line_width=2)

# Construyendo graficos Q-Q

In [90]:
figure_2=figure(title='Q-Q of size flowers',plot_width=1600,plot_height=400)
figure_2.xgrid.grid_line_color=None
figure_2.ygrid.grid_line_alpha=0.7
figure_2.xaxis.axis_label='Size'
figure_2.yaxis.axis_label='Probability desity'
figure_2.y_range.start = 0
figure_2.x_range.start = -5
figure_2.scatter(Values_order.Z,Values_order.Size_flower,legend_label=f"Legend at\n{k2!r}\n")


# Mostrando Datos

In [91]:
layout_total=layout([[figure_1],[figure_2]])
show(layout_total)