# Criando gráficos para exemplificar o coeficiente de Pearson

Este notebook contém script para criar gráficos que compara o dataset com o intervalo de confiança do coeficiente de person variando o tamanho amostral do dataset.

Ele gera:

- Uma imagem em png para cada tamanho amostral do dataset ($n>4$);

![alt text](https://raw.githubusercontent.com/andersonmdcanteli/statistic/master/Python/pt-br/r-pearson/r-plot/1.png "Exemplo do gráfico de correlação de Pearson").


- Um video em .mp4 com todos os gráficos gerados;


- Um gif com todos os gráficos gerados;


![alt text](https://raw.githubusercontent.com/andersonmdcanteli/statistic/master/Python/pt-br/r-pearson/gif.gif "gif gerado com os gráficos de Pearson")




## Importações

> pip install -r /path/to/requirements.txt

In [1]:
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
import os
import cv2
from moviepy.editor import VideoFileClip
import random

## Configurações

In [2]:
plt.rcParams["font.family"] = "Arial"
plt.rcParams["font.size"] = 11

video_name = "r-plot-scatter-teoretical"
gif_name = "r-plot-scatter-teoretical"
directory = 'r-plot-scatter-teoretical' # directory to save the figures

### Criando a pasta para salvar as figuras

In [3]:
current_directory = os.getcwd()
path = current_directory + "\\" + directory
os.makedirs(path)

## Dataset

In [4]:
nasal_length = np.array([609, 629, 620, 564, 645, 493, 606, 660, 630, 672, 778, 616, 727, 810, 778, 823, 755, 710, 701,
                803, 855, 838, 830, 864, 635, 565, 562, 580, 596, 597, 636, 559, 615, 740, 677, 675, 629, 692,
                710, 730, 763, 686, 717, 737, 816])

nasal_width = np.array([241, 222, 233, 207, 247, 189, 226, 240, 215, 231, 263, 220, 271, 284, 279, 272, 268, 278, 238,
               255, 308, 281, 288, 306, 236, 204, 216, 225, 220, 219, 201, 213, 228, 234, 237, 217, 211, 238,
               221, 281, 292, 251, 231, 275, 275])

alpha=0.05

## Função

In [5]:
def pearson_interval(x, y, alpha=0.05):
    z_critical = stats.norm.ppf(1 - alpha/2)
    r_pearson, _ = stats.pearsonr(x, y)
    r_pearson_z_scale = np.arctanh(r_pearson)
    ic_z_scale = z_critical/np.sqrt(len(x)-3)
    ic_lower_z_scale = r_pearson_z_scale - ic_z_scale
    ic_upper_z_scale = r_pearson_z_scale + ic_z_scale
    ic_lower = np.tanh(ic_lower_z_scale)
    ic_upper = np.tanh(ic_upper_z_scale)    
    
    return ic_lower, r_pearson, ic_upper, alpha

## Criando os gráficos

In [8]:
aux = 0

# criando valores teóricos
r_values = np.linspace(-0.999, .999, 1000)
r_values_z_scale = np.arctanh(r_values)


# criando o último gráfico para obter os limites do eixos e ticks para padronizar
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12,4))
ax1.scatter(nasal_length, nasal_width, label="Dados")
ax1.set_xlabel("Comprimento do nariz ($mm$)")
ax1.set_ylabel("Largura do nariz ($mm$)")
ax1.legend(loc=2)

ymin_1, ymax_1 = ax1.get_ylim()
yticks_1 = ax1.get_yticks()
xmin_1, xmax_1 = ax1.get_xlim()
xticks_1 = ax1.get_xticks()

ic_lower, r_pearson, ic_upper, alpha = pearson_interval(nasal_length, nasal_width, alpha=0.05)
ax2.plot(r_values, r_values_z_scale, label="Teórico", c="k", zorder=-1)
ax2.scatter(r_pearson, np.arctanh(r_pearson), label="$r_{pearson}$", c="r")
ax2.axvline(x=ic_lower, ymin=0, ymax=1, c="lightblue", ls="--", label="Limites de confiança ", )
ax2.axvline(x=ic_upper, ymin=0, ymax=1, c="lightblue", ls="--")
ax2.set_ylabel("$r_{pearson}$ transformado ($Z$)")
ax2.set_xlabel("$r_{pearson}$")
ax2.legend(loc=2)

ymin_2, ymax_2 = ax2.get_ylim()
yticks_2 = ax2.get_yticks()
xmin_2, xmax_2 = ax2.get_xlim()
xticks_2 = ax2.get_xticks()

plt.close()



# criando um gráfico para cada n maior do que 4
for i in range(5, len(nasal_length) + 1):
    aux = aux + 1
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12,4))
    ax1.scatter(nasal_length[:i], nasal_width[:i], label="Dados")
    ax1.set_xlabel("Comprimento do nariz ($mm$)")
    ax1.set_ylabel("Largura do nariz ($mm$)")
    ax1.set_xticks(xticks_1)
    ax1.set_xlim(xmin_1, xmax_1)
    ax1.set_yticks(yticks_1)
    ax1.set_ylim(ymin_1, ymax_1)
    ax1.legend(loc=2)

    ic_lower, r_pearson, ic_upper, alpha = pearson_interval(nasal_length[:i], nasal_width[:i], alpha=0.05)

    ax2.plot(r_values, r_values_z_scale, label="Teórico", c="k", zorder=-1)
    ax2.scatter(r_pearson, np.arctanh(r_pearson), label="$r_{pearson}$", c="r")
    ax2.axvline(x=ic_lower, ymin=0, ymax=1, c="lightblue", ls="--", label="Limites de confiança ", )
    ax2.axvline(x=ic_upper, ymin=0, ymax=1, c="lightblue", ls="--")
    ax2.set_ylabel("$r_{pearson}$ transformado ($Z$)")
    ax2.set_xlabel("$r_{pearson}$")
    
    ax2.set_xticks(xticks_2)
    ax2.set_xlim(xmin_2, xmax_2)
    ax2.set_yticks(yticks_2)
    ax2.set_ylim(ymin_2, ymax_2)
    ax2.legend(loc=2)
    plt.savefig(f"{directory}/{aux}.png", dpi=100, bbox_inches='tight')
#     plt.show()
    plt.close()


## Criando o video

In [9]:
img_names = []
for i in range(1,aux+1):
    img_names.append(f"{i}.png")
    
frame = cv2.imread(os.path.join(path, img_names[0]))
height, width, layers = frame.shape

video = cv2.VideoWriter(f'{video_name}.mp4', 0, 1, (width,height))

for image in img_names:
    video.write(cv2.imread(os.path.join(directory, image)))

cv2.destroyAllWindows()
video.release()

## Criando o gif

In [10]:
# adapted from https://www.freecodecamp.org/news/how-to-convert-video-files-to-gif-in-python/

with VideoFileClip(f'{video_name}.mp4') as clip:
    clip.write_gif(f"{gif_name}.gif")
    clip.close()
    
    raise NotImplementedError("closing the clip by brute force, since .close() is not working properly")


MoviePy - Building file r-plot-scatter-teoretical.gif with imageio.


                                                                                                                       

NotImplementedError: closing the clip by brute force, since .close() is not working properly