In [None]:
# !pip install pandas numpy seaborn scipy pingouin openpyxl

In [None]:
import pandas as pd
import numpy as np
import seaborn.objects as so
import seaborn as sns
from scipy import stats
import math
import pingouin as pg

In [None]:
# Excel workbook that every exercise below reads its own sheet from.
# Relative path: it is resolved against the notebook's working directory.
xl_path = 'Lista de Exercicios - Complementaresxlsx Portugues.xlsx'

# Lista de Exercícios

## Exercício 1

In [None]:
def freq(df: pd.DataFrame, col: str, sort_index=False) -> pd.DataFrame:
    """Build a frequency table for one column of ``df``.

    Parameters
    ----------
    df : pd.DataFrame
        Frame holding the column to tabulate.
    col : str
        Label of the column whose distinct values are counted.
    sort_index : bool, default False
        When true, rows are ordered by the distinct values themselves
        instead of the order produced by ``value_counts``.

    Returns
    -------
    pd.DataFrame
        Indexed by the distinct values, with columns ``cnt`` (absolute
        count), ``relative_freq`` (percentage of the total), ``cumcnt``
        (running count) and ``cum_relative_freq`` (running percentage).
        The cumulative columns follow the final row order.
    """
    counts = df.loc[:, col].value_counts()
    if sort_index:
        counts = counts.sort_index()

    table = counts.to_frame(name='cnt')
    total = table['cnt'].sum()
    table['relative_freq'] = table['cnt'] / total * 100
    table['cumcnt'] = table['cnt'].cumsum()
    table['cum_relative_freq'] = table['cumcnt'] / total * 100
    return table

In [None]:
# Frequency table of 'Renda (R$)' grouped into R$ 2000-wide classes.
# Only the first 50 data rows of the first two sheet columns are read.
freq(
    pd.read_excel(xl_path, sheet_name='Exercício 1', usecols=[0, 1], nrows=50)
    .assign(
        # Class edges 0, 2000, ..., 12000; pd.cut's default makes each class right-closed.
        bins=lambda income: pd.cut(income['Renda (R$)'], bins=list(range(0, 12001, 2000))),
    ),
    col='bins',
    sort_index=True,
)

---

## Exercício 2

In [None]:
def describe_more(ser: pd.Series, **kwargs) -> pd.Series:
    """Extend ``Series.describe`` with further descriptive statistics.

    Parameters
    ----------
    ser : pd.Series
        Values to summarise.
    **kwargs
        Forwarded unchanged to ``ser.describe`` (e.g. ``percentiles``).

    Returns
    -------
    pd.Series
        The ``describe`` entries followed by ``amplitude`` (max - min),
        ``var``, ``stderr`` (standard error of the mean), ``cov``
        (std / mean, in percent), ``mode`` (NaN unless the mode is unique),
        ``skew`` and ``kurtosis``.
    """
    summary = ser.describe(**kwargs).to_frame()
    # to_frame() labels the column with the series name, or 0 when unnamed;
    # reuse that label so the returned series carries the same name.
    label = summary.columns[0]

    modes = ser.mode().values
    unique_mode = modes[0] if len(modes) == 1 else np.nan

    extras = pd.Series(
        {
            'amplitude': ser.max() - ser.min(),
            'var': ser.var(),
            'stderr': ser.sem(),
            'cov': (ser.std() / ser.mean()) * 100,
            'mode': unique_mode,
            'skew': ser.skew(),
            'kurtosis': ser.kurt(),
        },
        name=label,
    )
    return pd.concat([summary[label], extras])


In [None]:
# Exercise 2 data: first 24 data rows of the first three sheet columns,
# with the first of those columns used as the row index.
exerc2_df = pd.read_excel(
    xl_path,
    sheet_name='Exercício 2',
    index_col=0,
    usecols=[0, 1, 2],
    nrows=24,
)

In [None]:
# Extended summary statistics for each variable of exerc2_df.
# After the transpose every variable is one row, so grouping on the row label
# gives one group per variable (assuming the labels are unique) and
# `grp.iloc[0]` is that variable's values.
(
    exerc2_df.T
    .groupby(level=0)
    .apply(
        lambda grp: describe_more(
            grp.iloc[0],
            percentiles=[.25, .75, .8, .9, .27, .64],
        )
    )
    .T  # back to statistics as rows, variables as columns
    .round(4)
)

In [None]:
# Significance test for the Pearson correlation between the two columns.
# The t statistic has n - 2 degrees of freedom, where n is the number of
# observed pairs. It is derived once from the non-null count so that the
# statistic and the critical value always use the same value (the previous
# "rows - columns" only equalled n - 2 because the frame has two columns).
n_obs = exerc2_df.count().iloc[0]
ddof = n_obs - 2
r, pvalue = stats.pearsonr(*exerc2_df.T.to_numpy())
T = r / math.sqrt((1 - r**2) / ddof)
critical05 = stats.t.ppf(1 - .05/2, ddof)  # 2-tail, thus we divide by 2

In [None]:
# Report the correlation coefficient, its t statistic, the p-value returned by
# stats.pearsonr and the two-sided 5% critical t value.
print(f'''r: {r:.4f}
T: {T:.4f}
pvalue: {pvalue:.4f}
Critical (.05): {critical05:.4f}
''')

---

## Exercício 7

In [None]:
# Exercise 7 data: one row of observed counts (sheet columns 1-3), turned into
# a column `f_obs` with one row per category.
exerc7_df = (
    pd.read_excel(xl_path, sheet_name='Exercício 7', usecols=[1, 2, 3], nrows=1)
    .T
    .rename(columns={0: 'f_obs'})
    # Expected frequency: the observed total split equally across categories.
    .assign(f_exp=lambda obs: obs['f_obs'].sum() / len(obs))
)
exerc7_df

In [None]:
# Chi-square goodness-of-fit test of the observed counts against the equal
# expected frequencies in exerc7_df.
alpha = .05
# Degrees of freedom of the test: number of categories minus one.
ddof = len(exerc7_df.index)-1
# stats.chisquare's own `ddof` argument is an *adjustment* subtracted from
# k - 1, not the degrees of freedom themselves, so it is left at its default
# of 0. The p-value it returns is then already based on k - 1 degrees of freedom.
chi2val, pvalue = stats.chisquare(exerc7_df.f_obs, exerc7_df.f_exp)
critical05 = stats.chi2.ppf(1-alpha, ddof)

In [None]:
# Report the chi-square statistic, its p-value and the 5% critical value.
print(f'''χ²: {chi2val:.3f}
pvalue: {pvalue:.4f}
Critical (.05): {critical05:.3f}
''')

---

## Exercício 8

In [None]:
def ddof(df: pd.DataFrame) -> pd.Series:
    """Return the number of non-null observations minus one.

    For a DataFrame this is one value per column. The notebook also passes
    this function to ``DataFrame.agg``, which is why plain subtraction is
    used rather than a Series-only method.
    """
    non_null = df.count()
    return non_null - 1

In [None]:
def ftest(df: pd.DataFrame, alpha: float) -> tuple[float, float, float]:
    """Upper-tail F test on the variances of the two columns of ``df``.

    Parameters
    ----------
    df : pd.DataFrame
        Frame with exactly two columns; the first column's variance is the
        numerator, the second column's the denominator.
    alpha : float
        Significance level used for the critical value.

    Returns
    -------
    tuple[float, float, float]
        ``(F, pvalue, crit)``: the variance ratio, the probability of an F
        at least that large, and the ``1 - alpha`` quantile of the F
        distribution.

    Raises
    ------
    ValueError
        If ``df`` does not have exactly two columns (tuple unpacking fails).
    """
    # Degrees of freedom (non-null count - 1 per column) are computed here
    # instead of through the notebook-level `ddof` helper: other cells rebind
    # the name `ddof` to a plain integer, which would break this function
    # when it is called after them.
    dfn, dfd = df.count() - 1
    varx, vary = df.var()
    fvar = varx / vary
    pvalue = stats.f.sf(fvar, dfn, dfd)
    crit = stats.f.ppf(1 - alpha, dfn, dfd)
    return fvar, pvalue, crit

In [None]:
# Exercise 8 data: first 14 data rows of the first two sheet columns.
exerc8_df = pd.read_excel(
    xl_path,
    sheet_name='Exercício 8',
    usecols=[0, 1],
    nrows=14,
)

In [None]:
# Per-column mean, variance, non-null count and count - 1 (the `ddof` helper function).
exerc8_df.agg(['mean','var','count',ddof])

In [None]:
# F test on the two columns at the 5% level: variance ratio, its p-value and the critical value.
fvar, pvalue, critical05 = ftest(exerc8_df, alpha=.05)

In [None]:
# Report the variance ratio, its p-value and the 5% critical F value.
print(f'''F var: {fvar:.3f}
pvalue: {pvalue:.5f}
Critical (.05): {critical05:.3f}
''')

---

## Exercício 9

In [None]:
# Exercise 9 data: first 20 data rows of the first two sheet columns.
exerc9_df = pd.read_excel(
    xl_path,
    sheet_name='Exercício 9',
    usecols=[0, 1],
    nrows=20,
)

In [None]:
# Per-column mean, variance, non-null count and count - 1 (the `ddof` helper function).
exerc9_df.agg(['mean','var','count',ddof])

In [None]:
# F test on the two columns at the 1% level: variance ratio, its p-value and the critical value.
fvar, pvalue, critical01 = ftest(exerc9_df, alpha=.01)

In [None]:
# Report the variance ratio, its p-value and the 1% critical F value.
print(f'''F var: {fvar:.3f}
pvalue: {pvalue:.3f}
Critical (.01): {critical01:.3f}
''')

In [None]:
# t-test between the two columns of exerc9_df (pingouin defaults, 99% confidence
# interval). The two-sided 1% critical t value for the reported degrees of
# freedom is appended as an extra column.
(
    pg.ttest(*exerc9_df.T.to_numpy(), confidence=.99)
    .assign(critical01=lambda res: stats.t.ppf(1 - (.01 / 2), res['dof']))
)

---

## Exercício 10

$\displaystyle t = \frac{\bar{x} - \mu_{0}}{s/\sqrt{n}}$

In [None]:
# One-sample t statistic, plugging into the formula above:
# sample mean 65, hypothesised mean 60, standard deviation 3.5, n = 36.
T = (65 - 60) / (3.5 / math.sqrt(36))

In [None]:
alpha = .05
# Degrees of freedom: n - 1 with the same n = 36 used for the t statistic.
# NOTE: this rebinds `ddof`, which earlier in the notebook names a helper
# function. Cells that call that helper must be re-defined before being re-run.
ddof = 36-1
# Two-sided p-value: twice the upper-tail probability of |T|. Taking the
# absolute value keeps the result a valid probability (<= 1) when T is negative.
pvalue = stats.t.sf(abs(T),ddof)*2
critical05 = stats.t.ppf(1-(alpha/2), ddof)

In [None]:
# Report the t statistic, its two-sided p-value and the two-sided 5% critical t value.
print(f'''T: {T:.3f}
pvalue: {pvalue:.3f}
Critical (.05): {critical05:.3f}
''')