# Cheat Sheet

This cheat sheet is **the only permitted aid** for the exercises and the exam.

## Imports

In [25]:
import math
import numpy as np
import pandas as pd
from pyreadr import read_r
from scipy import stats
from statsmodels.stats.proportion import proportion_confint, samplesize_confint_proportion, proportions_ztest

## Utility functions

In [26]:
def t_confint(series: pd.Series, alpha: float = 0.05):
    """Two-sided Student-t confidence interval for the mean of a sample.

    Missing values (NaN) are ignored consistently: the sample size, the mean
    and the standard error are all computed from the non-missing observations
    only, so degrees of freedom and interval width match the data actually used.

    Parameters
    ----------
    series : pd.Series
        The observed sample.
    alpha : float, default 0.05
        Significance level; the interval has confidence ``1 - alpha``.

    Returns
    -------
    tuple
        ``(lower, upper)`` bounds of the interval. Both are NaN when fewer
        than two non-missing observations are available.
    """
    n = series.count()  # number of non-missing observations (size would also count NaN)
    return stats.t.interval(1 - alpha,          # confidence level
                            df=n - 1,           # degrees of freedom = sample size minus one
                            loc=series.mean(),  # interval is centred on the sample mean
                            scale=series.sem()  # standard error = std. deviation / sqrt(n)
                            )

def critical_value_norm(alpha):
    """Return the standard-normal quantile at probability ``alpha / 2``.

    This is the lower-tail critical value of a two-sided test, so the result
    is negative for ``alpha < 1`` (about -1.96 for ``alpha = 0.05``).
    """
    lower_tail_probability = alpha / 2
    return stats.norm.ppf(lower_tail_probability)

def samplesize_confint_norm(std, half_length, alpha):
    """Sample size needed for a normal-based confidence interval for a mean.

    Computes ``(z * std / half_length) ** 2`` written out as
    ``z**2 * std**2 / half_length**2``, where ``z`` is the value returned by
    ``critical_value_norm(alpha)``. The result is not rounded.
    """
    critical = critical_value_norm(alpha)
    numerator = critical**2 * std**2
    return numerator / half_length**2

## Read R data file

In [27]:
# Load the R data file; the result is indexed by the name of the stored object.
rda_path = '../data/devore7/ex01.13.rda'
data = read_r(rda_path)
df = data['ex01.13']

## Statistical Parameters

In [43]:
# Descriptive statistics of the 'strength' column.
strength = df['strength']

mean = strength.mean()
median = strength.median()
modus = strength.mode()  # mode(s): returned as a Series, since several values can tie
# Named value_range rather than `range`, so the built-in range() stays usable in later cells.
value_range = (strength.min(), strength.max())
std = strength.std()  # sample standard deviation (pandas default ddof=1)
var = strength.var()  # sample variance (pandas default ddof=1)
cov = std / mean      # coefficient of variation (std relative to mean), not a covariance

4.541858977570332

## Frequencies

In [29]:
# Frequency table of 'strength' over 7 equal-width bins.
absolute_counts = df['strength'].value_counts(bins=7)
freq = absolute_counts.to_frame(name='absolute')
freq['relative'] = df['strength'].value_counts(bins=7, normalize=True)

# Order the bins ascending and accumulate: displays the cumulative frequencies.
freq.sort_index().cumsum()

Unnamed: 0,absolute,relative
"(121.97399999999999, 125.571]",4,0.026144
"(125.571, 129.143]",17,0.111111
"(129.143, 132.714]",38,0.248366
"(132.714, 136.286]",103,0.673203
"(136.286, 139.857]",131,0.856209
"(139.857, 143.429]",148,0.96732
"(143.429, 147.0]",153,1.0


## Confidence Intervals

In [30]:
# t-based interval for a mean; replace the placeholder Series with the real sample.
ci = t_confint(series=pd.Series([0.0]), alpha=1 - .95)

# Interval for a proportion: `count` successes out of `nobs` observations.
ci = proportion_confint(count=1, nobs=2, alpha=1 - .95)

# Required sample size for a mean, given the standard deviation and the interval half-length.
n = samplesize_confint_norm(std=1.0, half_length=2.5, alpha=0.05)

# Required sample size for a proportion.
# NOTE(review): half_length=2.5 lies outside the 0..1 scale of a proportion; presumably a placeholder.
n = samplesize_confint_proportion(proportion=0.5, half_length=2.5, alpha=0.05)

## Hypothesis Testing

In [None]:
# One-sample z-test for a proportion against the hypothesised `value`.
stat, pval = proportions_ztest(
    count=1, nobs=2, value=0.5, alternative='two-sided')

# Two-sample z-test for proportions: fill in one count / nobs entry per sample.
stat, pval = proportions_ztest(
    count=[], nobs=[], alternative='two-sided')

# One-sample t-test of the sample mean against `popmean`.
stat, pval = stats.ttest_1samp(
    sample, popmean=42, alternative='two-sided')

# t-test for two independent samples.
stat, pval = stats.ttest_ind(
    sample1, sample2, alternative='two-sided')

# t-test for two related (paired) samples.
stat, pval = stats.ttest_rel(
    sample1, sample2, alternative='two-sided')

| alternative  | ttest     | proportions |
|--------------|-----------|-------------|
| not equal    | two-sided | two-sided   |
| less than    | less      | smaller     |
| greater than | greater   | larger      |