In [1]:
import numpy as np
from scipy import stats
import pandas as pd
import itertools

### 4. One-sample Wilcoxon signed-rank test

In [2]:
# Observed sample and the hypothesized median it is tested against.
weeks = np.array([49, 58, 75, 110, 112, 132, 151, 276, 281, 362])
m_0 = 200

# The signed-rank test is applied to the sample shifted by m_0, so the null
# hypothesis "median == m_0" becomes "differences are centred on zero".
deviations = weeks - m_0
w, p_value = stats.wilcoxon(deviations)
round(p_value, 4)

0.2845

### 5. Mann–Whitney U test for two independent samples

In [3]:
# Two independent samples to compare.
no_cdt = np.array([22, 22, 15, 13, 19, 19, 18, 20, 21, 13, 13, 15])
cdt = np.array([17, 18, 18, 15, 12, 4, 14, 15, 10])

# One-sided Mann-Whitney U test; the alternative is that values in `no_cdt`
# tend to be greater than values in `cdt`.
mw_result = stats.mannwhitneyu(no_cdt, cdt, alternative='greater')
r1, p_value = mw_result
round(p_value, 4)

0.029

### 6. Bootstrap confidence interval for the difference in mean temperature

In [4]:
# Load the tab-separated data set; the first column has no header in the file,
# so pandas names it 'Unnamed: 0' and it is renamed to 'Date' here.
challenger = (
    pd.read_csv('challenger.txt', sep='\t')
    .rename(columns={'Unnamed: 0': 'Date'})
)
challenger.head()

Unnamed: 0,Date,Temperature,Incident
0,Apr12.81,18.9,0
1,Nov12.81,21.1,1
2,Mar22.82,20.6,0
3,Nov11.82,20.0,0
4,Apr04.83,19.4,0


In [5]:
# Split the rows by the value of the 'Incident' column (1 vs. 0).
incident_flag = challenger['Incident']
incident_1 = challenger.loc[incident_flag == 1]
incident_0 = challenger.loc[incident_flag == 0]

In [6]:
def get_bootstrap_samples(data, n_samples):
    """Draw bootstrap resamples (sampling with replacement) from ``data``.

    Parameters
    ----------
    data : array-like
        Observations to resample. Any sequence accepted by ``np.asarray``
        works (ndarray, list, tuple); indexing is along the first axis.
    n_samples : int
        Number of bootstrap resamples to draw.

    Returns
    -------
    numpy.ndarray
        Array whose first two dimensions are ``(n_samples, len(data))``;
        row ``k`` is the k-th resample.

    Notes
    -----
    Uses the global ``np.random`` state, so call ``np.random.seed`` beforehand
    for reproducible results.
    """
    # Coerce to ndarray so that 2-D integer (fancy) indexing below also works
    # for plain Python sequences, which do not support it.
    data = np.asarray(data)
    indices = np.random.randint(0, len(data), (n_samples, len(data)))
    samples = data[indices]
    return samples

def stat_intervals(stat, alpha):
    """Return the percentile interval of ``stat`` at significance ``alpha``.

    The result is the pair of empirical percentiles at ``100 * alpha / 2`` and
    ``100 * (1 - alpha / 2)``, i.e. a two-sided interval that leaves
    ``alpha / 2`` of the values in each tail.
    """
    lower_pct = 100 * alpha / 2.
    upper_pct = 100 * (1 - alpha / 2.)
    return np.percentile(stat, [lower_pct, upper_pct])

In [7]:
np.random.seed(0)

# Resample each group 1000 times; the no-incident group is drawn first so the
# random stream is consumed in a fixed order.
no_incident_samples = get_bootstrap_samples(incident_0['Temperature'].values, 1000)
incident_samples = get_bootstrap_samples(incident_1['Temperature'].values, 1000)

temperature_0_mean_scores = [np.mean(sample) for sample in no_incident_samples]
temperature_1_mean_scores = [np.mean(sample) for sample in incident_samples]

# Difference of bootstrap means: no-incident minus incident.
delta_mean_scores = [
    mean_0 - mean_1
    for mean_1, mean_0 in zip(temperature_1_mean_scores, temperature_0_mean_scores)
]

# 95% percentile interval for the difference; the lower bound is reported.
interval = stat_intervals(delta_mean_scores, 0.05)
print(interval)
print(round(interval[0], 4))

[1.42299107 7.93861607]
1.423


### 7. Permutation test for the difference in mean temperature

In [8]:
def permutation_t_stat_ind(sample1, sample2):
    """Test statistic for two independent samples: mean(sample1) - mean(sample2)."""
    first_mean = np.mean(sample1)
    second_mean = np.mean(sample2)
    return first_mean - second_mean

def get_random_combinations(n1, n2, max_combinations):
    """Generate random splits of ``range(n1 + n2)`` into two index groups.

    Starts from the identity ordering, then shuffles the index list
    ``max_combinations - 1`` times, collecting each ordering in a set (so
    repeated orderings collapse and fewer than ``max_combinations`` entries
    may be returned).

    Returns a list of ``(first_n1_indices, remaining_n2_indices)`` tuples.
    Uses the global ``np.random`` state.
    """
    ordering = list(range(n1 + n2))
    seen_orderings = {tuple(ordering)}
    for _ in range(max_combinations - 1):
        # Shuffles in place; each new ordering starts from the previous one.
        np.random.shuffle(ordering)
        seen_orderings.add(tuple(ordering))
    return [(perm[:n1], perm[n1:]) for perm in seen_orderings]

def permutation_zero_dist_ind(sample1, sample2, max_combinations = None):
    joined_sample = np.hstack((sample1, sample2))
    n1 = len(sample1)
    n = len(joined_sample)
    
    if max_combinations:
        indices = get_random_combinations(n1, len(sample2), max_combinations)
    else:
        indices = [(list(index), filter(lambda i: i not in index, range(n))) \
                    for index in itertools.combinations(range(n), n1)]
    
    distr = [joined_sample[list(i[0])].mean() - joined_sample[list(i[1])].mean() \
             for i in indices]
    return distr

In [9]:
def permutation_test(sample1, sample2, max_permutations = None, alternative = 'two-sided'):
    """Permutation test for two independent samples; returns the p-value.

    The observed statistic (difference of sample means) is compared with the
    null distribution produced by ``permutation_zero_dist_ind``. The p-value
    is the fraction of null values that are at least as extreme as the
    observed one in the direction given by ``alternative``:

    * ``'two-sided'``: ``abs(x) >= abs(observed)``
    * ``'less'``:      ``x <= observed``
    * ``'greater'``:   ``x >= observed``

    ``max_permutations`` is forwarded to ``permutation_zero_dist_ind``.

    Raises ``ValueError`` if ``alternative`` is not one of the three options.
    """
    if alternative not in ('two-sided', 'less', 'greater'):
        raise ValueError("alternative not recognized\n"
                         "should be 'two-sided', 'less' or 'greater'")

    observed = permutation_t_stat_ind(sample1, sample2)
    null_values = permutation_zero_dist_ind(sample1, sample2, max_permutations)

    # Count with plain Python ints so the result stays a built-in float.
    if alternative == 'two-sided':
        threshold = abs(observed)
        extreme_count = sum(1 for value in null_values if abs(value) >= threshold)
    elif alternative == 'less':
        extreme_count = sum(1 for value in null_values if value <= observed)
    else:  # 'greater' -- the only option left after validation above
        extreme_count = sum(1 for value in null_values if value >= observed)

    return extreme_count / len(null_values)

In [11]:
# Fix the seed so the randomly sampled permutations are reproducible.
np.random.seed(0)

# Two-sided permutation test on temperature: no-incident vs. incident rows,
# using at most 10000 random permutations.
no_incident_temperature = incident_0['Temperature'].values
incident_temperature = incident_1['Temperature'].values
permutation_test(no_incident_temperature, incident_temperature, 10000)

0.007