In [2]:
from scipy.stats import norm

from tools.simulation import Simulation
from tools.simulation_params import SimulationParams
from tools.distribution import DistributionCouple


# Shared settings for every simulation in this notebook; each of the three
# names is forwarded to Simulation(...) under the keyword of the same name.
root = "result_data"
alternative = "two-sided"

# Significance levels; they show up as the column headers of the result tables.
alpha_list = [0.001, 0.005, 0.01, 0.05]

# Normally distributed samples

## Simulation under the null hypothesis

In [3]:
# Pairs of normal distributions with the same mean (loc=0 on both sides):
# two pairs with matching scales and one where y's scale is ten times x's.
null_dist_list = [
    DistributionCouple(
        x_dist_family=norm,
        y_dist_family=norm,
        x_dist_param_dict={"loc": 0, "scale": x_scale},
        y_dist_param_dict={"loc": 0, "scale": y_scale},
    )
    for x_scale, y_scale in [(1, 1), (10, 10), (10, 100)]
]

# One SimulationParams per sample-size scenario. Each table row is
# (x_sample_size, y_sample_size, sample_name, iter_size); every scenario is
# given the same `null_dist_list` object.
simulation_param_list = [
    SimulationParams(
        x_sample_size=x_size,
        y_sample_size=y_size,
        sample_name=label,
        iter_size=n_iter,
        dist_list=null_dist_list,
    )
    for x_size, y_size, label, n_iter in [
        (10, 10, "Small balanced samples", 5000),
        (10, 100, "Small unbalanced samples", 5000),
        (100, 100, "Medium balanced samples", 5000),
        (3000, 100, "Medium unbalanced samples", 1000),
        (5000, 5000, "Large balanced samples", 1000),
        # Disabled scenario, kept for reference:
        # (5000, 100000, "Large unbalanced samples", 10),
    ]
]

# Bundle the scenarios above with the shared notebook settings.
# NOTE(review): true_hypothesis=0 presumably marks the null hypothesis as the
# true one (the saved output for this run is labelled "FPR") - confirm against
# the Simulation class.
simulation = Simulation(
    title="Normally distributed samples",
    subtitle="Simulation under hypothesis",
    simulation_param_list=simulation_param_list,
    alpha_list=alpha_list,
    alternative=alternative,
    true_hypothesis=0,
    root=root,
)

In [None]:
# Launch the run configured in the previous cell; random_state is pinned to 42.
# NOTE(review): the output saved with this cell stops part-way through (the
# progress bar is at 57%), so re-run to completion before relying on it.
simulation.start(
    random_state=42,
    rewrite_result=False,
    result_disable=False,
    tqdm_disable=False,
)

-------
Small balanced samples; N(0, 1) vs N(0, 1), FPR
                                        0.001   0.005   0.010   0.050
name                                                                 
bootstrap, basic method                0.0082  0.0170  0.0252  0.0758
bootstrap, bca method                  0.0096  0.0186  0.0274  0.0788
bootstrap, percentile method           0.0102  0.0196  0.0302  0.0792
permutation test, mean stat            0.0004  0.0040  0.0108  0.0466
permutation test, t-test stat          0.0006  0.0052  0.0106  0.0482
t-permutation test, equal var = false  0.0004  0.0044  0.0100  0.0470
t-test, equal var = false              0.0002  0.0042  0.0094  0.0442
t-test, equal var = true               0.0006  0.0050  0.0100  0.0456
z-test                                 0.0034  0.0114  0.0172  0.0622
-------
-------
Small balanced samples; N(0, 100) vs N(0, 100), FPR
                                        0.001   0.005   0.010   0.050
name                                

 57%|█████▋    | 574/1000 [4:30:45<1:55:38, 16.29s/it]   

## Simulation under the alternative hypothesis

In [None]:
# Scenarios in which the two normal distributions have different means.
# Each table row is (x_sample_size, y_sample_size, sample_name, iter_size, couples)
# and each couple is ((x_loc, x_scale), (y_loc, y_scale)).
# NOTE: this rebinds `simulation_param_list`, which the previous section also
# assigns, so the cells must be run in order.
simulation_param_list = [
    SimulationParams(
        x_sample_size=x_size,
        y_sample_size=y_size,
        sample_name=label,
        iter_size=n_iter,
        dist_list=[
            DistributionCouple(
                x_dist_family=norm,
                y_dist_family=norm,
                x_dist_param_dict={"loc": x_loc, "scale": x_scale},
                y_dist_param_dict={"loc": y_loc, "scale": y_scale},
            )
            for (x_loc, x_scale), (y_loc, y_scale) in couples
        ],
    )
    for x_size, y_size, label, n_iter, couples in [
        (10, 10, "Small balanced samples", 5000, [
            ((0, 1), (1.7, 1)),
            ((0, 10), (17, 10)),
            ((0, 10), (100, 100)),
            ((130, 10), (0, 100)),
        ]),
        (10, 100, "Small unbalanced samples", 5000, [
            ((0, 1), (1, 1)),
            ((0, 10), (10, 10)),
            ((0, 10), (40, 100)),
            ((40, 10), (0, 100)),
        ]),
        (100, 100, "Medium balanced samples", 5000, [
            ((0, 1), (0.5, 1)),
            ((0, 10), (5, 10)),
            ((0, 10), (35, 100)),
            ((35, 10), (0, 100)),
        ]),
        (3000, 100, "Medium unbalanced samples", 5000, [
            ((0, 1), (0.3, 1)),
            ((0, 10), (3, 10)),
            ((0, 10), (30, 100)),
            ((32, 10), (0, 100)),
        ]),
        (5000, 5000, "Large balanced samples", 1000, [
            ((0, 1), (0.08, 1)),
            ((0, 10), (0.8, 10)),
            ((0, 10), (6, 100)),
            ((5, 10), (0, 100)),
        ]),
        # Disabled scenario, kept for reference: x_sample_size=5000,
        # y_sample_size=100000, "Large unbalanced samples", iter_size=10.
        # NOTE(review): the commented-out original passed `null_dist_list` as
        # its dist_list; it presumably needs its own shifted-mean couples
        # before being enabled in this section - confirm.
    ]
]

# Bundle the shifted-mean scenarios with the shared notebook settings.
# NOTE: this rebinds `simulation`, which the previous section also assigns.
# NOTE(review): true_hypothesis=1 presumably marks the alternative as the true
# hypothesis - confirm against the Simulation class.
simulation = Simulation(
    title="Normally distributed samples",
    subtitle="Simulation under alternative",
    simulation_param_list=simulation_param_list,
    alpha_list=alpha_list,
    alternative=alternative,
    true_hypothesis=1,
    root=root,
)

In [None]:
# Launch the run configured in the previous cell; random_state is pinned to 142.
simulation.start(
    random_state=142,
    rewrite_result=False,
    result_disable=False,
    tqdm_disable=False,
)