In [2]:
from IPython.display import display
import warnings
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
import scipy.stats as stats
from datetime import datetime

# Matplotlib defaults applied to every figure created after this cell runs.
plt.rcParams['font.family'] = ['sans-serif']
plt.rcParams['font.sans-serif'] = ['SimHei']  # SimHei so that Chinese labels display correctly
plt.rcParams['axes.unicode_minus'] = False
plt.rcParams['figure.figsize'] = (8, 5)  # set the default figure size up front

# SVG is a vector format, so inline figures stay sharp when the page is zoomed. This magic appears to work only inside .ipynb notebooks.
%config InlineBackend.figure_format = 'svg'

%matplotlib inline

warnings.filterwarnings('ignore')  # suppress all warnings for the rest of the session
# This allows multiple outputs from a single jupyter notebook cell:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
# pandas console display settings: no frame wrapping, East-Asian-width aware column
# alignment, and a display width of 180.
pd.set_option('expand_frame_repr', False)
pd.set_option('display.unicode.ambiguous_as_wide', True)
pd.set_option('display.unicode.east_asian_width', True)
pd.set_option('display.width', 180)


In [None]:
    # Reference rates as 2x1 float column vectors (one row per entry).
    # NOTE(review): this cell was never executed (In [None]) and is indented as if pasted
    # from inside a function; the same two arrays are rebuilt in a later cell.
    lam_ref = np.array([[0.6],[0.2]], dtype=float)
    mu_ref = np.array([[2],[1]], dtype=float)

In [3]:
class Distribution:
    """Base class for samplers whose values are shifted by a constant offset.

    Subclasses implement `sample`, `logW` and `logW_tail`; the base versions only
    raise NotImplementedError. `output` adds the offset to a raw value and
    `input` removes it again.
    """

    def __init__(self, offset:float):
        """Store the constant shift applied by `output` and removed by `input`.

        Args:
            offset (float): amount added by `output` and subtracted by `input`.
        """
        self.offset = offset

    def sample(self, *args, **kwargs) -> float:
        """Draw one value. Must be overridden; subclasses may add parameters.

        Raises:
            NotImplementedError: always, in the base class.
        """
        raise NotImplementedError

    def __call__(self, *args, **kwargs):
        """Shorthand for `sample`.

        Arguments are forwarded unchanged, so a subclass whose `sample` takes
        extra parameters (for example a phase index) can be called directly.
        """
        return self.sample(*args, **kwargs)

    def input(self, x:float) -> float:
        """Remove the offset from `x` (inverse of `output`)."""
        return x - self.offset

    def output(self, x:float) -> float:
        """Add the offset to `x` (inverse of `input`)."""
        return x + self.offset

    def logW(self, x:float, *args, **kwargs) -> float:
        """Log-likelihood ratio hook. Must be overridden.

        Raises:
            NotImplementedError: always, in the base class.
        """
        raise NotImplementedError

    def logW_tail(self, x:float, *args, **kwargs) -> float:
        """Log-tail ratio hook. Must be overridden.

        Raises:
            NotImplementedError: always, in the base class.
        """
        raise NotImplementedError

class ExponDistri(Distribution):
    def __init__(self, rates:list[float], seed:int=None, eps:float=1e-7):
        """Exponential sampler over several rates ("phases") with log-ratio helpers.

        Phase 0 is the reference: `logW` and `logW_tail` compare its density / tail
        with those of the phase a value was drawn from. Every sample is shifted up
        by `eps`, and the shift is removed again before a ratio is evaluated.
        Per the original author's note, the shift exists because precision limits
        of simpy do not allow very small values to be sampled.

        Args:
            rates (list[float]): rate of each phase; `rates[0]` is the reference rate.
            seed (int, optional): seed of the private RandomState. Defaults to None.
            eps (float, optional): offset added to every sample. Defaults to 1e-7.
        """
        super().__init__(eps)
        self.rates = np.array(rates)
        self.num_phase = len(self.rates)
        self.seed = seed
        self.random = np.random.RandomState(seed=seed)

    @staticmethod
    def _is_reference(phase) -> bool:
        """Return True only when `phase` is a scalar equal to 0.

        Lists and arrays always return False so that callers fall through to the
        element-wise formula. Testing `phase == 0` directly raises "truth value of
        an array is ambiguous" for an ndarray with more than one element.
        """
        return np.ndim(phase) == 0 and bool(phase == 0)

    def sample(self, phase:int=0) -> float:
        """Sample an exponential random variable using the rate `rates[phase]`.

        Args:
            phase (int or ndarray, optional): phase(s) to sample. Defaults to 0.
                A list/array of indices yields one draw per index.

        Returns:
            float: the draw(s), shifted up by the offset.
        """
        sample = self.random.exponential(scale=1/self.rates[phase])
        return self.output(sample)

    def _logpdf(self, x:float, phase:int=0) -> float:
        """Log-density of an exponential with rate `rates[phase]` at `x`."""
        lam = self.rates[phase]
        return np.log(lam) - lam * x

    def _logtail(self, x:float, phase:int=0) -> float:
        """Log of exp(-rates[phase] * x), i.e. the exponential tail at `x`."""
        return -self.rates[phase] * x

    def logW(self, x:float, phase:int) -> float:
        """Log-likelihood ratio of x: density under phase 0 over density under `phase`.

        Args:
            x (float): a value as returned by `sample` (offset still included).
            phase (int | list[int] | ndarray): phase(s) that x was sampled from.

        Returns:
            float|ndarray: 0 for a scalar phase 0, otherwise the log ratio
                (element-wise when `phase` is a list or array).
        """
        if self._is_reference(phase):
            return 0
        x = self.input(x)
        return self._logpdf(x, 0) - self._logpdf(x, phase)

    def logW_tail(self, x:float, phase:int) -> float:
        """Log-tail ratio of x: tail under phase 0 over tail under `phase`.

        Args:
            x (float): a value as returned by `sample` (offset still included).
            phase (int | list[int] | ndarray): phase(s) that x was sampled from.

        Returns:
            float|ndarray: 0 for a scalar phase 0, otherwise the log ratio
                (element-wise when `phase` is a list or array).
        """
        if self._is_reference(phase):
            return 0
        x = self.input(x)
        return self._logtail(x, 0) - self._logtail(x, phase)

# Smoke test of ExponDistri with six rates; index 0 (rate 0.01) is the reference phase.
rates = [0.01, 0.1, 1, 10, 100, 1000]
exp = ExponDistri(rates)  # no seed is passed, so the sampled values change from run to run
# A list of phases draws one sample per listed phase.
print(exp.sample([2,3,4]))
# Log-likelihood ratios of the value 3 for phases 2..5, each relative to phase 0.
print(exp.logW(3, [2,3,4,5]))

[0.27240316 0.01872605 0.01317181]
[-1.63517028e+00  2.30622437e+01  2.90759650e+02  2.98845697e+03]


In [7]:
# Cap the 0.95 quantile of a sample at 10; for this sample the quantile (4.8) is below the cap.
min(10, np.quantile([1,2,3,4,5], 0.95))

4.8

In [5]:
# Two 2x2 rate tables. Each row of lam is handed to ExponDistri as its `rates`,
# so column 0 of a row is that sampler's phase-0 rate.
# NOTE(review): presumably lam holds arrival rates and mu service rates — confirm.
lam = np.array([[0.6, 0.8333],
                [0.2, 0.4666]])
mu = np.array([[   2,    1.2],
               [   1,    0.2]])

# One sampler per row of lam. Each seed is a fresh random integer below 1,000,000,
# so the seeds (and the samples they produce) differ between runs. mu is not used here.
[ExponDistri(lam[i], np.random.randint(1000000)) for i in range(2)]

[<__main__.ExponDistri at 0x1c16bf3dc10>,
 <__main__.ExponDistri at 0x1c16bf335b0>]

In [1]:
# Number of CPUs minus one, but never less than 1 — presumably a worker-pool size.
# Pool and freeze_support are imported but not used in this cell.
from multiprocessing import Pool, freeze_support, cpu_count
max(cpu_count() - 1, 1)

15

In [33]:
# Load a saved result table. The first line of the file is skipped and no header row
# is parsed, so the columns end up labelled by integer position.
result_csv = "./result/2_unbias_check/temp_t1_iter50_policy1.csv"
data = pd.read_csv(result_csv, header=None, skiprows=[0])
data

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,295,296,297,298,299,300,301,302,303,304
0,0,0.6,0.757056,2,1.457555,100000,29.108651,1819461,1.662982,"[1.635,1.691]",...,0.2281,0.491864,0.67979,0.677317,0.00078,0.533982,0.222383,0.002316,0.33677,0.290571
1,1,0.6,0.757056,2,1.457555,100000,34.913426,1812566,1.644043,"[1.619,1.669]",...,0.22841,0.500228,0.733543,0.680906,0.00091,0.511452,0.197839,0.002713,0.33545,0.296525
2,2,0.6,0.757056,2,1.457555,100000,35.208951,1808790,1.679207,"[1.652,1.706]",...,0.22823,0.499747,0.687108,0.682751,0.00096,0.589867,0.321726,0.002872,0.33428,0.312477
3,3,0.6,0.757056,2,1.457555,100000,35.156116,1804203,1.671269,"[1.645,1.698]",...,0.22808,0.490578,0.598408,0.685625,0.00085,0.59538,0.238272,0.002555,0.33266,0.282476
4,4,0.6,0.757056,2,1.457555,100000,35.544403,1800811,1.683032,"[1.654,1.712]",...,0.228,0.496932,0.721487,0.682043,0.00091,0.508545,0.289435,0.002722,0.33429,0.302465
5,5,0.6,0.757056,2,1.457555,100000,35.221129,1799597,1.672942,"[1.644,1.702]",...,0.22728,0.498428,0.718113,0.68154,0.00088,0.594419,0.205939,0.002639,0.33348,0.303946
6,6,0.6,0.757056,2,1.457555,100000,35.370415,1797066,1.66878,"[1.644,1.694]",...,0.22657,0.503509,0.699856,0.678963,0.00091,0.555648,0.267999,0.002727,0.3337,0.301353
7,7,0.6,0.757056,2,1.457555,100000,38.298417,1808522,1.663185,"[1.637,1.689]",...,0.22893,0.500122,0.72279,0.68209,0.00105,0.510556,0.221924,0.003128,0.33563,0.298527
8,8,0.6,0.757056,2,1.457555,100000,40.79152,1819008,1.658004,"[1.632,1.684]",...,0.23024,0.494642,0.61001,0.687202,0.00084,0.444793,0.100497,0.002507,0.33504,0.272222
9,9,0.6,0.757056,2,1.457555,100000,38.209039,1803517,1.657229,"[1.631,1.683]",...,0.23052,0.496102,0.622067,0.682597,0.00104,0.47172,0.110473,0.00308,0.33771,0.29907


In [34]:
# Average the column-22 values and the bounds of the "[low,high]" strings in column 23.
mean = data[22].mean()
# Parse the bounds with pandas string methods instead of eval(), so text read from
# the CSV is never executed as code.
ci_parts = data[23].str.strip('[]').str.split(',')
lower, upper = ci_parts.str[0].astype(float), ci_parts.str[1].astype(float)
lower_CI, upper_CI = lower.mean(), upper.mean()
mean
lower_CI
upper_CI

0.13815161010000002

0.13586600000000001

0.14044

In [35]:
# Average the column-50 values and the bounds of the "[low,high]" strings in column 51.
mean = data[50].mean()
# Parse the bounds with pandas string methods instead of eval(), so text read from
# the CSV is never executed as code.
ci_parts = data[51].str.strip('[]').str.split(',')
lower, upper = ci_parts.str[0].astype(float), ci_parts.str[1].astype(float)
lower_CI, upper_CI = lower.mean(), upper.mean()
mean
lower_CI
upper_CI

0.04882982104000002

0.04771500000000001

0.04994460000000001

In [36]:
# Average the column-98 values and the bounds of the "[low,high]" strings in column 99.
mean = data[98].mean()
# Parse the bounds with pandas string methods instead of eval(), so text read from
# the CSV is never executed as code.
ci_parts = data[99].str.strip('[]').str.split(',')
lower, upper = ci_parts.str[0].astype(float), ci_parts.str[1].astype(float)
lower_CI, upper_CI = lower.mean(), upper.mean()
mean
lower_CI
upper_CI

0.12821282098

0.12613000000000002

0.13030199999999997

In [37]:
# Average the column-126 values and the bounds of the "[low,high]" strings in column 127.
mean = data[126].mean()
# Parse the bounds with pandas string methods instead of eval(), so text read from
# the CSV is never executed as code.
ci_parts = data[127].str.strip('[]').str.split(',')
lower, upper = ci_parts.str[0].astype(float), ci_parts.str[1].astype(float)
lower_CI, upper_CI = lower.mean(), upper.mean()
mean
lower_CI
upper_CI

0.08244167776

0.0809764

0.08390680000000002

In [38]:
# Average the column-174 values and the bounds of the "[low,high]" strings in column 175.
mean = data[174].mean()
# Parse the bounds with pandas string methods instead of eval(), so text read from
# the CSV is never executed as code.
ci_parts = data[175].str.strip('[]').str.split(',')
lower, upper = ci_parts.str[0].astype(float), ci_parts.str[1].astype(float)
lower_CI, upper_CI = lower.mean(), upper.mean()
mean
lower_CI
upper_CI

0.18036646152000002

0.177152

0.183584

In [39]:
# Average the column-202 values and the bounds of the "[low,high]" strings in column 203.
mean = data[202].mean()
# Parse the bounds with pandas string methods instead of eval(), so text read from
# the CSV is never executed as code.
ci_parts = data[203].str.strip('[]').str.split(',')
lower, upper = ci_parts.str[0].astype(float), ci_parts.str[1].astype(float)
lower_CI, upper_CI = lower.mean(), upper.mean()
mean
lower_CI
upper_CI

0.13630755494

0.133726

0.13889400000000002

In [40]:
# Average the column-250 values and the bounds of the "[low,high]" strings in column 251.
mean = data[250].mean()
# Parse the bounds with pandas string methods instead of eval(), so text read from
# the CSV is never executed as code.
ci_parts = data[251].str.strip('[]').str.split(',')
lower, upper = ci_parts.str[0].astype(float), ci_parts.str[1].astype(float)
lower_CI, upper_CI = lower.mean(), upper.mean()
mean
lower_CI
upper_CI

0.21625822012

0.21242999999999998

0.22008799999999998

In [41]:
# Average the column-278 values and the bounds of the "[low,high]" strings in column 279.
mean = data[278].mean()
# Parse the bounds with pandas string methods instead of eval(), so text read from
# the CSV is never executed as code.
ci_parts = data[279].str.strip('[]').str.split(',')
lower, upper = ci_parts.str[0].astype(float), ci_parts.str[1].astype(float)
lower_CI, upper_CI = lower.mean(), upper.mean()
mean
lower_CI
upper_CI

0.17811018112000002

0.17472800000000002

0.18149799999999996

In [24]:
# Fraction of rows whose column-22 value lies strictly between lower_CI and upper_CI.
# NOTE(review): lower_CI / upper_CI are whatever the most recently executed CI cell
# left in the kernel — confirm that was the cell built from columns 22/23.
inside_ci = (data[22] > lower_CI) & (data[22] < upper_CI)
contain_pct = inside_ci.sum() / len(data)
contain_pct

0.9615384615384616

In [28]:
# Fraction of rows whose own interval (lower, upper) overlaps (lower_CI, upper_CI).
# NOTE(review): lower, upper, lower_CI and upper_CI all come from the most recently
# executed CI cell — confirm they belong to the intended column pair.
overlaps_mean_ci = (upper > lower_CI) & (lower < upper_CI)
overlap_pct = overlaps_mean_ci.sum() / len(data)
overlap_pct

1.0

In [14]:
# Lower bound of every "[low,high]" string in column 23, parsed with pandas string
# methods instead of eval() so text read from the CSV is never executed as code.
data[23].str.strip('[]').str.split(',').str[0].astype(float)

0     0.1853
1     0.1809
2     0.1855
3     0.1796
4     0.1867
5     0.1908
6     0.1832
7     0.1840
8     0.1791
9     0.1817
10    0.1793
11    0.1848
12    0.1711
13    0.1835
14    0.1865
15    0.1928
16    0.1791
17    0.1759
18    0.1878
19    0.1768
20    0.1783
21    0.1853
22    0.1814
23    0.1848
24    0.1823
25    0.1780
Name: 23, dtype: float64

In [5]:
# Build two-column tables [column 0, column `phase`] from single-column references.
# With phase = 0 both output columns are copies of column 0.
phase = 0

lam_ref = np.array([[v] for v in (0.6, 0.2, 0.2, 0.2)], dtype=float)
mu_ref = np.array([[v] for v in (2, 1, 2, 2)], dtype=float)
columns = [0, phase]
lam = lam_ref[:, columns].copy()
mu = mu_ref[:, columns].copy()
lam
mu

array([[0.6, 0.6],
       [0.2, 0.2],
       [0.2, 0.2],
       [0.2, 0.2]])

array([[2., 2.],
       [1., 1.],
       [2., 2.],
       [2., 2.]])

In [4]:
# Build two-column tables [column 0, column `phase`] from single-column references.
# With phase = 0 both output columns are copies of column 0.
phase = 0
lam_ref = np.array([[v] for v in (0.6, 0.2)], dtype=float)
mu_ref = np.array([[v] for v in (2, 1)], dtype=float)
columns = [0, phase]
lam = lam_ref[:, columns].copy()
mu = mu_ref[:, columns].copy()
lam
mu
lam_ref

array([[0.6, 0.6],
       [0.2, 0.2]])

array([[2., 2.],
       [1., 1.]])

array([[0.6],
       [0.2]])

In [47]:
# The sum of an empty slice is 0, so the expression below evaluates to 1.0.
a = np.asarray([0.6, 0.2, 0.2])
1 - np.sum(a[:0])

1.0

In [50]:
# f-string formatting check. The variable used to be called `type`, which replaced the
# builtin `type` for every cell executed afterwards in the same kernel.
type_id = 0
print(f'printfffff {type_id}')


printfffff 0


In [37]:
# Pair the two lists element-wise into an (n, 2) array and take its first column.
a = [0.6, 0.2, 0.2]
b = [0.8333, 0.4666, 0.4666]
c = np.array([[left, right] for left, right in zip(a, b)])
mu_each = c[:, 0]
mu_each

array([0.6, 0.2, 0.2])

In [4]:
# Print (index, threshold) for every threshold of every flow; flows may have
# different numbers of thresholds.
num_type = 2
threshold = [[5], [5, 10]]

for flow, levels in enumerate(threshold[:num_type]):
    for idx, gamma in enumerate(levels):
        print(idx, gamma)


0 5
0 5
1 10


In [None]:
# One zero vector per flow, with one entry per threshold of that flow.
# Relies on `threshold` and `num_type` already being defined in the kernel.
[np.zeros(len(threshold[i])) for i in range(num_type)]

In [29]:
# For each threshold of the chosen flow, collect that threshold's value from every
# record and print the values as one array.
# Relies on `threshold` already being defined in the kernel.
flow_id = 0
# Each record is [array for flow 0, array for flow 1]; arrays have one entry per threshold.
cycle_sum_probs = (
    [np.array([0.]), np.array([0., 0.])],
    [np.array([2.06984417e-05]), np.array([9.75310188e-06, 9.75310188e-06])],
    [np.array([0.01200533]), np.array([0.00197793, 0.00180436])],
)
for i, gamma in enumerate(threshold[flow_id]):
    per_record = [record[flow_id][i] for record in cycle_sum_probs]
    print(np.array(per_record))

[0.00000000e+00 2.06984417e-05 1.20053300e-02]


In [30]:
# For each threshold of the chosen flow, collect that threshold's value from every
# record and print the values as one array.
# Relies on `threshold` already being defined in the kernel.
flow_id = 1
# Each record is [array for flow 0, array for flow 1]; arrays have one entry per threshold.
cycle_sum_probs = (
    [np.array([0.]), np.array([0., 0.])],
    [np.array([2.06984417e-05]), np.array([9.75310188e-06, 9.75310188e-06])],
    [np.array([0.01200533]), np.array([0.00197793, 0.00180436])],
)
for i, gamma in enumerate(threshold[flow_id]):
    per_record = [record[flow_id][i] for record in cycle_sum_probs]
    print(np.array(per_record))

[0.00000000e+00 9.75310188e-06 1.97793000e-03]
[0.00000000e+00 9.75310188e-06 1.80436000e-03]


In [32]:
# Each record is [array for flow 0, array for flow 1].
reasons = (
    [np.array([1, 1]), np.array([0, 0])],
    [np.array([ 1, -1]), np.array([-1, -1])],
    [np.array([1, 1]), np.array([0, 0])],
    [np.array([ 1, -1]), np.array([ 0, -1])],
)
# This cell used to read `i` left over from a loop in another cell, so it failed on a
# fresh kernel. The recorded outputs correspond to position 1, which is now explicit.
threshold_idx = 1
freq = np.array([reason[0][threshold_idx] for reason in reasons])
freq
np.array([reason[1][threshold_idx] for reason in reasons])

array([ 1, -1,  1, -1])

array([ 0, -1,  0, -1])

In [8]:
# Two 2x2 rate tables; the first column of lam is displayed before the full table.
lam = np.array([
    [0.6, 0.8333],
    [0.2, 0.4666],
])
mu = np.array([
    [2, 1.2],
    [1, 0.2],
])
lam[:, 0]
lam

array([[0.6   , 0.8333],
       [0.2   , 0.4666]])

In [56]:
# Map every index below num_type to its negation: {0: 0, 1: -1, 2: -2}.
num_type = 3
negated = -np.arange(num_type)
{flow: negated[flow] for flow in range(num_type)}

{0: 0, 1: -1, 2: -2}

In [58]:
# Per-flow threshold lists. Relies on `num_type` already being defined in the kernel.
gamma = [[2, 4], [3, 6], [5, 7]]
# Largest threshold of each flow.
[max(gamma[i]) for i in range(num_type)] 
# One array of -1 per flow, with one entry per threshold of that flow.
[np.full(len(gamma[i]), -1) for i in range(num_type)]

[4, 6, 7]

[array([-1, -1]), array([-1, -1]), array([-1, -1])]

In [12]:
# One result row per flow. Row 0 starts with a None placeholder, then every row gets
# that flow's lam entries followed by its mu entries.
# Relies on `num_type`, `lam` and `mu` already being defined in the kernel.
L = [[] for _ in range(num_type)]
L[0].append(None)
for i in range(num_type):
    L[i].extend([*lam[i], *mu[i]])
# All rows concatenated into a single flat row, wrapped in an outer list.
[[item for row in L for item in row]]

[[None, 0.6, 0.8333, 2.0, 1.2, 0.2, 0.4666, 1.0, 0.2]]

In [14]:
# Display the per-flow rows built so far.
L

[[None, 0.6, 0.8333, 2.0, 1.2], [0.2, 0.4666, 1.0, 0.2]]

In [None]:
# Report statistics for flow index 0 and append them to the result row L[flow_id].
# NOTE(review): cycle_nums, cycle_sums, cycle_sum_probs, reasons, threshold, L, mean_CI
# and cycle_statistics are not defined in this cell; they must already exist in the kernel.
flow_id = 0
# Keep only this flow's entry from every record.
cycle_num = [cycle_num[flow_id] for cycle_num in cycle_nums]
cycle_sum = [cycle_sum[flow_id] for cycle_sum in cycle_sums]  # NOTE(review): not used below in this cell

for i, gamma in enumerate(threshold[flow_id]):
    # Values for threshold i of this flow, one per record.
    cycle_sum_prob = np.array([cycle_sum_prob[flow_id][i] for cycle_sum_prob in cycle_sum_probs])
    # First estimate (printed as "Nominator"): mean and CI half-width from mean_CI.
    mean, halfCI = mean_CI(cycle_sum_prob)
    # Row layout: threshold, mean, "[low,high]" string, half-width, half-width/mean/1.96.
    L[flow_id].extend([gamma, mean, f'[{mean-halfCI:.4g},{mean+halfCI:.4g}]', halfCI, halfCI/mean/1.96])
    # The last printed figure is labelled "RE" — presumably relative error; confirm.
    print(f'Prob {gamma} in flow 1 (Nominator): {mean:.14g}  and CI [{mean-halfCI:.14g},{mean+halfCI:.14g}] RE {halfCI/1.96/mean:.14g}')
    # Second estimate from cycle_statistics; same row layout minus the threshold.
    mean, halfCI = cycle_statistics(cycle_num, cycle_sum_prob)
    L[flow_id].extend([mean, f'[{mean-halfCI:.4g},{mean+halfCI:.4g}]', halfCI, halfCI/mean/1.96])
    print(f'Prob {gamma} in flow 1: {mean:.14g}  and CI [{mean-halfCI:.14g},{mean+halfCI:.14g}] RE {halfCI/1.96/mean:.14g}')
    # Per-record code for this threshold; the prints label code 0 "flow 1" and code 1 "flow 2".
    freq = np.array([reason[0][i] for reason in reasons])
    print(f'Mean flow 1 {np.mean(cycle_sum_prob[freq==0])}')
    print(f'ratio flow 1 {sum(freq==0)/len(freq)}')
    print(f'Mean flow 2 {np.mean(cycle_sum_prob[freq==1])}')
    print(f'ratio flow 2 {sum(freq==1)/len(freq)}')
    # Share of code 0, share of code 1, their sum, and code 1's share among codes 0 and 1.
    L[flow_id].extend([sum(freq==0)/len(freq), sum(freq==1)/len(freq), sum(freq==0)/len(freq) + sum(freq==1)/len(freq), sum(freq==1)/(sum(freq==0)+sum(freq==1))])
    # Mean and variance of the values within each code group, then the overall variance.
    L[flow_id].extend([np.mean(cycle_sum_prob[freq==0]), np.mean(cycle_sum_prob[freq==1])])
    L[flow_id].extend([np.var(cycle_sum_prob[freq==0]), np.var(cycle_sum_prob[freq==1])])
    L[flow_id].append(np.var(cycle_sum_prob))

In [None]:
# Report statistics for flow index 1 and append them to the result row L[flow_id].
# NOTE(review): cycle_nums, cycle_sums, cycle_sum_probs, reasons, threshold, L, mean_CI,
# cycle_statistics, time and start are not defined in this cell; they must already
# exist in the kernel. `time` is not among the imports in the notebook's first cell.
flow_id = 1

# Keep only this flow's entry from every record.
cycle_num = [cycle_num[flow_id] for cycle_num in cycle_nums]
cycle_sum = [cycle_sum[flow_id] for cycle_sum in cycle_sums]  # NOTE(review): not used below in this cell

# The loop body indexes with the literal 1 rather than flow_id; both are 1 here.
for i, gamma in enumerate(threshold[1]):
        # Values for threshold i of this flow, one per record.
        cycle_sum_prob = np.array([cycle_sum_prob[1][i] for cycle_sum_prob in cycle_sum_probs])
        # First estimate (printed as "Nominator"): mean and CI half-width from mean_CI.
        mean, halfCI = mean_CI(cycle_sum_prob)
        # Row layout: mean, "[low,high]" string, half-width, half-width/mean/1.96, seconds since `start`.
        # NOTE(review): unlike the flow-0 cell, the threshold is not stored and an elapsed time is.
        L[flow_id].extend([mean, f'[{mean-halfCI:.4g},{mean+halfCI:.4g}]', halfCI, halfCI/mean/1.96, time.time()-start])
        # The last printed figure is labelled "RE" — presumably relative error; confirm.
        print(f'Prob {gamma} in flow 2 (Nominator): {mean:.14g}  and CI [{mean-halfCI:.14g},{mean+halfCI:.14g}] RE {halfCI/1.96/mean:.14g}')
        print(f'std {np.std(cycle_sum_prob)}')
        # Second estimate from cycle_statistics, stored with the same layout.
        mean, halfCI = cycle_statistics(cycle_num, cycle_sum_prob)
        L[flow_id].extend([mean, f'[{mean-halfCI:.4g},{mean+halfCI:.4g}]', halfCI, halfCI/mean/1.96, time.time()-start])
        print(f'Prob {gamma} in flow 2: {mean:.14g}  and CI [{mean-halfCI:.14g},{mean+halfCI:.14g}] RE {halfCI/1.96/mean:.14g}')
        # Per-record code for this threshold; the prints label code 0 "flow 1" and code 1 "flow 2".
        freq = np.array([reason[1][i] for reason in reasons])
        # Every record must contribute exactly one code.
        assert len(freq) == len(cycle_sum_prob)
        print(f'Mean flow 1 {np.mean(cycle_sum_prob[freq==0])}')
        print(f'Std flow 1 {np.std(cycle_sum_prob[freq==0])}')
        print(f'ratio flow 1 {sum(freq==0)/len(freq)}')
        print(f'Mean flow 2 {np.mean(cycle_sum_prob[freq==1])}')
        print(f'Std flow 2 {np.std(cycle_sum_prob[freq==1])}')
        print(f'ratio flow 2 {sum(freq==1)/len(freq)}')
        # Share of code 0, share of code 1, their sum, and code 1's share among codes 0 and 1.
        L[flow_id].extend([sum(freq==0)/len(freq), sum(freq==1)/len(freq), sum(freq==0)/len(freq) + sum(freq==1)/len(freq), sum(freq==1)/(sum(freq==0)+sum(freq==1))])
        # Mean and variance of the values within each code group, then the overall variance.
        L[flow_id].extend([np.mean(cycle_sum_prob[freq==0]), np.mean(cycle_sum_prob[freq==1])])
        L[flow_id].extend([np.var(cycle_sum_prob[freq==0]), np.var(cycle_sum_prob[freq==1])])
        L[flow_id].append(np.var(cycle_sum_prob))
        # NOTE(review): this re-prints the cycle_statistics mean/CI under a "flow 1 (Nominator)"
        # label with a misspelt "Frquency"; it looks like a copy-paste leftover — confirm intent.
        print(f'Frquency reason {gamma} in flow 1 (Nominator): {mean:.14g}  and CI [{mean-halfCI:.14g},{mean+halfCI:.14g}] RE {halfCI/1.96/mean:.14g}')