In [1]:
import numpy as np
import pandas as pd

In [2]:
factor_0_all = [0,1,2,3]
factor_1_all = [0,1,2,3]

df = pd.DataFrame([
    [0,0,0.5],
    [0,1,0.75],
    [1,2,1.]
], columns=['factor_0', 'factor_1', 'probability']).set_index(['factor_0', 'factor_1'])
df

Unnamed: 0_level_0,Unnamed: 1_level_0,probability
factor_0,factor_1,Unnamed: 2_level_1
0,0,0.5
0,1,0.75
1,2,1.0


In [3]:
# add missing indices
df_all_idx = df.join(pd.DataFrame(index=df.index.from_product([factor_0_all, factor_1_all])), on=['factor_0', 'factor_1'], how='outer', sort=['factor_0', 'factor_1'])
df_all_idx[df_all_idx.isna()] = 0.
df_all_idx

Unnamed: 0_level_0,Unnamed: 1_level_0,probability
factor_0,factor_1,Unnamed: 2_level_1
0,0,0.5
0,1,0.75
0,2,0.0
0,3,0.0
1,0,0.0
1,1,0.0
1,2,1.0
1,3,0.0
2,0,0.0
2,1,0.0


In [4]:
df_all_idx.index.dtype

dtype('O')

In [5]:
df_all_idx.loc[[[0,1], [0,0], [0,2], [1,2]]]

Unnamed: 0_level_0,Unnamed: 1_level_0,probability
factor_0,factor_1,Unnamed: 2_level_1
0,1,0.75
0,0,0.5
0,2,0.0
1,2,1.0


In [6]:
gus_factor_0 = np.array([0,0,0,1])
gus_factor_1 = np.array([1,0,2,2])

In [7]:
gus_factors = np.column_stack((gus_factor_0, gus_factor_1))
gus_factors

array([[0, 1],
       [0, 0],
       [0, 2],
       [1, 2]])

In [8]:
df_all_idx.loc[gus_factors.tolist()]

Unnamed: 0_level_0,Unnamed: 1_level_0,probability
factor_0,factor_1,Unnamed: 2_level_1
0,1,0.75
0,0,0.5
0,2,0.0
1,2,1.0


In [9]:
rng = np.random.default_rng()

In [10]:
p = df_all_idx.loc[gus_factors.tolist()]
p.values.flatten()

array([0.75, 0.5 , 0.  , 1.  ])

In [11]:
rng.binomial(1., p, p.shape)

array([[1],
       [1],
       [0],
       [1]])

In [12]:
df_ps = pd.DataFrame([
    [0,0,0.5, 0.6],
    [0,1,0.75, 0.2],
    [1,2,0.9, 0.]
], columns=['factor_0', 'factor_1', 'probability_0', 'probability_1']).set_index(['factor_0', 'factor_1'])
df_ps

Unnamed: 0_level_0,Unnamed: 1_level_0,probability_0,probability_1
factor_0,factor_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0,0.5,0.6
0,1,0.75,0.2
1,2,0.9,0.0


In [13]:
df_ps[df_ps.sum(axis=1) > 1.]

Unnamed: 0_level_0,Unnamed: 1_level_0,probability_0,probability_1
factor_0,factor_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0,0.5,0.6


In [14]:
df_ps.divide(df_ps.sum(axis=1), axis='rows') 
df_ps.where(df_ps.sum(axis=1) <= 1., df_ps.divide(df_ps.sum(axis=1), axis='rows') )

Unnamed: 0_level_0,Unnamed: 1_level_0,probability_0,probability_1
factor_0,factor_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0,0.454545,0.545455
0,1,0.75,0.2
1,2,0.9,0.0


In [15]:
df_ps[df_ps.sum(axis=1) > 1.]

Unnamed: 0_level_0,Unnamed: 1_level_0,probability_0,probability_1
factor_0,factor_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0,0.5,0.6


In [16]:
df_p = pd.DataFrame([
    [0,0,1.1],
    [0,1,0.75],
    [1,2,1.]
], columns=['factor_0', 'factor_1', 'probability']).set_index(['factor_0', 'factor_1'])
df_p

Unnamed: 0_level_0,Unnamed: 1_level_0,probability
factor_0,factor_1,Unnamed: 2_level_1
0,0,1.1
0,1,0.75
1,2,1.0


In [17]:
appeard = np.array([1.,0.,1.,0.])
appeard

array([1., 0., 1., 0.])

In [18]:
np.nonzero(appeard)

(array([0, 2]),)

In [19]:
np.argwhere(appeard)

array([[0],
       [2]])

In [20]:
appeard[np.nonzero(appeard)] = 2.
appeard

array([2., 0., 2., 0.])

In [21]:
np.nonzero(appeard)

(array([0, 2]),)

In [22]:
appeard = np.array([0.,0.,1.,0.])

In [23]:
gus_factors[0,0] = 10
gus_factors

array([[10,  1],
       [ 0,  0],
       [ 0,  2],
       [ 1,  2]])

In [24]:
gus_factor_0

array([0, 0, 0, 1])

In [25]:
gus_factor_1

array([1, 0, 2, 2])

In [26]:
appeard_idxs = np.nonzero(appeard)
appeard_idxs

(array([2]),)

In [27]:
gus_factors[np.nonzero(appeard)]

array([[0, 2]])

In [28]:
gus_factor_0[np.nonzero(appeard)]

array([0])

In [29]:
gus_factor_1[np.nonzero(appeard)]

array([2])

In [30]:
np.column_stack((gus_factor_0[np.nonzero(appeard)], gus_factor_1[np.nonzero(appeard)]))

array([[0, 2]])

In [31]:
np.column_stack(tuple([f for f in [gus_factor_0[np.nonzero(appeard)], gus_factor_1[np.nonzero(appeard)]]]))

array([[0, 2]])

In [32]:
rng.multinomial([1,1], [[0.5, 0.5],[0.5, 0.5]])

ValueError: object too deep for desired array

In [None]:
gus_factor_0[np.nonzero(gus_factor_0)] = [1]

In [None]:
np.nonzero(gus_factor_0)

In [None]:
df.columns.to_numpy()

In [35]:
gus_factors[:,1]

array([1, 0, 2, 2])

In [37]:
np.datetime64.

numpy.datetime64('2000-01-01')