In [1]:
import pandas as pd
import numpy as np
import random

In [2]:
# df = pd.read_csv("ppmf_01.csv") # to read the actual census df

m = 10  # num cols
n = 100 # num rows
df = pd.DataFrame(np.random.randint(0,2,size=(n, m)))
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,0,0,0,1,1,1,0,1,0,0
1,1,0,0,0,1,0,0,0,0,1
2,0,0,1,0,1,0,1,0,0,0
3,0,1,1,1,0,1,1,0,0,1
4,1,1,1,0,0,0,1,1,0,1
...,...,...,...,...,...,...,...,...,...,...
95,0,0,0,1,1,0,1,0,1,1
96,0,0,0,0,0,0,1,0,0,0
97,1,1,1,1,1,0,1,1,0,1
98,1,1,1,0,0,0,1,0,0,0


In [3]:
def reconstruct(df):
    # tabulate 1 way queries
    qs = df.sum()
    recon = pd.DataFrame(columns=df.columns)
    (n, m) = df.shape
    
    for _ in range(len(df)):
        row = construct_row(recon, qs, n, m)
        recon.loc[len(recon)] = row
        qs = qs - row
        
    return recon
        
    
def construct_row(recon, qs, n, m):
    prob = qs/(n - len(recon))
    row = np.array([])

    for i in range(m):
        if random.random() <= prob[i]:
            row = np.append(row, 1)
        else:
            row = np.append(row, 0)

    row = row.astype(int)
    return row
    

In [4]:
recon = reconstruct(df)
recon

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,1,0,0,0,1,1,0,1,1,0
1,0,1,1,0,0,1,0,0,0,1
2,1,0,1,0,0,0,0,1,1,1
3,0,0,1,1,1,1,1,1,1,0
4,1,0,1,1,0,1,1,1,1,1
...,...,...,...,...,...,...,...,...,...,...
95,1,1,1,0,1,0,0,0,1,1
96,0,0,0,0,1,1,1,0,0,1
97,1,0,1,0,1,1,0,1,1,1
98,1,1,0,1,0,1,1,0,0,0


In [5]:
recon.sum()

0    48
1    53
2    52
3    45
4    48
5    50
6    52
7    50
8    46
9    49
dtype: int64

In [6]:
df.sum()

0    48
1    53
2    52
3    45
4    48
5    50
6    52
7    50
8    46
9    49
dtype: int64

### Two Way Queries

In [7]:
class OneWayQuery:
    def __init__(self, col, q, n):
        self.col = col
        self.q = q
        self.n = n

class TwoWayQuery:
    def __init__(self, col_1, col_2, q, n):
        self.col_1 = col_1
        self.col_2 = col_2
        self.q = q
        self.n = n

In [8]:
m = 2  # num cols
n = 7 # num rows
df = pd.DataFrame(np.random.randint(0,2,size=(n, m)))
df

Unnamed: 0,0,1
0,1,1
1,0,1
2,1,0
3,1,0
4,0,0
5,0,0
6,1,1


In [9]:
recon = pd.DataFrame(np.empty((n,m)))
recon

Unnamed: 0,0,1
0,0.0,0.0
1,3.5e-323,0.0
2,0.0,0.0
3,0.0,0.0
4,1.018558e-312,9.761181e-313
5,1.103438e-312,1.909796e-312
6,1.363129e+161,9.734698e-309


In [12]:
col_1 = 0
col_2 = 1
ans = len(df.query(f'@df[{col_1}] == 1 and @df[{col_2}] == 1'))
two = TwoWayQuery(col_1, col_2, ans, n)

ones = []
for index, ans in df.sum().items():
    q = OneWayQuery(index, ans, n)
    ones.append(q)


for i in range(n):
    for j in range(m):
        if j == 0:
            if random.random() <= (ones[j].q / ones[j].n):
                recon.loc[i, j] = 1
                ones[j].q -= 1
                ones[j].n -= 1
            else:
                recon.loc[i, j] = 0
                ones[j].n -= 1
        else: # j = 1
            if random.random() <= (ones[1].q - two.q)/(max(ones[0].n - ones[0].q, 1)):
                recon.loc[i, j] = 1
                ones[j].q -= 1
                ones[j].n -= 1
                
                if df.loc[i, 0] == 1: # when 2 way query row is made
                    two.q -= 1
                    two.n -= 1
            else:
                recon.loc[i, j] = 0
                ones[j].n -= 1

recon[0] = recon[0].astype(int)
recon[1] = recon[1].astype(int)

In [13]:
recon

Unnamed: 0,0,1
0,1,0
1,0,0
2,1,1
3,1,1
4,1,0
5,0,1
6,0,0


In [14]:
df

Unnamed: 0,0,1
0,1,1
1,0,1
2,1,0
3,1,0
4,0,0
5,0,0
6,1,1
