In [1]:
import numpy as np
import pandas as pd
import datetime, os
# import randomization  # not used: this one doesn't produce a DataFrame :(
# from stochatreat import stochatreat  # not used: this one doesn't include the block_id :(
from rpy2.robjects.packages import importr
import rpy2.robjects as ro # this will be useful if not running in Jupyter
%load_ext rpy2.ipython

In [2]:
# Randomization parameters. These three names are handed to the R cell
# through the `-i` flags of its `%%R` magic line.
n_randomizations = 100_000  # number of assignment rows requested
n_arms = 3  # number of arms
# Total number of randomizations in each block. Blocks are assumed balanced,
# so units_per_block = n_arms * (number of times each arm appears per block).
units_per_block = 6
# Fail fast if the balanced-block assumption is violated; otherwise the R cell
# would receive a fractional per-arm count (units_per_block / n_arms).
assert units_per_block % n_arms == 0, (
    "units_per_block must be a multiple of n_arms for balanced blocks"
)

In [4]:
%%R -i n_randomizations -i n_arms -i units_per_block
# Blocked randomization for the French Wikipedia Welcome Experiment.
# n_randomizations, n_arms and units_per_block are injected from Python by
# the -i flags on the magic line above.
library(blockrand)
set.seed(667)  # fixed seed so the generated assignment table is reproducible

## Sizing note: we will run this for 90 days. In 2019 the maximum number of
## newcomers per 30 days never exceeded 20,000, so 60,000 assignments should
## be sufficient. The number actually generated is n_randomizations (set in
## Python), which should be at least that large.

# Balanced blocks need each arm to appear a whole number of times per block.
stopifnot(units_per_block %% n_arms == 0)

# block.sizes entries are per-arm counts: blockrand multiplies them by
# num.levels to get the block length (see the blockrand documentation).
arm_repeats_per_block <- units_per_block / n_arms

frwiki <- blockrand(n = n_randomizations,
                    num.levels = n_arms,
                    # Arm labels 0 .. n_arms-1, derived from n_arms so the two
                    # cannot drift apart (equals c(0,1,2) when n_arms is 3).
                    levels = seq_len(n_arms) - 1,
                    # Every candidate size is the same, so all blocks have
                    # equal length. The three-fold repetition is kept exactly
                    # as in the original call.
                    block.sizes = rep(arm_repeats_per_block, 3),
                    id.prefix = 'newcomer_')

In [9]:
# Copy the R object `frwiki` into the Python namespace (rpy2 `-o` flag) so the
# following Python cells can work with it.
%R -o frwiki

In [10]:
# Preview the first 12 assignment rows (two blocks' worth at 6 units per block).
frwiki.iloc[:12]

Unnamed: 0,id,block.id,block.size,treatment
1,newcomer_001,1,6.0,2
2,newcomer_002,1,6.0,0
3,newcomer_003,1,6.0,2
4,newcomer_004,1,6.0,1
5,newcomer_005,1,6.0,1
6,newcomer_006,1,6.0,0
7,newcomer_007,2,6.0,1
8,newcomer_008,2,6.0,2
9,newcomer_009,2,6.0,1
10,newcomer_010,2,6.0,0


In [11]:
# Translate blockrand's dotted column names into the snake_case names used in
# the exported file. Columns not listed here keep their original names.
target_colnames = {
    "block.id": "randomization_block_id",
    "block.size": "randomization_block_size",
    "treatment": "randomization_arm",
}
exportable = frwiki.rename(columns=target_colnames)

In [12]:
# Keep only the two columns that go into the export, arm first.
export_columns = ["randomization_arm", "randomization_block_id"]
exportable = exportable.loc[:, export_columns]

In [13]:
# Cast every remaining column to a plain integer dtype.
exportable = exportable.astype({column: int for column in exportable.columns})

In [14]:
# Quick look at the first five rows of the frame that will be written out.
exportable.iloc[:5]

Unnamed: 0,randomization_arm,randomization_block_id
1,2,1
2,0,1
3,2,1
4,1,1
5,1,1


In [15]:
# Write the assignment table to
#   <output_dir>/<stem>-<YYYYMMDD>-<n_randomizations>.split.noindex.json
# using pandas' "split" JSON layout without the index.
# NOTE(review): 'randomziation' is misspelled, but the files already in the
# output directory use the same spelling -- confirm with downstream readers
# before renaming.
output_dir = '/data/project/frwelcome/randomizations'
fname_stem = 'fr-randomziation'
date = datetime.datetime.now().strftime('%Y%m%d')
fname = os.path.join(
    output_dir,
    f'{fname_stem}-{date}-{n_randomizations}.split.noindex.json',
)
exportable.to_json(fname, orient='split', index=False)

In [16]:
# Shell out to list the on-disk size of each file in the randomizations
# output directory (human-readable units).
!du -h /data/project/frwelcome/randomizations/*

2.3M	/data/project/frwelcome/randomizations/fr-dummy-randomziation-201909024.csv
13M	/data/project/frwelcome/randomizations/fr-dummy-randomziation-201909024.json
200K	/data/project/frwelcome/randomizations/fr-dummy-randomziation-20190924-10000.columns.json
116K	/data/project/frwelcome/randomizations/fr-dummy-randomziation-20190924-10000.csv
592K	/data/project/frwelcome/randomizations/fr-dummy-randomziation-20190924-10000.index.json
592K	/data/project/frwelcome/randomizations/fr-dummy-randomziation-20190924-10000.json
528K	/data/project/frwelcome/randomizations/fr-dummy-randomziation-20190924-10000.redcords.json
136K	/data/project/frwelcome/randomizations/fr-dummy-randomziation-20190924-10000.split.json
88K	/data/project/frwelcome/randomizations/fr-dummy-randomziation-20190924-10000.split.noindex.json
2.3M	/data/project/frwelcome/randomizations/fr-dummy-randomziation-20190924-100000.columns.json
1.3M	/data/project/frwelcome/randomizations/fr-dummy-randomziation-20190924-100000