In [None]:
import sys
import os

sys.path.insert(0, os.getcwd() + '/reddit_download')

In [None]:
import numpy as np
import matplotlib.pyplot as plt

sys.path.append('../..')
from plotting.matplotlib_setup import configure_latex, savefig, set_size_decorator, savefig, thiner_border

tex_dir, images_dir = 'porocilo/main.tex', 'porocilo/images'

configure_latex(style=['science', 'notebook'], global_save_path=images_dir)

%config InlineBackend.figure_format = 'pdf'

## preprocess and make csv

In [None]:
# from reddit_download.RWV.pushshift.utils import build_df

# df_comments = build_df(content_type='comment', file_path=os.getcwd() + '/reddit_download')
# df_posts = build_df(content_type='post', file_path=os.getcwd() + '/reddit_download')

# ind = df_comments[df_comments['author'] == '[deleted]'].index
# df_comments.drop(ind, inplace=True)

# ind = df_comments[df_comments['author'] == 'AutoModerator'].index
# df_comments.drop(ind, inplace=True)

# ind = df_posts[df_posts['author'] == '[deleted]'].index
# df_posts.drop(ind, inplace=True)

# ind = df_posts[df_posts['author'] == 'AutoModerator'].index
# df_posts.drop(ind, inplace=True)

# df_comments = df_comments.rename(columns={"link_id": "post_id"})

# df_comments = df_comments.rename(columns={"created_utc": "timestamp"})
# df_posts = df_posts.rename(columns={"created_utc": "timestamp"})

# df_comments.to_csv('comments.csv', index=False)
# df_posts.to_csv('posts.csv', index=False)

## Modin and Ray setup

In [None]:
# import pandas as pd
# import swifter

# Select Ray as Modin's engine and cap Modin at 8 CPUs. Both environment
# variables are set before `modin.pandas` is imported a few lines below.
os.environ["MODIN_ENGINE"] = "ray" 
os.environ["MODIN_CPUS"] = "8"
import ray
# Start the local Ray runtime with the same CPU budget as MODIN_CPUS.
ray.init(num_cpus=8)
# From here on `pd` refers to Modin's pandas-compatible API, not plain pandas.
import modin.pandas as pd

#import swifter

# from distributed import Client
# client = Client()

# workers = 12

# os.environ["MODIN_ENGINE"] = "ray" 
# os.environ["MODIN_CPUS"] = str(workers)

# import ray
# ray.init(num_cpus=workers)

# import modin.pandas as pd

from tqdm import tqdm
from modin.config import ProgressBar
ProgressBar.enable()

In [None]:
df_comments = pd.read_csv('comments.csv', lineterminator='\n')
df_posts = pd.read_csv('posts.csv', lineterminator='\n')

In [None]:
df_comments.drop(columns=['author', 'timestamp', 'post_id', 'parent_id', 'permalink'], inplace=True)
df_posts.drop(columns=['author', 'timestamp', 'post_id', 'num_comments', 'permalink'], inplace=True)

In [None]:
# Force every comment body to `str` so later string operations (joining,
# tokenizing) never meet a non-string cell.
df_comments['body'] = df_comments['body'].apply(lambda x: str(x))

## make sentences with NLTK tokenizer

In [None]:
from reddit_download.RWV.text_processing.process_reddit import word2vec_input

In [None]:
class TokenizerInput:
    """Lightweight wrapper exposing ``body`` and ``is_post`` attributes.

    ``body_to_sent`` passes instances of this class to ``word2vec_input``;
    presumably that helper reads these two attributes from each item
    (NOTE(review): confirm against ``word2vec_input``).
    """

    def __init__(self, text):
        # Every wrapped text is flagged as "not a post".
        self.is_post = False
        # Coerce to ``str`` so any input value yields a text body.
        self.body = str(text)

def body_to_sent(x):
    """Run a single text through ``word2vec_input`` with ``to_sent=True``.

    The text is wrapped in a ``TokenizerInput`` and passed as a one-element
    list; whatever ``word2vec_input`` returns is handed back unchanged.
    """
    wrapped = TokenizerInput(x)
    return word2vec_input([wrapped], to_sent=True)

In [None]:
# df_comments['sent'] = df_comments['body'].apply(body_to_sent)

## sentence count

In [None]:
# counts = df_comments['sent'].apply(len).values

In [None]:
# fig, ax = set_size_decorator(plt.subplots, fraction=0.5, ratio='4:3')(1, 1)

# ax.hist(counts, bins=14, range=(1, 15), histtype='step')
# ax.set_xlabel(r'\# stavkov')
# ax.set_ylabel(r'$N$')
# savefig('sent_count', tight_layout=False)

## char in body count

In [None]:
# char_counts = df_comments['body'].apply(len).values

In [None]:
# fig, ax = set_size_decorator(plt.subplots, fraction=0.5, ratio='4:3')(1, 1)

# plt.hist(char_counts, range=(0, 1000), bins=100, histtype='step')
# ax.set_xlabel(r'\# znakov v komentarju')
# ax.set_ylabel(r'$N$')
# savefig('char_comment_counts', tight_layout=False)

## char in sent count

In [None]:
class SentCharCounter:
    """Accumulates the length of every sentence it is shown."""

    def __init__(self):
        # Flat list of lengths, in the order the sentences were seen.
        self.counts = list()

    def count(self, sent_lst):
        """Append ``len(s)`` for each item of ``sent_lst`` and return ``self``.

        Returning the counter itself lets it be used as an ``apply`` callback
        whose result still exposes the accumulated ``counts``.
        """
        self.counts.extend(map(len, sent_lst))
        return self

In [None]:
# SC = SentCharCounter()

# sent_char_counts = df_comments['sent'].apply(SC.count)

In [None]:
# counts = sent_char_counts[0].counts

In [None]:
# fig, ax = set_size_decorator(plt.subplots, fraction=0.5, ratio='4:3')(1, 1)

# ax.hist(counts, range=(0, 500), bins=100, histtype='step')
# ax.ticklabel_format(style='sci', axis='y', scilimits=(0, 0))
# ax.set_xlabel(r'\# znakov v stavku')
# ax.set_ylabel(r'$N$')
# savefig('sent_word_count', tight_layout=False)

## unique word count 

In [None]:
from collections import Counter

class WordCounter:
    """Running tally of whitespace-separated tokens over many strings."""

    def __init__(self):
        # token -> number of occurrences seen so far (plain dict).
        self.dct = {}

    def count_words(self, s):
        """Add the tokens of ``s`` (split on whitespace) to the tally.

        Returns ``self`` so the counter can be used as an ``apply`` callback
        and still be reached from the result.
        """
        tally = self.dct
        for word in s.split():
            tally[word] = tally.get(word, 0) + 1
        return self

In [None]:
# WC = WordCounter()

# res = df_comments['body'].swifter.apply(WC.count_words)

In [None]:
# word_dct = res[0].dct

In [None]:
# sorted_word_dct = {k: v for k, v in sorted(word_dct.items(), key=lambda item: item[1], reverse=True)}

In [None]:
# wv = list(sorted_word_dct.values())[:50]
# wk = list(sorted_word_dct.keys())[:50]

In [None]:
# fig, ax = set_size_decorator(plt.subplots, fraction=0.5, ratio='4:3')(1, 1)

# ax.bar(wk, wv)
# plt.xticks(rotation=90, fontsize=5)
# ax.minorticks_off()
# ax.ticklabel_format(style='sci', axis='y', scilimits=(0, 0))
# savefig('word_count', tight_layout=False)

In [None]:
# Convert the Modin frame into a plain pandas object before stopping Ray.
# NOTE(review): `_to_pandas` is underscore-prefixed, i.e. a private Modin
# method - confirm it is still available in the installed Modin version.
df_comments = df_comments._to_pandas()
# The Ray runtime is not needed past this point.
ray.shutdown()

## bitstream for RNG

In [None]:
from benford_helper_functions import str_to_bits

In [None]:
top_1000_words = ['the', 'to', 'I', 'a', 'and', 'of', 'is', 'in', 'that', 'you', 'it', 'for', 'was', 'my', 'with', 'on', 'but', 'have', 'be', 'not', 'are', 'just', 'like', 'as', 'or', 'so', 'they', 'this', 'at', 'if', 'me', 'can', 'your', 'The', 'get', 'about', 'from', 'would', 'all', 'one', 'do', 'an', 'people', 'when', 'up', 'out', 'more', 'what', 'her', 'because', "don't", "I'm", 'we', 'had', 'he', 'i', 'some', 'think', 'will', "it's", 'by', 'them', 'really', 'their', 'how', 'has', 'no', 'only', 'know', 'who', 'even', 'than', 'good', 'there', 'time', 'she', 'his', 'then', 'It', 'other', 'If', 'got', 'want', 'You', 'still', 'much', 'were', 'make', 'been', 'also', 'being', 'it.', 'go', 'into', 'any', 'could', 'see', 'never', 'My', 'very', 'But', 'And', 'need', 'way', 'use', "It's", 'which', '-', 'most', 'first', 'going', 'him', 'after', 'something', 'where', 'This', 'too', 'I’m', 'say', 'same', 'should', 'lot', 'back', 'over', 'did', 'better', 'A', 'actually', 'now', 'every', 'So', 'pretty', 'always', 'why', 'don’t', 'someone', 'They', 'off', 'those', 'it’s', 'feel', 'since', 'That', "I've", 'work', 'thing', 'take', 'before', 'things', 'new', 'while', 'probably', "you're", 'am', 'many', 'its', 'years', 'love', 'around', 'game', 'made', 'said', "didn't", '2', 'sure', 'right', 'our', 'best', 'getting', "that's", "can't", 'Not', "doesn't", 'We', 'does', 'down', 'few', 'find', 'used', 'day', 'He', 'bad', 'What', 'enough', 'without', 'long', 'me.', 'ever', 'doing', 'look', 'thought', 'two', 'give', 'life', 'well', 'having', 'might', 'makes', 'In', 'different', 'little', 'anything', 'through', 'it,', 'these', 'already', 'try', 'both', 'When', 'put', "I'd", 'character', 'shit', 'mean', 'last', 'Just', 'great', 'own', 'characters', 'us', 'trying', 'until', 'another', '3', 'No', 'least', 'went', 'keep', 'There', 'point', 'person', 'old', "isn't", 'here', 'She', 'big', '&gt;', 'using', 'It’s', 'hard', 'that.', 'come', 'play', 'next', 'everyone', 'For', 'able', "That's", 
'them.', 'end', 'guy', 'bit', '5', 'world', 'tell', 'kind', 'money', 'part', 'help', 'whole', 'maybe', 'everything', 'remember', 'As', 'show', 'How', 'lol', 'time.', 'high', 'once', 'year', 'damage', 'less', 'live', 'though', '4', 'seen', 'each', 'nothing', 'told', 'may', 'stuff', 'start', 'team', 'fucking', 'saying', 'friends', "I'll", 'started', 'gonna', 'literally', 'main', 'making', 'real', 'away', 'reason', 'far', 'guess', 'anyone', 'such', "they're", '1', 'looking', 'wanted', 'I’ve', 'definitely', 'watch', "there's", 'came', 'believe', 'between', 'gets', 'read', 'friend', 'talking', 'almost', 'man', 'you.', 'times', 'let', 'myself', 'care', 'school', 'Is', 'nice', 'else', 'understand', 'story', 'Also', 'seems', 'dont', 'found', 'done', 'saw', 'level', 'either', 'second', 'Oh', 'full', 'change', 'didn’t', 'set', 'buy', 'fuck', 'me,', 'Why', 'hate', 'place', 'instead', 'looks', 'kids', 'hope', 'called', 'anime', 'post', 'Then', 'run', 'hit', 'free', 'name', 'fun', "wouldn't", 'All', 'heard', 'too.', '10', 'left', 'idea', 'One', 'stop', 'Or', 'you’re', 'took', 'worth', 'usually', 'family', 'playing', 'job', 'ask', 'movie', 'during', "wasn't", 'call', 'single', 'quite', 'tried', 'Yeah', 'can’t', 'that’s', 'girl', 'home', 'pay', 'comes', 'top', 'kinda', 'banner', 'basically', 'Do', 'means', 'wrong', 'Thank', 'small', 'support', 'super', 'days', 'question', 'Because', 'again', 'talk', 'out.', 'Like', "won't", 'build', 'now.', 'water', 'up.', 'Maybe', 'Well', 'fact', 'People', 'that,', 'against', 'rather', 'thinking', 'At', 'wish', 'half', 'car', 'though.', 'problem', 'mind', 'women', 'ones', 'games', 'working', 'cause', 'under', '*', 'house', 'couple', 'especially', 'this.', 'To', 'entire', 'sex', 'side', "Don't", 'completely', 'food', "he's", 'goes', 'asked', 'likely', 'close', 'pull', 'Now', 'mom', 'I’d', 'later', 'comment', 'matter', 'watching', 'weird', 'doesn’t', 'parents', "aren't", 'absolutely', 'there.', 'felt', 'Even', 'u', 'video', 'hear', '&amp;', 
'Your', 'happened', 'amount', 'hours', 'kid', "she's", 'star', 'sounds', 'wait', 'Some', 'one.', 'knew', 'eat', 'happy', 'seem', ':)', 'others', 'guys', 'well.', 'leave', 'months', 'often', 'open', 'cool', 'head', 'kill', 'yet', 'country', 'works', 'case', 'taking', 'needs', '+', 'coming', 'power', 'you,', 'says', 'based', 'im', "haven't", 'sense', 'become', 'whatever', 'day.', 'exactly', 'lost', 'rest', 'sometimes', 'similar', 'Zhongli', 'crit', 'due', 'night', 'must', 'lol.', 'him.', 'dps', 'time,', 'weapon', 'enjoy', 'Yeah,', 'experience', 'easy', 'Thanks', "There's", 'agree', 'spend', 'gave', 'human', 'body', 'her.', 'certain', 'turn', 'men', 'etc.', 'answer', 'check', 'normal', 'ago', 'unless', 'dad', 'yourself', '6', 'fine', 'life.', 'move', 'That’s', 'favorite', 'music', 'ass', 'song', 'Most', 'Good', 'huge', 'burst', 'actual', 'seeing', 'week', 'space', 'attack', 'die', 'running', 'all.', 'takes', 'black', 'watched', 'them,', 'worked', 'add', 'past', 'again.', 'woman', ',', 'Also,', 'save', 'living', "couldn't", 'room', 'Its', 'deal', 'outside', 'people.', 'energy', 'type', 'per', 'played', 'system', 'low', 'content', 'phone', 'number', 'current', 'true', 'face', 'possible', 'feels', 'Yes', '.', 'good.', 'episode', '20', 'gives', 'chance', 'here.', 'behind', 'straight', 'looked', 'mostly', 'I’ll', 'event', 'early', 'feeling', 'wife', 'isn’t', 'three', 'stay', 'learn', 'amazing', 'hot', 'is.', 'sound', 'shield', 'physical', 'After', "you'll", 'happen', 'yeah', 'course', 'front', 'way.', 'minutes', 'middle', 'asking', 'fight', 'extra', 'thank', 'important', 'original', 'shows', '?', 'imagine', 'sleep', 'stupid', 'hell', 'sort', 'Are', 'finally', 'higher', 'damn', 'series', 'needed', 'hand', 'artifacts', 'together', '100%', 'honestly', 'on.', 'given', 'wants', 'ended', 'random', "You're", 'worst', 'wanna', 'child', 'On', 'break', 'social', 'Yes,', 'DPS', 'Probably', 'specific', 'US', 'worse', 'Can', 'interesting', 'game.', 'thing.', 'do.', 'giving', 'scene', 
'bring', 'issue', 'near', 'turned', 'rate', 'they’re', 'thanks', 'meant', 'clear', 'bunch', 'line', 'pick', 'now,', 'is,', 'multiple', 'death', 'dead', 'up,', 'order', 'years.', 'supposed', 'decided', 'girls', 'Being', 'common', 'word', 'god', 'abyss', 'With', 'version', 'future', 'it?', 'simply', 'strong', 'season', 'Well,', 'please', 'yes', 'sorry', 'longer', 'though,', '(and', 'large', 'white', "we're", 'Have', 'light', 'weeks', 'difference', 'age', 'Same', 'easier', 'reading', 'issues', 'fast', 'loved', 'Never', 'along', 'work.', 'account', 'Hu', '8', 'dog', 'company', 'young', 'cryo', 'No,', 'this,', 'pyro', 'piece', 'lose', 'Which', 'totally', 'cut', '(I', 'building', 'happens', 'alone', 'older', 'not.', 'electro', 'general', 'except', 'telling', '2.', 'realize', 'Venti', 'short', '/', 'easily', 'kept', 'decent', 'wasn’t', 'group', 'funny', 'hold', 'walk', '30', 'towards', 'Im', 'Did', 'Ganyu', 'better.', 'extremely', 'out,', 'currently', 'there’s', 'Eula', 'dude', 'American', 'Any', 'bought', 'consider', '&amp;#x200B;', 'glad', 'spent', 'coffee', 'Only', 'Bennett', 'knows', 'control', 'quality', 'waiting', 'late', 'soon', 'class', 'personal', 'Every', 'within', 'mine', 'met', 'several', 'allowed', 'God', 'Lol', 'moment', 'liked', 'cannot', 'inside', 'month', 'themselves', 'standard', 'crazy', 'forget', '7', 'thats', 'party', 'relationship', 'said,', '"I', 'perfect', 'dmg', 'wouldn’t', 'across', 'health', 'drink', 'brother', 'list', 'built', 'sad', 'expect', 'example', 'recommend', 'voice', 'to.', 'book', 'self', 'stuck', 'wonder', 'taste', 'pain', 'stopped', 'choose', 'drop', 'beat', 'mean,', 'state', 'personally', 'floor', 'in.', '12', 'paid', 'sub', 'cost', 'compared', 'pity', 'Was', 'stand', 'Please', 'assume', 'died', 'depends', 'poor', '=', 'right?', 'fan', 'brain', 'whether', 'Of', 'hour', 'Diluc', 'listen', 'weapons', 'Does', 'mental', 'off.', 'explain', 'players', 'there,', 'prefer', 'lots', 'eating', 'gay', 'killed', 'children', 'mother', 
'eventually', 'know,', 'hair', 'E', 'Who', '1.', "you've", 'questions', 'area', 'figure', 'Genshin', 'His', 'fucked', 'well,', 'bed', 'known', 'shot', 'above', 'date', 'Jean', 'too,', 'door', 'gotten', 'day,', 'much.', 'point.', 'drive', 'cant', 'Klee', 'Diona', 'things.', 'generally', 'eyes', 'artifact', 'shit.', 'government', 'fall', 'enemies', 'problems', 'following', 'opinion', 'lower', 'form', 'Those', 'boss', 'considered', 'Reddit', 'red', 'skill', 'yes,', 'Go', 'public', 'learned', 'movies', 'popular', 'pulled', 'you!', 'ok', 'OP', 'mention', 'changed', 'art', 'war', 'he’s', 'recently', 'fit', 'luck', 'college', 'hurt', 'people,', 'wear', 'lmao', 'resin', 'tho', 'miss', 'sister', 'gone', 'stars', '15', 'New', 'fully', 'average', 'walking', 'taken', 'back.', 'more.', 'one,', 'interested', ':(']

In [None]:
# Highest-scored comments first (in place), so text taken from the head of
# `df_comments['body']` later on starts with them.
df_comments.sort_values(by=['score'], inplace=True, ascending=False)

In [None]:
# def text_to_bitstream(text_lst, max_bits=10**6):
#     bit_streams = [[] for i in range(8)]
#     for count, text in enumerate(text_lst):
#         bits = str_to_bits(text, one_byte=False, remove_spaces=True, to_replace=top_1000_words[:256])
#         bits_lst = bits.split(" ")   
        
#         for byte in bits_lst:
#             for i, b in enumerate(byte.zfill(8)):
#                 bit_streams[i].append(b)
        
#         bit_count = len(bit_streams[0])
        
#         if count % 5000 == 0:
#             print(bit_count / max_bits * 100)
        
#         if bit_count > max_bits:
#             return bit_streams, count
        
#     return bit_streams

In [None]:
# bit_streams, count = text_to_bitstream(df_comments['body'].values, max_bits=100 * 10**6)

In [None]:
from NIST_tests import RNG_test

In [None]:
# test_n_bit_streams = 1
# max_bits = 10**6

# results = []
# for c, bit_stream in enumerate(bit_streams[2:]):
#     print(c)
#     bits = ''.join(bit_stream)
    
#     bit_pos_results = []
#     for i in range(test_n_bit_streams):
#         test_bits = bits[i*max_bits:max_bits]
#         res = RNG_test(test_bits)
#         bit_pos_results.append(res)
        
#     results.append(bit_pos_results)

## Linear congruential generator (LCG)

In [None]:
from random_helper_functions import bin_str_to_matrix, split_to_arr

In [None]:
# r = ''.join(bit_streams[7])

In [None]:
# rr = r[10**6:2*10**6]

In [None]:
# RNG_test(rr)

In [None]:
# m = bin_str_to_matrix(split_to_arr(rr))

In [None]:
def make_LCG_bits(bits, n=32, num_bits=10**6, a=48271, c=0, mod=2**32, k=0, no_chunked=True):
    """Push n-bit words of ``bits`` through one LCG-style step and keep their tails.

    Each n-bit word ``x`` is mapped to ``(x * a + c) % mod`` (or ``x * a + c``
    when ``mod == 0``), written in binary without leading zeros, and only the
    trailing part of that binary string is appended to the output.

    Parameters
    ----------
    bits : str
        Input string of '0'/'1' characters. Only the first
        ``(len(bits) // n) * n`` characters are used.
    n : int
        Number of input bits per word.
    num_bits : int
        Stop as soon as the output is strictly longer than this.
    a, c, mod : int
        Multiplier, increment and modulus of the step; ``mod == 0`` disables
        the reduction.
    k : float
        Fraction of each transformed word to keep, counted from its end.
        ``k == 0`` keeps the trailing half (``b[len(b)//2:]``).
    no_chunked : bool
        If True, words are consecutive n-bit slices. If False, ``bits`` is cut
        into ``n`` contiguous blocks and word ``i`` is made of the i-th
        character of every block.

    Returns
    -------
    (str, int) or str
        ``(new_bits, i * n)`` once the output exceeds ``num_bits``, where ``i``
        is the zero-based index of the last word processed. If the input is
        exhausted first, only the output string is returned.
        NOTE(review): callers that unpack two values will misbehave on the
        second form; the shape is kept as-is for backward compatibility.
    """
    m = len(bits) // n

    # The n contiguous blocks are only needed for the interleaved mode.
    bits_chunked = None if no_chunked else [bits[j*m:(j+1)*m] for j in range(n)]

    pieces = []    # kept tails, joined once at the end
    produced = 0   # total length of everything in `pieces`
    for i in range(m):
        if no_chunked:
            word = bits[i*n:(i+1)*n]
        else:
            word = ''.join(chunk[i] for chunk in bits_chunked)

        value = int(word, 2) * a + c
        if mod != 0:
            value %= mod
        b = bin(value)[2:]

        if k != 0:
            tail = b[int(len(b) - len(b) * k):]
        else:
            tail = b[len(b)//2:]
        pieces.append(tail)
        produced += len(tail)

        if produced > num_bits:
            return ''.join(pieces), i * n

    return ''.join(pieces)

In [None]:
# bit_str = ''.join(bit_streams[7])
# st, used = make_LCG_bits(bit_str, num_bits=10**6,
#                          a=1664525, c=1013904223, mod=2**32 - 1, k=0, n=32)

In [None]:
# RNG_test(st)

In [None]:
# used / 10**6

## chunks

In [None]:
def make_bit_chunk(bits, n):
    """Cut ``bits`` into ``n`` consecutive pieces of ``len(bits) // n`` items.

    Items past ``n * (len(bits) // n)`` are dropped. Returns a list of ``n``
    slices of ``bits``.
    """
    size = len(bits) // n
    pieces = []
    start = 0
    for _ in range(n):
        stop = start + size
        pieces.append(bits[start:stop])
        start = stop
    return pieces


def make_bit_chunks(bits, n=32, splits=2, prnt=False):
    """Recursively cut ``bits`` into ``n**(splits + 1)`` equally sized chunks.

    ``bits`` is first cut into ``n`` pieces with ``make_bit_chunk``; every
    piece is then cut into ``n`` pieces again, ``splits`` times over.

    Returns
    -------
    (list, int, int)
        The final chunks, their number ``n**(splits + 1)``, and
        ``len(bits) // n**(splits + 1)`` (items per chunk).
    """
    end_parts = n**(splits + 1)
    elements = len(bits) // end_parts
    if prnt:
        print(f'end parts: {end_parts} with {elements} elements')

    bits_chunked = make_bit_chunk(bits, n)
    # Each pass replaces every chunk by its n sub-chunks, keeping their order.
    for _ in range(splits):
        bits_chunked = [piece
                        for chunk in bits_chunked
                        for piece in make_bit_chunk(chunk, n)]

    return bits_chunked, end_parts, elements


def make_bitstring_from_chunks(bits, num_bits=10**6, **kwargs):
    """Interleave the chunks of ``bits`` into a new string.

    ``bits`` is chunked with ``make_bit_chunks(bits, **kwargs)``; the output
    takes item 0 of every chunk, then item 1 of every chunk, and so on.
    Stops early once the output is strictly longer than ``num_bits``.
    """
    bits_chunked, n_chunks, elements = make_bit_chunks(bits, **kwargs)

    collected = []
    length = 0
    for i in range(elements):
        for j in range(n_chunks):
            piece = bits_chunked[j][i]
            collected.append(piece)
            length += len(piece)
            if length > num_bits:
                return ''.join(collected)

    return ''.join(collected)

In [None]:
#st = np.arange(0, 12, 1).astype(str)
# Toy 12-character string, handy for eyeballing how the chunk mixing permutes positions.
st = 'abcdefghijkl'.upper()

In [None]:
def multi_mix(st, n_mixes=None, chunks=None):
    """Repeatedly reshuffle ``st`` with ``make_bitstring_from_chunks``.

    Parameters
    ----------
    st : str
        Sequence to mix.
    n_mixes : int, optional
        Number of mixing passes; defaults to the chunk count ``n``.
    chunks : int, optional
        Chunk count ``n`` passed to ``make_bitstring_from_chunks`` (with
        ``splits=1``). Defaults to ``int(sqrt(len(st))) - 1``.

    Returns
    -------
    str
        The sequence after the last pass. If a pass reproduces the original
        input, the result of the previous pass is returned instead (the input
        itself when that happens on the very first pass).
    """
    starting_st = st

    if chunks is None:
        n = int(np.sqrt(len(st))) - 1
    else:
        n = chunks
    print(f'splits: {n}')

    if n_mixes is None:
        n_mixes = n

    # Seed with the input so a repeat on the first pass has something to
    # return; previously `old_st` was unbound there (UnboundLocalError).
    old_st = st
    for i in tqdm(range(n_mixes)):
        st = make_bitstring_from_chunks(st, n=n, splits=1)
        if st == starting_st:
            print('sequence repeated! returnig last good combination!')
            return old_st
        old_st = st

    return st

In [None]:
multi_mix(st, chunks=3)

In [None]:
# c = make_bitstring_from_chunks(bit_str, num_bits=10**6, n=32, splits=2)

In [None]:
text = df_comments['body']

In [None]:
# Concatenate all comment bodies (no separator between comments) and keep the
# first 10**6 characters.
# NOTE(review): the whole column is joined before slicing, so the full text is
# materialized in memory even though only the head is used.
full_text = ''.join(text)[:10**6]

In [None]:
spaces_bits = str_to_bits(full_text, to_replace=top_1000_words[:100], remove_spaces=True)

In [None]:
# One token per space-separated group in `spaces_bits` (`str.split` already
# returns a list, so no extra `list()` is needed).
list_bits = spaces_bits.split(" ")

# Keep only the last character of each token (presumably the least-significant
# bit - confirm against `str_to_bits`). Empty tokens, which consecutive or
# trailing spaces would produce, are skipped instead of raising IndexError.
last_bit = ''.join(b[-1] for b in list_bits if b)

In [None]:
# RNG_test(last_bit[:2*10**6][::2])

In [None]:
# mm = multi_mix(last_bit[:1*10**6], n_mixes=10)

In [None]:
# RNG_test(mm)

## Diagonal mixing (diag)

In [None]:
def valid_shapes(num):
    """List the 2-D shapes ``[rows, cols]`` with ``rows * cols == num``.

    Only pairs with ``rows <= cols`` are produced, ordered from the most
    square shape down to ``[1, num]`` (which is always last for ``num >= 1``).

    The divisor search includes ``floor(sqrt(num))`` itself; the earlier
    ``range(1, lim)`` skipped it and lost e.g. ``[3, 4]`` for 12 and every
    exact square shape.
    """
    import math

    # Exact integer square root - no float rounding for large `num`.
    lim = math.isqrt(num)
    shapes = [[i, num // i] for i in range(1, lim + 1) if num % i == 0]
    return shapes[::-1]

In [None]:
import itertools

def diag_rng(bit_arr, reverse_shapes=False, reverse_sort=False):
    """Reshuffle a bit array by repeatedly reading it out along diagonals.

    For every 2-D factor shape of ``len(bit_arr)`` (all of ``valid_shapes``
    except its last entry), the array is reshaped to that shape, all diagonals
    from offset ``-m`` to ``m`` (``m = max(shape)``) are read off and
    concatenated, and the result becomes the input for the next shape.

    Parameters
    ----------
    bit_arr : numpy.ndarray
        1-D array of bits; must support ``reshape``.
    reverse_shapes : bool
        Visit the shapes in reverse order.
    reverse_sort : bool
        Passed as ``reverse`` to the diagonal sort (see the note in the body:
        it currently has no effect).

    Returns
    -------
    str
        The reshuffled bits as one string. When no shape is available the
        input is returned unchanged as a string; previously this case raised
        ``UnboundLocalError``.
    """
    shapes = valid_shapes(len(bit_arr))[:-1]
    if reverse_shapes:
        shapes = shapes[::-1]

    if not shapes:
        # Nothing to reshape into: hand the input back in the usual output form.
        return ''.join(np.asarray(bit_arr).ravel().astype(str))

    for shape in tqdm(shapes):
        new_bit_arr = bit_arr.reshape(shape[0], shape[1])

        m = max(shape)
        r = np.arange(-m, m + 1, 1)

        new_s = []
        for i in r:
            s = np.diag(new_bit_arr, k=i).astype(str)
            # Offsets outside the matrix give empty diagonals; skip them.
            if len(s) != 0:
                new_s.append(''.join(s))

        # NOTE(review): `x[0]` is a single character, so this key is 1 for every
        # diagonal and the stable sort leaves the order untouched whatever
        # `reverse_sort` is. `key=len` was probably intended; kept as-is so
        # existing results do not change.
        new_s.sort(key=lambda x: len(x[0]), reverse=reverse_sort)
        new_s = ''.join(new_s)

        bit_arr = split_to_arr(new_s)

    return new_s

In [None]:
# Work on the first 10**6 extracted bits.
a = last_bit[:10**6]

# a = diag_rng(split_to_arr(a), reverse=False)
# a = make_bitstring_from_chunks(a, num_bits=1*10**6, n=32, splits=0)
# Pipeline under test: diagonal reshuffle first, then a single chunk-mixing
# pass with 32 chunks per split level.
diag_bits = diag_rng(split_to_arr(a))
mm = multi_mix(diag_bits, n_mixes=1, chunks=32)
# diag_bits = diag_rng(split_to_arr(mm))
# diag_bits = diag_rng(split_to_arr(mm))

In [None]:
RNG_test(mm)

In [None]:
# Alternative pipeline: chunk interleaving first, diagonal reshuffle afterwards.
a = last_bit[:1*10**6]
# n=32 with splits=2 gives 32**3 = 32768 final chunks of 10**6 // 32768 = 30
# bits each, so the interleaved string holds 983040 bits (the rest is dropped
# by the integer divisions in the chunking).
a = make_bitstring_from_chunks(a, num_bits=1*10**6, n=32, splits=2)
a_arr = split_to_arr(a)

In [None]:
diag_bits = diag_rng(a_arr)

In [None]:
RNG_test(diag_bits)

In [None]:
RNG_test(a[::2])

In [None]:
# from bitstring import BitArray

# def float_from_bitstring(bitstring):
#     return BitArray(bin=bitstring).float

In [None]:
def make_ints_with_n_bits(bits, n):
    """Parse ``bits`` as consecutive n-bit unsigned integers, dropping zeros.

    Only the first ``(len(bits) // n) * n`` characters are used. The number of
    all-zero words that were discarded is printed.

    Returns a NumPy array with the non-zero integers in input order.
    """
    n_words = len(bits) // n
    values = [int(bits[start:start + n], 2) for start in range(0, n_words * n, n)]

    nonzero = [v for v in values if v != 0]
    z = len(values) - len(nonzero)

    print(f'{z} total zeros')
    return np.array(nonzero)

In [None]:
def reshape_and_truncate(arr, shape):
    """Reshape ``arr`` to ``shape``, dropping trailing items that do not fit.

    Parameters
    ----------
    arr : array-like
        Input data; converted with ``np.asarray`` and read in flat order.
    shape : sequence of int
        Target shape. It may contain ``-1`` for an inferred dimension, in
        which case the largest multiple of the other dimensions' product that
        fits into ``arr`` is kept.

    Returns
    -------
    numpy.ndarray
        A new array of the requested shape built from the leading items.
    """
    arr = np.asarray(arr)

    # `int(...)` matters: the product of an empty list (shape == (-1,)) is the
    # float 1.0, which previously ended up as a slice bound and raised TypeError.
    desired_size_factor = int(np.prod([n for n in shape if n != -1]))
    if -1 in shape:  # implicit array size
        desired_size = arr.size // desired_size_factor * desired_size_factor
    else:
        desired_size = desired_size_factor
    return arr.flat[:desired_size].reshape(shape)

In [None]:
def text_lognormal_dist(bits, n, d):
    """Multiply groups of ``d`` integers parsed from a bit sequence.

    Parameters
    ----------
    bits : str
        Sequence of bits.
    n : int
        Number of bits to take together in the bits sequence (one integer per
        ``n`` bits; all-zero words are dropped by ``make_ints_with_n_bits``).
    d : int
        Number of multiplications, i.e. how many consecutive integers go into
        each product.

    Returns
    -------
    numpy.ndarray
        float32 array with one product per complete group of ``d`` integers;
        leftover integers are discarded.
    """
    # Parse the *argument*; the earlier version read the notebook-global
    # `last_bit` here and ignored `bits`.
    ints = make_ints_with_n_bits(bits, n=n)
    # Group the parsed integers (not the raw bit string) into rows of d.
    ints_mat = reshape_and_truncate(ints, (len(ints) // d, d))
    # Multiply in float64 so large products do not silently wrap around int64.
    ints_prod = np.prod(ints_mat.astype(np.float64), axis=1).astype(np.float32)
    return ints_prod