In [1]:
import sys
import random
import helpers

# Fix the seed of the global `random` generator so the numbers drawn later in
# the notebook are the same on every fresh run.
SEED = 2026
random.seed(SEED)

In [2]:
# Build a class-balanced sample of large random integers: keep drawing
# candidates until both the prime and the non-prime list are full.
# Reproducibility relies on the global `random` generator having been seeded
# before this cell runs.
_TARGET_PER_CLASS = 1000  # how many numbers to collect for each class
_MIN_CANDIDATE = 5000000000000000  # inclusive lower bound of the sampling range

_PRIME_NUMBERS_LIST = []
_NON_PRIME_NUMBERS_LIST = []

while (
    len(_PRIME_NUMBERS_LIST) < _TARGET_PER_CLASS
    or len(_NON_PRIME_NUMBERS_LIST) < _TARGET_PER_CLASS
):
    # randint is inclusive on both ends, so sys.maxsize itself may be drawn.
    number = random.randint(_MIN_CANDIDATE, sys.maxsize)

    # Run the primality test exactly once per candidate: it is the costly call
    # in this loop, and a single verdict guarantees the number can only ever
    # be routed to one of the two lists.
    is_prime = helpers.calculate_is_prime(number)
    bucket = _PRIME_NUMBERS_LIST if is_prime else _NON_PRIME_NUMBERS_LIST

    # A candidate whose class is already full is simply discarded.
    if len(bucket) < _TARGET_PER_CLASS:
        bucket.append(number)

print(len(_PRIME_NUMBERS_LIST), len(_NON_PRIME_NUMBERS_LIST))

1000 1000


In [3]:
import pandas as pd

# Merge both classes into one list and order it from largest to smallest.
# The concatenation creates a new list, so sorting it in place leaves the two
# source lists untouched.
all_numbers = _PRIME_NUMBERS_LIST + _NON_PRIME_NUMBERS_LIST
all_numbers.sort(reverse=True)

# Single-column frame; further columns are derived from "number" later on.
df = pd.DataFrame({"number": all_numbers})

df

Unnamed: 0,number
0,9221393133747789606
1,9218877568944023837
2,9218310977093372168
3,9216495880920913229
4,9211929070971223639
...,...
1995,35012787445999063
1996,31129236956473739
1997,28379780060336611
1998,19842342795337627


In [4]:
# Derive one column per helper from the raw "number" column. The mapping keeps
# insertion order, so the columns are added as is_prime, binary, decimal.
column_builders = {
    "is_prime": helpers.calculate_is_prime,
    "binary": helpers.convert_number_to_binary_representation,
    "decimal": helpers.convert_number_to_decimal_representation,
}

for column_name, builder in column_builders.items():
    # Element-wise application: each helper receives a single number.
    df[column_name] = df["number"].apply(builder)

df

Unnamed: 0,number,is_prime,binary,decimal
0,9221393133747789606,False,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, ...","[9, 2, 2, 1, 3, 9, 3, 1, 3, 3, 7, 4, 7, 7, 8, ..."
1,9218877568944023837,True,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, ...","[9, 2, 1, 8, 8, 7, 7, 5, 6, 8, 9, 4, 4, 0, 2, ..."
2,9218310977093372168,False,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, ...","[9, 2, 1, 8, 3, 1, 0, 9, 7, 7, 0, 9, 3, 3, 7, ..."
3,9216495880920913229,True,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, ...","[9, 2, 1, 6, 4, 9, 5, 8, 8, 0, 9, 2, 0, 9, 1, ..."
4,9211929070971223639,False,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, ...","[9, 2, 1, 1, 9, 2, 9, 0, 7, 0, 9, 7, 1, 2, 2, ..."
...,...,...,...,...
1995,35012787445999063,True,"[0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, ...","[0, 0, 3, 5, 0, 1, 2, 7, 8, 7, 4, 4, 5, 9, 9, ..."
1996,31129236956473739,True,"[0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, ...","[0, 0, 3, 1, 1, 2, 9, 2, 3, 6, 9, 5, 6, 4, 7, ..."
1997,28379780060336611,False,"[0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, ...","[0, 0, 2, 8, 3, 7, 9, 7, 8, 0, 0, 6, 0, 3, 3, ..."
1998,19842342795337627,True,"[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, ...","[0, 0, 1, 9, 8, 4, 2, 3, 4, 2, 7, 9, 5, 3, 3, ..."


In [13]:
# Sanity check: every "binary" and "decimal" representation is expected to
# have one fixed length across the whole frame.
first_row_lens = (len(df.iloc[0]["binary"]), len(df.iloc[0]["decimal"]))
last_row_lens = (len(df.iloc[-1]["binary"]), len(df.iloc[-1]["decimal"]))

print(f"First row lens: {first_row_lens}")
print(f"Last row lens: {last_row_lens}")

# Compare EVERY row against the first one. Checking only the first and last
# rows would let a malformed representation in the middle of the frame slip
# through, which contradicts what the assertion message promises.
row_lens = zip(map(len, df["binary"]), map(len, df["decimal"]))
assert all(lens == first_row_lens for lens in row_lens), "All binary and decimal representations must have the same length"

First row lens: (63, 19)
Last row lens: (63, 19)


In [14]:
# Verify the class balance: summing the "is_prime" column counts the rows
# flagged as prime, and the remaining rows are the non-primes.
total_rows = df.shape[0]
prime_count = df["is_prime"].sum()
not_prime_count = total_rows - prime_count

print(f"Prime count: {prime_count}, Not prime count: {not_prime_count}")

is_balanced = prime_count == not_prime_count and not_prime_count == 1000
assert is_balanced, "There must be exactly 1000 prime and 1000 non-prime numbers"

Prime count: 1000, Not prime count: 1000


In [15]:
# Write the finished dataset to disk without the positional index column.
# NOTE(review): the list-valued "binary"/"decimal" cells are presumably stored
# as their text form in the CSV - confirm how downstream readers parse them.
output_path = "prime_dataset.csv"
df.to_csv(output_path, index=False)