# Prime testing with a neural network

## Imports

In [1]:
import tensorflow as tf
import numpy as np

import sympy
import random

## Parameters

In [2]:
# --- Problem size -----------------------------------------------------------
# All numbers are drawn below 10**magnitude.
magnitude = 10
max_number = 10 ** magnitude

# --- Data set composition ---------------------------------------------------
card_total = 10 ** 6
percentage_primes = 0.5
# round() rather than int(): a float product such as 0.29 * 100 evaluates to
# 28.999999999999996, which int() would silently truncate to 28.
card_primes = round(percentage_primes * card_total)
card_non_primes = card_total - card_primes
percentage_train_set = 0.9
card_train_set = round(percentage_train_set * card_total)
card_test_set = card_total - card_train_set

# --- Reproducibility --------------------------------------------------------
# Seeds the stdlib and NumPy global generators.
# NOTE(review): sympy.randprime and TensorFlow weight initialisation may use
# their own generators and need separate seeding -- TODO confirm.
random_seed = 42
random.seed(random_seed)
np.random.seed(random_seed)

print(max_number)
print(card_total)
print(card_primes)
print(card_non_primes)
print(card_train_set)
print(card_test_set)

10000000000
1000000
500000
500000
900000
100000


## Generation of input data

### Generation of primes

In [3]:
%%time
# Draw random primes below max_number until card_primes distinct values have
# been collected. A set ignores duplicate insertions, so no explicit
# membership test is needed. Checking the size before drawing means a request
# for zero primes finishes immediately instead of looping forever.
primes = set()
while len(primes) < card_primes:
    primes.add(sympy.randprime(1, max_number))

Wall time: 37.8 s


In [4]:
# Sanity check: exactly the requested number of distinct primes was generated.
assert len(primes) == card_primes
len(primes)

500000

### Generation of non-primes

In [5]:
%%time
# Bulk draw: one vectorised call yields card_non_primes candidates; converting
# to a set removes duplicates and the filter removes the primes among them.
rands = np.random.randint(max_number, size=card_non_primes, dtype=np.int64)
non_primes = {x for x in set(rands) if not sympy.isprime(x)}

# Top up one value at a time to replace the candidates dropped above.
# The size is tested before each draw, so the loop also terminates when the
# bulk draw already produced exactly card_non_primes values (adding first and
# comparing with == afterwards would overshoot and never stop).
while len(non_primes) < card_non_primes:
    a_rand = np.random.randint(max_number, dtype=np.int64)
    if not sympy.isprime(a_rand):
        non_primes.add(a_rand)  # a set ignores values it already holds

Wall time: 2.12 s


In [6]:
# Sanity check: exactly the requested number of distinct non-primes was generated.
assert len(non_primes) == card_non_primes
len(non_primes)

500000

In [7]:
# Merge both classes into one list, then shuffle it in place so that primes
# and non-primes are interleaved before the train/test split.
rand_numbers = list(primes | non_primes)
random.shuffle(rand_numbers)

In [8]:
# Sanity check: the union has card_total elements only if the prime and
# non-prime sets are disjoint and each has its expected size.
assert len(rand_numbers) == card_total
len(rand_numbers)

1000000

In [9]:
# Class label per number, aligned with rand_numbers: 1 = prime, 0 = not prime.
labels = [int(sympy.isprime(x)) for x in rand_numbers]

In [10]:
# Sanity check: the number of positive labels equals the number of primes requested.
assert sum(labels) == card_primes
sum(labels)

500000

## Build neural network

### Coding of numbers

In [11]:
def transform_number_to_array(number, width=None):
    """Encode a non-negative integer as a fixed-length list of scaled digits.

    The number is written in base 10, left-padded with zeros to ``width``
    characters, and each digit is divided by 10. For example, ``7`` with
    ``width=3`` becomes ``[0.0, 0.0, 0.7]``.

    Args:
        number: Non-negative integer to encode.
        width: Number of digits in the result. Defaults to the module-level
            ``magnitude`` when omitted.

    Returns:
        A list of ``width`` floats, each one of 0.0, 0.1, ..., 0.9.

    Raises:
        ValueError: If ``number`` is not a non-negative integer, or if it has
            more than ``width`` digits (which would otherwise yield a vector
            of the wrong length).
    """
    if width is None:
        width = magnitude
    digits = str(number)
    if not digits.isdecimal():
        raise ValueError("expected a non-negative integer, got {!r}".format(number))
    if len(digits) > width:
        raise ValueError(
            "{} has {} digits, more than the width of {}".format(digits, len(digits), width)
        )
    return [int(digit) / 10 for digit in digits.zfill(width)]

### Generation of train set and labels

In [12]:
# Features: one row of scaled digits per training number. float32 is the
# float type Keras uses by default, so storing float64 would only cost memory.
train_set = np.array(
    [transform_number_to_array(x) for x in rand_numbers[:card_train_set]],
    dtype=np.float32,
)
# Labels: reuse the 0/1 labels already computed for rand_numbers instead of
# running the primality test again. They are stored as integers because the
# sparse categorical cross-entropy loss takes integer class indices.
train_labels = np.array(labels[:card_train_set], dtype=np.int32)

In [13]:
# Shapes of the training features and labels, one per line.
print(train_set.shape, train_labels.shape, sep="\n")

(900000, 10)
(900000,)


### Generation of test set and labels

In [14]:
# Everything after the training slice is held out for evaluation.
test_numbers = rand_numbers[card_train_set:]
# Features: one row of scaled digits per held-out number. float32 is the
# float type Keras uses by default.
test_set = np.array(
    [transform_number_to_array(x) for x in test_numbers],
    dtype=np.float32,
)
# Labels: reuse the 0/1 labels already computed for rand_numbers instead of
# running the primality test again; integers match what the sparse
# categorical cross-entropy loss takes.
test_labels = np.array(labels[card_train_set:], dtype=np.int32)

In [15]:
# Shapes of the test features and labels, one per line.
print(test_set.shape, test_labels.shape, sep="\n")

(100000, 10)
(100000,)


In [16]:
# Total number of prime labels across both splits.
print(train_labels.sum() + test_labels.sum())

500000


### Specification and compilation of the neural network

In [17]:
# Feed-forward classifier, assembled one layer at a time:
#   magnitude units (no activation) -> 3 units (ReLU) -> 128 units (ReLU)
#   -> 2 raw output scores, one per class.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(magnitude))
model.add(tf.keras.layers.Dense(3, activation='relu'))
model.add(tf.keras.layers.Dense(128, activation='relu'))
model.add(tf.keras.layers.Dense(2))

In [18]:
# Adam optimiser; the loss is configured with from_logits=True, i.e. it is
# given the model's raw output scores rather than probabilities.
model.compile(
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
    optimizer='adam',
)

### Training

In [19]:
# Train on the training split for 10 epochs.
model.fit(x=train_set, y=train_labels, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x22b6897f808>

## Evaluation

In [20]:
# Loss and accuracy on the held-out test split.
test_loss, test_acc = model.evaluate(x=test_set, y=test_labels, verbose=2)

3125/3125 - 4s - loss: 0.4597 - accuracy: 0.7614 - 4s/epoch - 1ms/step


In [21]:
# Wrap the trained model with a softmax layer so its raw output scores are
# turned into per-class probabilities.
probability_model = tf.keras.Sequential()
probability_model.add(model)
probability_model.add(tf.keras.layers.Softmax())

In [22]:
# Per-class probabilities for every test number.
predictions = probability_model.predict(x=test_set)



In [23]:
# Show the first 20 test numbers as: true primality, number, and the
# probability in column 1 of the prediction. Slicing instead of indexing
# with range(20) keeps this working when the test set has fewer than 20 rows.
for nb, prob in zip(test_numbers[:20], predictions[:20]):
    print("{}, {}, {:.0%}".format(sympy.isprime(nb), nb, prob[1]))

False, 9152640312, 2%
True, 2807304179, 69%
False, 2515080634, 1%
False, 1438361899, 70%
True, 2201127217, 56%
False, 7918550671, 68%
False, 5823640360, 0%
True, 2585916409, 69%
True, 6699799657, 56%
True, 625738957, 56%
False, 6109435559, 70%
True, 4283276183, 72%
True, 1108577317, 56%
True, 4908443093, 72%
True, 5195950429, 69%
True, 5311370767, 56%
True, 7224214357, 56%
True, 4920053891, 66%
True, 7570249433, 73%
False, 4842038371, 64%


In [24]:
# Confusion counts for the test set, computed with array operations.
# Ground truth comes from test_labels, so no primality test is re-run here.
is_prime = np.asarray(test_labels).astype(bool)
prime_probability = np.asarray(predictions)[:, 1]

# A number is called prime when column 1 of its prediction exceeds 0.5 and
# non-prime when it is at most 0.5.
says_prime = prime_probability > 0.5
says_non_prime = prime_probability <= 0.5

nb_tot = len(prime_probability)
nb_primes = int(np.count_nonzero(is_prime))
nb_primes_detected = int(np.count_nonzero(is_prime & says_prime))
nb_primes_not_detected = nb_primes - nb_primes_detected
nb_nonprimes_detected = int(np.count_nonzero(~is_prime & says_non_prime))
nb_nonprimes_not_detected = (nb_tot - nb_primes) - nb_nonprimes_detected

print("total number of test numbers: {}".format(nb_tot))
print("total number of primes in test numbers: {}".format(nb_primes))
print("# primes detected correctly: {}".format(nb_primes_detected))
print("# primes detected not correctly: {}".format(nb_primes_not_detected))
print("# non-primes detected: {}".format(nb_nonprimes_detected))
print("# non-primes not detected: {}".format(nb_nonprimes_not_detected))

total number of test numbers: 100000
total number of primes in test numbers: 50136
# primes detected correctly: 50136
# primes detected not correctly: 0
# non-primes detected: 26002
# non-primes not detected: 23862
