### Basic working code

In [15]:
import math
import numpy as np
import sympy
import time 

In [16]:
def calc_time(start, end):
    seconds = end - start
    mins = seconds / 60
    print('Time:', str(seconds), '    ', str(mins))
    return seconds, mins

In [17]:
## I have kept the generator cell, but I generate more numbers in the next one. The prime number is set in this cell.
## bound is commented out, as it is already set, and the array of primes is already created

# n = 16921456439215439701
n = 12110279
the_number = n
s_const = int(math.sqrt(n)) + 1
A_const = (math.sqrt(2)-1)*math.sqrt(n)-1      

print(A_const)

1440.4559959137964


In [18]:
def f(x):
    return (x+s_const)**2 - the_number

In [19]:
# Final optimized version 
def get_primes_less_thanB(bound):                      # eratosthenes way
    check_until = int(bound**0.5)
    unmarked = np.array([x for x in range(2, bound)])
    q = 2
    i = 0
    while True:
        u = unmarked[i]
        if u> check_until:               # only check numbers up to sqrt(bound)
            break
        u_idx = np.where(unmarked == u)[0][0]
        max_idx = bound//u
        listof_multiples = [u*i for i in range(q, max_idx+1)]
        u_multiples = np.array(listof_multiples)
        unmarked = np.setdiff1d(unmarked, u_multiples)   # numpy set diff without changing order of elements in list
        q = u
        i +=1
    # print("final unmarked elements (for eye balling!): ", unmarked)
    print("Number of elements less than bound = ", bound, " is: ", len(unmarked))
    return (unmarked)

In [20]:
start = time.time()
bound = 10000
primes_less_thanB = get_primes_less_thanB(bound)
end = time.time()

calc_time(start, end)

Number of elements less than bound =  10000  is:  1229
Time: 0.03061366081237793      0.0005102276802062989


(0.03061366081237793, 0.0005102276802062989)

In [21]:
# Correct optimized version
def check_Bsmooth(primes, num):
    powers_of_primes= []
    num_remaining= num                          # keep dividing by primes in B smooth primes, until == 1
    for prime in primes: 
        #print("current prime: ", prime)
        i = 0                                   # i = power of prime
        if (num_remaining%prime == 0):
            while (num_remaining%prime ==0):   #keep dividing for each power of the prime
                quotient = num_remaining/prime
                num_remaining = quotient
                i +=1
        powers_of_primes.append(i)
        #print("num remaining: ", num_remaining)
        if num_remaining == 1:     # ---------------(**) INDENT PROPERLY!
            diff = len(primes)-len(powers_of_primes)
            if diff>0:
                zeros = [0 for k in range(diff)]
                return powers_of_primes + zeros
            return powers_of_primes
    #print("Number is not B smooth")
    return []

### Still more checks, on B-smooth, on Sieve; 

In [23]:
# Was checking check_Bsmooth function
num = 100
bound = 100
primes = np.array(get_primes_less_thanB(bound))

start = time.time()
result = check_Bsmooth(primes, num)
end = time.time()

calc_time(start, end)
#print(results)

Number of elements less than bound =  100  is:  25
Time: 0.0      0.0


(0.0, 0.0)

In [24]:
def sieve(A_const, bounded_primes):
    B_smooth_squares = []
    powers_of_primes = []
    for i in range(0, int(A_const)):
        k = f(i)
#         if i%100 ==0:
#             print("in this iteration i=", i, " candidate k is: ", k)
        exponents = check_Bsmooth(bounded_primes, k)
        if len(exponents)>0:                # same as if exponents != []
            B_smooth_squares.append(k)
            powers_of_primes.append(exponents)
            #print("found a B-smooth number(!) = ", k)
            #print("with prime powers: ", exponents)
        else:
            continue
    return (B_smooth_squares, powers_of_primes)

In [25]:
# primes defined above
start = time.time()
results = sieve(A_const, primes)
end = time.time()

calc_time(start, end)
print(results[0])
#print(results[1])

Time: 0.06623697280883789      0.0011039495468139649
[121, 14045, 153725, 174746, 237917, 613210, 763465, 799370, 1197625, 1743005, 1772797, 2535650, 2658370, 3279650, 3342482, 3690346, 3985865, 4066205, 7674425, 8428745, 8884445, 9280346, 9298850, 10234250, 11519042, 11782265]


In [14]:
print("Number of B smooth candidates with bound: ", bound, " are ", len(results[0]))

Number of B smooth candidates with bound:  100  are  26


### More tests, ignore to run main prog; start after Markdown cell again

In [26]:
bound = 120
primes = np.array(get_primes_less_thanB(bound))

Number of elements less than bound =  120  is:  30


In [18]:
start = time.time()
result = sieve(A_const, primes)
end = time.time()

calc_time(start, end)
print("Number of B smooth candidates with bound: ", bound, " are ", len(result[0]))

In [19]:
B_smooth_squares = results[0]
# print(B_smooth_squares)
powers_of_primes = results[1]
# print(powers_of_primes)

In [45]:
exp_array = (np.array(powers_of_primes))%2
rownum = len(exp_array)
colnum = len(exp_array[0])
print("num rows: ", rownum, "num columns ", colnum)

exp_array = np.transpose(exp_array)
rownum = len(exp_array)
colnum = len(exp_array[0])
print("num rows: ", rownum, "num columns ", colnum)
# print(exp_array)

num rows:  53 num columns  46
num rows:  46 num columns  53


In [37]:
# Just checking how long this alternative you had takes; seems comparable in terms of time
# But this method actually doesn't find ANY candidates k, because the check at (**) means you check 
# whether EVERY one of the primes in'B smooth primes' divide the candidate k! But that's too strict a requirement!
# It also does not check if one prime repeatedly divides k - we need to do that; e.g. 11 goes into 2658370 3 times!

def sieve2(A_const, primes):
    sieve_array = []

    for i in range(0, int(A_const)):
        k = f(i)
        #print(k)
        trigger = 0
        flag_cont = 0

        for j in range(0, len(primes)):
            if(k%primes[j]!=0):   # if any prime does not divide k ----------------(**)
                flag_cont = 1     # then set flag to 1

        if flag_cont == 1:
            flag_cont = 0
            continue
        else:
            sieve_array.append(k)

    print(len(sieve_array))
    

In [40]:
start = time.time()
result = sieve2(A_const, primes)
end = time.time()
calc_time(start, end)

0
Time: 0.05785202980041504      0.000964200496673584


(0.05785202980041504, 0.000964200496673584)

### Start here (but load sieve, checkBsmooth, primes less than B smooth)
### Checking whether we still get x is  3480 , y is  11.0 GIVEN the new mod2 matrix is DIFFERENT!

In [27]:
# A first simulation
bound = 40
primes = np.array(get_primes_less_thanB(bound))

Number of elements less than bound =  40  is:  12


In [28]:
## this is the generator cell, it works for the small tests I am doing, but we will have to update it later
nums = []
results=[]
size_of_bound = len(primes)

numbers = []
count_found = 0
begin = int(math.sqrt(n)) + 1
i = 0
start = time.time()
while(True):
    temp = check_Bsmooth(primes, (begin**2)%n)
    if temp!=[]:
        #print("exponents ", temp)
        exponents = np.array(temp)
        result = np.power(primes, exponents)
        the_num = np.prod(result)
        #print("the number itself: ", the_num)
        numbers.append(the_num)
        
        nums.append(begin)
        results.append(temp)
        count_found = count_found +1
            
    if count_found> size_of_bound +3:
        break
    begin +=1
    i +=1
end = time.time()
print("iterations, ", i)
print("B smooth numbers found: ", numbers, "total: ", len(numbers))
#print("Should equal nums: ", len(nums))
calc_time(start, end)
print (results)

iterations,  4468
B smooth numbers found:  [121, 2658370, 3342482, 390963, 696388, 927979, 4539612, 5387844, 11957764, 484, 10633480, 397854, 538461, 1527246, 2254230, 2619309] total:  16
Time: 0.1523151397705078      0.002538585662841797
[[0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0], [1, 0, 1, 0, 2, 3, 0, 0, 0, 0, 0, 0], [1, 0, 0, 0, 1, 2, 0, 0, 0, 1, 1, 0], [0, 1, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0], [2, 0, 0, 2, 1, 0, 1, 1, 0, 0, 0, 0], [0, 0, 0, 0, 0, 2, 2, 1, 0, 0, 0, 0], [2, 1, 0, 1, 1, 0, 3, 0, 0, 0, 0, 0], [2, 1, 0, 4, 1, 0, 1, 0, 0, 0, 0, 0], [2, 0, 0, 2, 0, 2, 0, 2, 0, 0, 0, 0], [2, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0], [3, 0, 1, 0, 2, 3, 0, 0, 0, 0, 0, 0], [1, 2, 0, 0, 0, 0, 0, 0, 1, 0, 2, 0], [0, 3, 0, 2, 1, 0, 0, 0, 0, 0, 0, 1], [1, 2, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0], [1, 4, 1, 0, 2, 0, 0, 0, 1, 0, 0, 0], [0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0]]


In [39]:
## store_pows is the original matrix of powers, which we use later in the computation of y
# Got rid of the 'matrix' calculation from 'results', since results already has no empty list
# produces same array; checked using 'np.array_equal(matrix, results)'

np_array = np.array(results)
store_pows = np_array.copy()

np_array = np.transpose(np_array)
print(np_array %2)
# print(store_pows)

[[0 1 1 0 0 0 0 0 0 0 1 1 0 1 1 0]
 [0 0 0 1 0 0 1 1 0 0 0 0 1 0 0 1]
 [0 1 0 0 0 0 0 0 0 0 1 0 0 0 1 0]
 [0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 1]
 [0 0 1 0 1 0 1 1 0 0 0 0 1 0 0 1]
 [0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0]
 [0 0 0 0 1 0 1 1 0 0 0 0 0 1 0 1]
 [0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 1 0 1 1 1]
 [0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1]
 [0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0]]


In [45]:
a = (np_array%2)
n = len(a)            # row number + number of equations we have
m = len(a[0])        # column number + number of primes (in factor base)

b =  np.array([[0 for i in range(n)]])     # all zeros
a = np.concatenate((a, b.T), axis=1)
print(a)
print(len(a))

[[0 1 1 0 0 0 0 0 0 0 1 1 0 1 1 0 0]
 [0 0 0 1 0 0 1 1 0 0 0 0 1 0 0 1 0]
 [0 1 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0]
 [0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 1 0]
 [0 0 1 0 1 0 1 1 0 0 0 0 1 0 0 1 0]
 [0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0]
 [0 0 0 0 1 0 1 1 0 0 0 0 0 1 0 1 0]
 [0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 1 0 1 1 1 0]
 [0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0]
 [0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0]]
12


In [47]:
# changed it to remove all zero rows from A
def drop_zero_rows(A): 
    return A[~np.all(A == 0, axis=1)]

In [None]:
# I got rid of the upp_triangular function since it wasn't necessary to use
# aug_matrix = upp_triangular(row_num, col_num,  aug_matrix)

In [51]:
from sympy import *

# aug_matrix = a
# row_num = len(a)
col_num = len(a[0])

L = Matrix(a)
# print(L.rref())
aug_matrix = np.array(L.rref()[0])
aug_matrix  = drop_zero_rows(aug_matrix)

print("Reduced row echelon form is: ")
print(aug_matrix)
print(len(a))


Reduced row echelon form is: 
[[0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0]
 [0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 1 0 -1 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0]]
12


In [54]:
# v: 0s are non-free variables, 1s are fixed variables which will be 1 (free or non-free),
# 2s are fixed variables which will be 0 (free or non-free)
# - Just changed the inner loop to start from 'i', not 0

v = np.ones(col_num, dtype = 'int')

row_num = len(aug_matrix)   # the row_num has changed after dropping the 0 rows

# find the leading entries, aka the non-free variables, set them to 0 in the v vector
for i in range(0, row_num):
    check_main = 1
    for j in range(i, col_num):
        if(aug_matrix[i][j]==1 and check_main ==1):
            v[j]=0
            break

print(v)

[1 0 0 0 0 0 0 1 1 1 1 0 0 0 0 0 1]


In [55]:
# set one of the free variable to 1, all the rest of the free variables to 2.
## THIS IS A HUGE PLACE FOR OPTIMIZATION, IF WE PICK THE RIGHT FREE VARIABLE LIFE WILL BE EASIER LATER

delete_ones = False
for i in range(0, col_num):
    if(delete_ones==True and v[i]==1):
        v[i]=2
    if(v[i]==1):
        delete_ones = True

print(v)

[1 0 0 0 0 0 0 2 2 2 2 0 0 0 0 0 2]


In [56]:
print((np.argwhere(v == 0).flatten()))

[ 1  2  3  4  5  6 11 12 13 14 15]


#### I actually don't understand what's happening in the cell below
#### Do you think you can: 1. Give the action in this cell a name (for easy reference)
#### 2. Explain what it does in a section of our paper?

In [58]:
# calculate the other variables according to the values of the free variables we have:

for i in reversed(range(0, row_num)):
    sum = np.multiply(v, aug_matrix[i]).sum() # equal to sum = sum + sum + v[j]*aug_matrix[i][j] for all j
    sum = sum%2;
    zer =(np.argwhere(v == 0).flatten())
    if(len(zer)!=0):
        index = zer[len(zer)-1]   # why are we using the length of the array?
    
        if(sum==0):
            v[index] = 2
        else:
            v[index] = 1
        
print(" final v vector is ", v%2)
        

2
True
2
True
2
True
2
True
2
True
2
True
0
True
4
True
4
True
2
True
4
True
 final v vector is  [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]


In [38]:
# calculate the other variables according to the values of the free variables we have:

for i in reversed(range(0, row_num)):
    sum =0
    for j in range(0, col_num):
        sum = sum + v[j]*aug_matrix[i][j];
    zer =(np.argwhere(v == 0).flatten())     # an
    sum = sum%2;
    if(len(zer)!=0):
        index = zer[len(zer)-1]   # why are we using the length of the array?
    
        if(sum==0):
            v[index] = 2
        else:
            v[index] = 1
        
print(" final v vector is ", v%2)
        

[1 0 0 0 0 0 0 2 2 2 2 0 0 0 0 0 2]
 final v vector is  [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]


In [39]:
x = 1;
y = 1;

for i in range(0, col_num):
    if v[i] ==1:
        x = x * nums[i]
        #for j in range(0, len(primes)):
        #    if(store_pows[i][j]!=0):
        #        #print(int(y*(primes[j]**store_pows[i][j])))
        #        y = (y* ((primes[j]**store_pows[i][j])))

        
y_v  = np.zeros(size_of_bound)

for i in range(0, col_num):
    if v[i]==1:
        for j in range(0, (size_of_bound)):
            y_v[j] = y_v[j] + store_pows[i][j]

y_v = y_v/2

for i in range(0, size_of_bound):
    if(y_v is not 0):
        y = y*(primes[i]**y_v[i])
        

print("x is ", x, ", y is ", y)

x is  3480 , y is  11.0


In [40]:
k = math.gcd(x -int(y), the_number)
print(k)
print (int(the_number/k)) 

3469
3491


In [41]:

### works for 5429 = 61*89
### works for 8633 = 89 * 97 
### works for 8051 = 83 *97 
### works for 2257 = 61*37
### 1 817 429 = 1579*1151 - works
### 12 110 279 = 3469*3461 - works
### 62 615 533 = 7907*7919 - works 