In [1]:
import math
import numpy as np
import sympy
import time 

In [2]:
def calc_time(start, end):
    seconds = end - start
    mins = seconds / 60
    print('Time:', str(seconds), '    ', str(mins))
    return seconds, mins

In [3]:
## I have kept the generator cell, but I generate more numbers in the next one. The prime number is set in this cell.
## bound is commented out, as it is already set, and the array of primes is already created

# n = 16921456439215439701
n = 12110279
the_number = n
s_const = int(math.sqrt(n)) + 1
A_const = (math.sqrt(2)-1)*math.sqrt(n)-1      

print(A_const)

1440.4559959137964


In [4]:
def f(x):
    return (x+s_const)**2 - the_number

In [5]:
# Final optimized version (200-500 times faster than the correct one above)
def get_primes_less_thanB(bound):                      # eratosthenes way
    check_until = int(bound**0.5)
    unmarked = np.array([x for x in range(2, bound)])
    q = 2
    i = 0
    while True:
        u = unmarked[i]
        if u> check_until:               # only check numbers up to sqrt(bound)
            break
        u_idx = np.where(unmarked == u)[0][0]
        max_idx = bound//u
        listof_multiples = [u*i for i in range(q, max_idx+1)]
        u_multiples = np.array(listof_multiples)
        unmarked = np.setdiff1d(unmarked, u_multiples)
        q = u
        i +=1
    # print("final unmarked elements (for eye balling!): ", unmarked)
    print("Number of elements less than bound = ", bound, " is: ", len(unmarked))
    return (unmarked)

In [6]:
start = time.time()
bound = 10000
primes_less_thanB = get_primes_less_thanB(bound)
end = time.time()

calc_time(start, end)

Number of elements less than bound =  10000  is:  1229
Time: 0.0279238224029541      0.000465397040049235


(0.0279238224029541, 0.000465397040049235)

In [7]:
bound = 100
primes = np.array(get_primes_less_thanB(bound))
print("the primes in this factor base with bound=", bound, "are: ", primes)

size_of_bound = len(primes)

Number of elements less than bound =  100  is:  25
the primes in this factor base with bound= 100 are:  [ 2  3  5  7 11 13 17 19 23 29 31 37 41 43 47 53 59 61 67 71 73 79 83 89
 97]


In [27]:
# Correct version
def check_Bsmooth(primes, num):
    powers_of_primes= []
    num_remaining= num                          # keep dividing by primes in B smooth primes, until == 1
    for prime in primes: 
        #print("current prime: ", prime)
        i = 0                                   # i = power of prime
        if (num_remaining%prime == 0):
            while (num_remaining%prime ==0):   #keep dividing for each power of the prime
                quotient = num_remaining/prime
                num_remaining = quotient
                i +=1
        powers_of_primes.append(i)
        #print("num remaining: ", num_remaining)
        if num_remaining == 1:     # ---------------(**) INDENT PROPERLY!
            diff = len(primes)-len(powers_of_primes)
            if diff>0:
                zeros = [0 for k in range(diff)]
                return powers_of_primes + zeros
            return powers_of_primes
    #print("Number is not B smooth")
    return []

### Still more checks, on B-smooth, on Sieve; 

In [9]:
# I was just falling asleep when I realized that the INDENTATION for "if num_remaining is 1" MIGHT be wrong!
# So it was unnecessarily checking, 
num = 100
bound = 100
primes = np.array(get_primes_less_thanB(bound))

start = time.time()
result = check_Bsmooth(primes, num)
end = time.time()

calc_time(start, end)


Number of elements less than bound =  100  is:  25
current prime:  2
num remaining:  25.0
current prime:  3
num remaining:  25.0
current prime:  5
num remaining:  1.0
Time: 0.0009963512420654297      1.660585403442383e-05


(0.0009963512420654297, 1.660585403442383e-05)

In [10]:
def sieve(A_const, bounded_primes):
    B_smooth_squares = []
    powers_of_primes = []
    for i in range(0, int(A_const)):
        k = f(i)
#         if i%100 ==0:
#             print("in this iteration i=", i, " candidate k is: ", k)
        exponents = check_Bsmooth(bounded_primes, k)
        if len(exponents)>0:
            B_smooth_squares.append(k)
            powers_of_primes.append(exponents)
            #print("found a B-smooth number(!) = ", k)
            #print("with prime powers: ", exponents)
        else:
            continue
    return (B_smooth_squares, powers_of_primes)

In [11]:
bound = 100
primes = np.array(get_primes_less_thanB(bound))

Number of elements less than bound =  100  is:  25


In [15]:
start = time.time()
results = sieve(A_const, primes)
end = time.time()

calc_time(start, end)
print(results[0])
#print(results[1])

Time: 0.05236530303955078      0.0008727550506591797
[121, 14045, 153725, 174746, 237917, 613210, 763465, 799370, 1197625, 1743005, 1772797, 2535650, 2658370, 3279650, 3342482, 3690346, 3985865, 4066205, 7674425, 8428745, 8884445, 9280346, 9298850, 10234250, 11519042, 11782265]


In [16]:
print("Number of B smooth candidates with bound: ", bound, " are ", len(results[0]))

Number of B smooth candidates with bound:  100  are  26


### Ignore the following 7 cells if you just want to run; start after Markdown cell again

In [17]:
bound = 120
primes = np.array(get_primes_less_thanB(bound))

Number of elements less than bound =  120  is:  30


In [18]:
start = time.time()
result = sieve(A_const, primes)
end = time.time()

calc_time(start, end)
print("Number of B smooth candidates with bound: ", bound, " are ", len(result[0]))

In [19]:
B_smooth_squares = results[0]
print(B_smooth_squares)
powers_of_primes = results[1]
# print(powers_of_primes)

In [45]:
exp_array = (np.array(powers_of_primes))%2
rownum = len(exp_array)
colnum = len(exp_array[0])
print("num rows: ", rownum, "num columns ", colnum)

exp_array = np.transpose(exp_array)
rownum = len(exp_array)
colnum = len(exp_array[0])
print("num rows: ", rownum, "num columns ", colnum)
# print(exp_array)

num rows:  53 num columns  46
num rows:  46 num columns  53


In [48]:
B_smooth_squares = results[0]
print(B_smooth_squares)
powers_of_primes = results[1]
print(powers_of_primes)

[121, 2658370, 3342482]
[[0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0], [1, 0, 1, 0, 2, 3, 0, 0, 0, 0, 0, 0], [1, 0, 0, 0, 1, 2, 0, 0, 0, 1, 1, 0]]


In [37]:
# Just checking how long this alternative you had takes; seems comparable in terms of time
# But this method actually doesn't find ANY candidates k, because the check at (**) means you check 
# whether EVERY one of the primes in'B smooth primes' divide the candidate k! But that's too strict a requirement!
# It also does not check if one prime repeatedly divides k - we need to do that; e.g. 11 goes into 2658370 3 times!

def sieve2(A_const, primes):
    sieve_array = []

    for i in range(0, int(A_const)):
        k = f(i)
        #print(k)
        trigger = 0
        flag_cont = 0

        for j in range(0, len(primes)):
            if(k%primes[j]!=0):   # if any prime does not divide k ----------------(**)
                flag_cont = 1     # then set flag to 1

        if flag_cont == 1:
            flag_cont = 0
            continue
        else:
            sieve_array.append(k)

    print(len(sieve_array))
    

In [40]:
start = time.time()
result = sieve2(A_const, primes)
end = time.time()
calc_time(start, end)

0
Time: 0.05785202980041504      0.000964200496673584


(0.05785202980041504, 0.000964200496673584)

### Start here (but load sieve, checkBsmooth, primes less than B smooth)
### Checking whether we still get x is  3480 , y is  11.0 GIVEN the new mod2 matrix is DIFFERENT!

In [30]:
# Simulating as you did,  with bound 40 (get the same factorization at the end)

# start = time.time()

bound = 40
primes = np.array(get_primes_less_thanB(bound))

#results = sieve(A_const, primes)  -- this is same as what happens in the cell below?
# end = time.time()

# calc_time(start, end)   # I like to time everything :)

Number of elements less than bound =  40  is:  12


In [31]:
## this is the generator cell, it works for the small tests I am doing, but we will have to update it later
nums = []
results=[]
size_of_bound = len(primes)

numbers = []
count_found = 0
begin = int(math.sqrt(n)) + 1
i = 0
start = time.time()
while(True):
    temp = check_Bsmooth(primes, (begin**2)%n)
    if temp!=[]:
        print("temp ", temp)
        exponents = np.array(temp)
        print
        result = np.power(primes, exponents)
        the_num = np.prod(result)
        print("the number: ", the_num)
        numbers.append(the_num)
        
        nums.append(begin)
        results.append(temp)
        count_found = count_found +1
            
    if count_found> size_of_bound +3:
        break
    begin +=1
    i +=1
end = time.time()
print("iterations, ", i)
print("B smooth numbers found: ", numbers, "total: ", len(numbers))
print("Should equal nums: ", len(nums))
calc_time(start, end)
print (results)

temp  [0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0]
the number:  121
temp  [1, 0, 1, 0, 2, 3, 0, 0, 0, 0, 0, 0]
the number:  2658370
temp  [1, 0, 0, 0, 1, 2, 0, 0, 0, 1, 1, 0]
the number:  3342482
temp  [0, 1, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0]
the number:  390963
temp  [2, 0, 0, 2, 1, 0, 1, 1, 0, 0, 0, 0]
the number:  696388
temp  [0, 0, 0, 0, 0, 2, 2, 1, 0, 0, 0, 0]
the number:  927979
temp  [2, 1, 0, 1, 1, 0, 3, 0, 0, 0, 0, 0]
the number:  4539612
temp  [2, 1, 0, 4, 1, 0, 1, 0, 0, 0, 0, 0]
the number:  5387844
temp  [2, 0, 0, 2, 0, 2, 0, 2, 0, 0, 0, 0]
the number:  11957764
temp  [2, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0]
the number:  484
temp  [3, 0, 1, 0, 2, 3, 0, 0, 0, 0, 0, 0]
the number:  10633480
temp  [1, 2, 0, 0, 0, 0, 0, 0, 1, 0, 2, 0]
the number:  397854
temp  [0, 3, 0, 2, 1, 0, 0, 0, 0, 0, 0, 1]
the number:  538461
temp  [1, 2, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0]
the number:  1527246
temp  [1, 4, 1, 0, 2, 0, 0, 0, 1, 0, 0, 0]
the number:  2254230
temp  [0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0]
the numb

In [32]:
matrix = []
for result in results:
    if result != "[]":
        matrix.append(result)
np_array = np.array(matrix)

## store_pows is the original matrix of powers, which we use later in the computation of y

store_pows = np_array
np_array = np.transpose(np_array)
print(np_array %2)


[[0 1 1 0 0 0 0 0 0 0 1 1 0 1 1 0]
 [0 0 0 1 0 0 1 1 0 0 0 0 1 0 0 1]
 [0 1 0 0 0 0 0 0 0 0 1 0 0 0 1 0]
 [0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 1]
 [0 0 1 0 1 0 1 1 0 0 0 0 1 0 0 1]
 [0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0]
 [0 0 0 0 1 0 1 1 0 0 0 0 0 1 0 1]
 [0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 1 0 1 1 1]
 [0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1]
 [0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0]]


In [33]:
a = (np_array%2)
n = len(a)            # row number + number of equations we have
m = len(a[0])        # column number + number of primes (in factor base)

b =  np.array([[0 for i in range(n)]])     # all zeros
a = np.concatenate((a, b.T), axis=1)
print(a)

[[0 1 1 0 0 0 0 0 0 0 1 1 0 1 1 0 0]
 [0 0 0 1 0 0 1 1 0 0 0 0 1 0 0 1 0]
 [0 1 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0]
 [0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 1 0]
 [0 0 1 0 1 0 1 1 0 0 0 0 1 0 0 1 0]
 [0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0]
 [0 0 0 0 1 0 1 1 0 0 0 0 0 1 0 1 0]
 [0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 1 0 1 1 1 0]
 [0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0]
 [0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0]]


In [34]:
# changed it to remove all zero rows from A
def zero_row(A): 
    return A[~np.all(A == 0, axis=1)]

In [35]:
## I have commented out most of the print statements, since for a large matrix the print output is a lot, but
## they can be used in further debugging.

## The only thing I changed about the function is that instead of returning rows and cols upon finding an 0 row, the zero row
## function removes all zero rows, and then the function returns the augmented matrix, so we can continu working with it.

def upp_triangular(num_rows, num_cols, aug_matA):
    A = aug_matA
    n = num_rows
    m = num_cols
    row_adds = []
    swaps = []
    for i in range(m):            # column
        if i>=n:    
            break
        A = A%2
        #print("current matrix ", A)
        maxi = i
        for k in range(i, n):                 # first row at i or below where entry is 1 - pivot for that column
            if A[k][i] ==1:
                maxi = k
                break
        if A[maxi][i] != 1:                    # no '1'- entry in any row for this column
        #    print("No pivots in this column")
            continue
        else:                                 # some row = maxi row has a 1 entry
        #    print("found a row with a 1 entry, which is: row ", maxi)
            if maxi != i:            # need to swap rows
        #        print ("Swapping rows for row = ", i, "and maxi = ", maxi)
                currenti = A[i].copy()
                A[i] = A[maxi]
                A[maxi] = currenti
        #        print("matrix overall after swap: ", A)
                swaps.append((i,maxi))
            for j in range(i+1, n):    # check every entry directly below A[i][i] 
                belowi = A[j][i]
        #        print("belowi: ", belowi)
                if belowi !=0:          # if entry is 1 for any row, nullify it
                    A[j]+= A[i]
                    row_adds.append((i,j))
    
    # should have upper triangular matrix at the end of this 
    A =zero_row(A)
    return A
    


In [36]:
from sympy import *

aug_matrix = a
row_num = len(a)
col_num = len(a[0])

L = Matrix(a)
#print(L.rref())
aug_matrix = np.array(L.rref()[0])
aug_matrix  = zero_row(aug_matrix)
#aug_matrix = upp_triangular(row_num, col_num,  aug_matrix)

print("reduced row echelon form is ")
print(aug_matrix)


reduced row echelon form is 
[[0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0]
 [0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 1 0 -1 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0]]


In [37]:
# v: 0s are non-free variables, 1s are fixed variables which will be 1 (free or non-free),
# 2s are fixed variables which will be 0 (free or non-free)

v = np.ones(col_num, dtype = 'int')

# the row_num has changed after dropping the 0 rows
row_num = len(aug_matrix)

# find the leading entries, aka the non-free variables, set them to 0 in the v vector
for i in range(0, row_num):
    check_main = 1
    for j in range(0, col_num):
        if(aug_matrix[i][j]==1 and check_main ==1):
            v[j]=0
            break

print(v)

[1 0 0 0 0 0 0 1 1 1 1 0 0 0 0 0 1]


In [38]:
# set one of the free variable to 1, all the rest of the free variables to 2.
## THIS IS A HUGE PLACE FOR OPTIMIZATION, IF WE PICK THE RIGHT FREE VARIABLE LIFE WILL BE EASIER LATER

delete_ones=False
for i in range(0, col_num):
    if(delete_ones==True and v[i]==1):
        v[i]=2
    if(v[i]==1):
        delete_ones = True

print(v)
# calculate the other variables according to the values of the free variables we have:

for i in reversed(range(0, row_num)):
    #for j in range(0, col_num):
    sum =0
    for j in range(0, col_num):
        sum = sum + v[j]*aug_matrix[i][j];
    zer =(np.argwhere(v == 0).flatten())
    sum = sum%2;
    if(len(zer)!=0):
        index = zer[len(zer)-1]
    
        if(sum==0):
            v[index] = 2
        else:
            v[index] = 1
        
print(" final v vector is ", v%2)
        

[1 0 0 0 0 0 0 2 2 2 2 0 0 0 0 0 2]
 final v vector is  [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]


In [39]:
x = 1;
y = 1;

for i in range(0, col_num):
    if v[i] ==1:
        x = x * nums[i]
        #for j in range(0, len(primes)):
        #    if(store_pows[i][j]!=0):
        #        #print(int(y*(primes[j]**store_pows[i][j])))
        #        y = (y* ((primes[j]**store_pows[i][j])))

        
y_v  = np.zeros(size_of_bound)

for i in range(0, col_num):
    if v[i]==1:
        for j in range(0, (size_of_bound)):
            y_v[j] = y_v[j] + store_pows[i][j]

y_v = y_v/2

for i in range(0, size_of_bound):
    if(y_v is not 0):
        y = y*(primes[i]**y_v[i])
        

print("x is ", x, ", y is ", y)

x is  3480 , y is  11.0


In [40]:
k = math.gcd(x -int(y), the_number)
print(k)
print (int(the_number/k)) 

3469
3491


In [41]:

### works for 5429 = 61*89
### works for 8633 = 89 * 97 
### works for 8051 = 83 *97 
### works for 2257 = 61*37
### 1 817 429 = 1579*1151 - works
### 12 110 279 = 3469*3461 - works
### 62 615 533 = 7907*7919 - works 