# Inputing Elliptic Curve Data into the Even Number GAN

### Importing Packages

In [2]:
from typing import Tuple
from typing import List
import math
import unittest
import numpy as np

import torch
import torch.nn as nn

import pandas as pd

import torch.nn.functional as F

### Setting Parameters

In [3]:
max_int = 128
batch_size = 16
training_steps = 500
learning_rate = 0.001
print_output_every_n_steps = 10
#input_length = int(math.log(max_int, 2))

input_length = 20

# Number of nodes in each layer
k = 1000

### Importing Data

In [4]:
rawdata = pd.read_csv("s3://lmfdbpractice/rawdata.csv")
rawdata.columns = ['a1', 'a2', 'a3', 'a4', 'a6', 'rank']
rawdata = rawdata.astype('float')
rawdata.head()

# Data Subsets
rank0 = rawdata[rawdata['rank'] == 0]

# Filtering Data
rank0 = rank0[rank0['a4'] > -128]
rank0 = rank0[rank0['a4'] < 128]
rank0 = rank0[rank0['a6'] > -128]
rank0 = rank0[rank0['a6'] < 128]
rank0_values = rank0

In [5]:
rank0_values = rank0.drop('rank', axis=1)

rank0_values

Unnamed: 0,a1,a2,a3,a4,a6
1,0.0,-1.0,1.0,0.0,0.0
2,1.0,0.0,1.0,4.0,-6.0
3,1.0,0.0,1.0,-36.0,-70.0
5,1.0,0.0,1.0,-1.0,0.0
7,1.0,0.0,1.0,-11.0,12.0
...,...,...,...,...,...
3064009,1.0,1.0,1.0,4.0,47.0
3064231,1.0,-1.0,1.0,16.0,55.0
3064232,0.0,0.0,1.0,-2.0,15.0
3064486,1.0,-1.0,0.0,-25.0,29.0


### Making Data Binary

In [6]:
# Binary for a1
rank0_values['a1b'] = rank0_values['a1'].astype('int')
# Binary for a2
rank0_values['a2b1'] = rank0_values['a2'].apply(lambda x : 0 if x != -1 else 1) 
rank0_values['a2b2'] = rank0_values['a2'].apply(lambda x : 1 if x != 0 else 0) 
# Binary for a3
rank0_values['a3b'] = rank0_values['a3'].apply(lambda x : 1 if x != 0 else 0)
# Binary for a4 (this one starts to be more complicated)
### First I find whether it is positive or negative
rank0_values['a4b1'] = rank0_values['a4'].apply(lambda x : 1 if x < 0 else 0)
### Then I create a function that produces a binary list representation of an integer 
def create_binary_list_from_int(number: int) -> List[int]:
    """Creates a list of the binary representation of a positive integer

    Args:
        number: An integer

    Returns:
        The binary representation of the provided positive integer number as a list.
    """
    if number < 0 or type(number) is not int:
        raise ValueError("Only Positive integers are allowed")
    data = [int(x) for x in list(bin(number))[2:]]
    data = ([0] * (7 - len(data))) + data

    return data
### Then I run the integer to binary list function on the value of a4 and create column 'a4b'
rank0_values1 = rank0_values.copy()
rank0_values1['a4b'] = (rank0_values['a4'].abs().astype(int)).apply(create_binary_list_from_int)
### Then create columns from that list
a4binary = pd.DataFrame(rank0_values1['a4b'].to_list(), columns = ['a4b2', 'a4b3', 'a4b4', 'a4b5', 'a4b6', 'a4b7', 'a4b8'])
rank0_df1 = pd.concat([rank0_values.reset_index(), a4binary], axis = 1)
# Binary for a6 (similar process to a4)
rank0_df1['a6b1'] = rank0_df1['a6'].apply(lambda x : 1 if x < 0 else 0)
rank0_values2 = rank0_values.copy(deep=False)
rank0_values2['a6b'] = (rank0_values['a6'].abs().astype(int)).apply(create_binary_list_from_int)
a6binary = pd.DataFrame(rank0_values2['a6b'].to_list(), columns = ['a6b2', 'a6b3', 'a6b4', 'a6b5', 'a6b6', 'a6b7', 'a6b8'])
rank0_df = pd.concat([rank0_df1.reset_index(), a6binary], axis = 1)

In [7]:
rank0_df

Unnamed: 0,level_0,index,a1,a2,a3,a4,a6,a1b,a2b1,a2b2,...,a4b7,a4b8,a6b1,a6b2,a6b3,a6b4,a6b5,a6b6,a6b7,a6b8
0,0,1,0.0,-1.0,1.0,0.0,0.0,0,1,1,...,0,0,0,0,0,0,0,0,0,0
1,1,2,1.0,0.0,1.0,4.0,-6.0,1,0,0,...,0,0,1,0,0,0,0,1,1,0
2,2,3,1.0,0.0,1.0,-36.0,-70.0,1,0,0,...,0,0,1,1,0,0,0,1,1,0
3,3,5,1.0,0.0,1.0,-1.0,0.0,1,0,0,...,0,1,0,0,0,0,0,0,0,0
4,4,7,1.0,0.0,1.0,-11.0,12.0,1,0,0,...,1,1,0,0,0,0,1,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
34526,34526,3064009,1.0,1.0,1.0,4.0,47.0,1,0,1,...,0,0,0,0,1,0,1,1,1,1
34527,34527,3064231,1.0,-1.0,1.0,16.0,55.0,1,1,1,...,0,0,0,0,1,1,0,1,1,1
34528,34528,3064232,0.0,0.0,1.0,-2.0,15.0,0,0,0,...,1,0,0,0,0,0,1,1,1,1
34529,34529,3064486,1.0,-1.0,0.0,-25.0,29.0,1,1,1,...,0,1,0,0,0,1,1,1,0,1


In [8]:
rank0_binary = rank0_df[['a1b','a2b1','a2b2','a3b','a4b1','a4b2','a4b3','a4b4','a4b5','a4b6','a4b7','a4b8', 'a6b1',
                         'a6b2','a6b3','a6b4','a6b5','a6b6','a6b7','a6b8']].copy()

In [9]:
rank0_binary

Unnamed: 0,a1b,a2b1,a2b2,a3b,a4b1,a4b2,a4b3,a4b4,a4b5,a4b6,a4b7,a4b8,a6b1,a6b2,a6b3,a6b4,a6b5,a6b6,a6b7,a6b8
0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,1,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,1,0
2,1,0,0,1,1,0,1,0,0,1,0,0,1,1,0,0,0,1,1,0
3,1,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0
4,1,0,0,1,1,0,0,0,1,0,1,1,0,0,0,0,1,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
34526,1,0,1,1,0,0,0,0,0,1,0,0,0,0,1,0,1,1,1,1
34527,1,1,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,1,1,1
34528,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,1,1,1,1
34529,1,1,1,0,1,0,0,1,1,0,0,1,0,0,0,1,1,1,0,1


### Creating Models

In [10]:
class Generator(nn.Module):
    def __init__(self, input_length: int):
        super(Generator, self).__init__()
        self.dense_layer = nn.Linear(int(input_length), k)
        self.dense_layer2 = nn.Linear(k, k)
        self.dense_layer3 = nn.Linear(k, int(input_length))

    def forward(self, x):
        l1 = self.dense_layer(x)
        l2 = self.dense_layer2(F.relu(l1))
        l3 = self.dense_layer3(F.relu(l2))
        return F.sigmoid(l3)
    
class Discriminator(nn.Module):
    def __init__(self, input_length: int):
        super(Discriminator, self).__init__()
        self.dense_layer = nn.Linear(int(input_length), k)
        self.dense_layer2 = nn.Linear(k, k)
        self.dense_layer3 = nn.Linear(k, 1)

    def forward(self, x):
        l1 = self.dense_layer(x)
        l2 = self.dense_layer2(F.relu(l1))
        l3 = self.dense_layer3(F.relu(l2))
        return F.sigmoid(l3)

In [11]:
# Original generator/discriminator
class Generator(nn.Module):
    def __init__(self, input_length: int):
        super(Generator, self).__init__()
        self.dense_layer = nn.Linear(int(input_length), int(input_length))
        #self.activation = nn.Sigmoid()
        self.activation = nn.ReLU()

    def forward(self, x):
        return self.activation(self.dense_layer(x))
    
class Discriminator(nn.Module):
    def __init__(self, input_length: int):
        super(Discriminator, self).__init__()
        self.dense = nn.Linear(int(input_length), 1)
        self.activation = nn.Sigmoid()

    def forward(self, x):
        return self.activation(self.dense(x))

In [12]:
    # Models
generator = Generator(input_length)
discriminator = Discriminator(input_length)

### Creating Optimizers

In [13]:
    # Optimizers
generator_optimizer = torch.optim.Adam(generator.parameters(), lr=learning_rate)
discriminator_optimizer = torch.optim.Adam(discriminator.parameters(), lr=learning_rate)

### Defining Loss

In [14]:
    # loss
loss = nn.BCELoss()

### Defining functions needed

In [15]:
# Take sample from data of elliptic curves of a certain rank?

def generate_even_data(
    max_int: int, batch_size: int = 16
) -> Tuple[List[int], List[List[int]]]:
    """An infinite data generator which yields

    Args:
        max_int: The maximum input integer value
        batch_size: The size of the training batch.

    Returns:
        A Tuple with the labels and the input data.
        labels:
        data:
    """

    # Get the number of binary places needed to represent the maximum number
    max_length = int(math.log(max_int, 2))

    # Sample batch_size number of integers in range 0-max_int
    sampled_integers = np.random.randint(0, int(max_int / 2), batch_size)

    # create a list of labels all ones because all numbers are even
    labels = [1] * batch_size

    # Generate a list of binary numbers for training.
    data = [create_binary_list_from_int(int(x * 2)) for x in sampled_integers]
    data = [([0] * (max_length - len(x))) + x for x in data]

    return labels, data

In [16]:
def convert_float_matrix_to_int_list(
    float_matrix: np.array, threshold: float = 0.5
) -> List[int]:
    """Converts generated output in binary list form to a list of integers

    Args:
        float_matrix: A matrix of values between 0 and 1 which we want to threshold and convert to
            integers
        threshold: The cutoff value for 0 and 1 thresholding.

    Returns:
        A list of integers.
    """
    return [
        int("".join([str(int(y)) for y in x]), 2) for x in float_matrix >= threshold
    ]


In [17]:
def convert_float_matrix_to_int_list(
    float_matrix: np.array, threshold: float = 0.5
) -> List[int]:
    """Converts generated output in binary list form to a list of integers

    Args:
        float_matrix: A matrix of values between 0 and 1 which we want to threshold and convert to
            integers
        threshold: The cutoff value for 0 and 1 thresholding.

    Returns:
        A list of integers.
    """
    return [
        int("".join([str(int(y)) for y in x]), 2) for x in float_matrix >= threshold
    ]


In [18]:
def create_binary_list_from_int(number: int) -> List[int]:
    """Creates a list of the binary representation of a positive integer

    Args:
        number: An integer

    Returns:
        The binary representation of the provided positive integer number as a list.
    """
    if number < 0 or type(number) is not int:
        raise ValueError("Only Positive integers are allowed")

    return [int(x) for x in list(bin(number))[2:]]

In [19]:
def Extract(lst):
    c1 = [int("".join([str(int(y)) for y in x]), 2) for x in G_of_noise >= threshold]

    #return [item[0] for item in lst]

In [23]:
# First Coefficient
c1 = int(G_numpy[0][0].round())
c1

NameError: name 'G_numpy' is not defined

In [21]:
# Second Coefficient
c2 = (-1)**(int(G_numpy[0][1].round()))*(G_numpy[0][2].round())
c2

NameError: name 'G_numpy' is not defined

In [22]:
# Third Coefficient
c3 = int(G_numpy[0][3].round())
c3

NameError: name 'G_numpy' is not defined

In [None]:
# Fourth Coefficient
c4 = (-1)**(int(G_numpy[0][4].round()))*(int("".join([str(int(y)) for y in G_numpy[0][5:12].round()]), 2))
c4

In [261]:
# Fifth Coefficient
c6 = (-1)**(int(G_numpy[0][12].round()))*(int("".join([str(int(y)) for y in G_numpy[0][13:].round()]), 2))
c6

-27

In [281]:
def extract(G_of_noise):

    G_numpy = G_of_noise.detach().numpy()    

    curves = []

    for i in range(len(G_numpy)):
        c1 = int(G_numpy[i][0].round())
        c2 = (-1)**(int(G_numpy[i][1].round()))*(G_numpy[i][2].round())
        c3 = int(G_numpy[i][3].round())
        c4 = (-1)**(int(G_numpy[i][4].round()))*(int("".join([str(int(y)) for y in G_numpy[i][5:12].round()]), 2))
        c6 = (-1)**(int(G_numpy[i][12].round()))*(int("".join([str(int(y)) for y in G_numpy[i][13:].round()]), 2))
    
        coef = [c1,c2,c3,c4,c6]
        curves.append(coef)
    return curves

In [267]:
curves

[[1, 1.0, 0, -11, -27],
 [1, 1.0, 0, -11, -27],
 [1, 1.0, 0, -11, -27],
 [1, 1.0, 0, -11, -27],
 [1, 1.0, 0, -11, -27],
 [1, 1.0, 0, -11, -27],
 [1, 1.0, 0, -11, -27],
 [1, 1.0, 0, -11, -27],
 [1, 1.0, 0, -11, -27],
 [1, 1.0, 0, -11, -27],
 [1, 1.0, 0, -11, -27],
 [1, 1.0, 0, -11, -27],
 [1, 1.0, 0, -11, -27],
 [1, 1.0, 0, -11, -27],
 [1, 1.0, 0, -11, -27],
 [1, 1.0, 0, -11, -27]]

In [264]:
len(G_numpy)

16

In [262]:
G_numpy[0][13:].round()

array([0., 0., 1., 1., 0., 1., 1.], dtype=float32)

In [236]:
c1 = [int("".join([str(int(y)) for y in x]), 2) for x in G_numpy[0][0] >= 0.5]
c1

TypeError: 'numpy.bool_' object is not iterable

In [233]:
G_numpy = G_of_noise.detach().numpy()

In [244]:
G_numpy.round()

array([[1., 0., 1., 0., 1., 0., 0., 0., 1., 0., 1., 1., 1., 0., 0., 1.,
        1., 0., 1., 1.],
       [1., 0., 1., 0., 1., 0., 0., 0., 1., 0., 1., 1., 1., 0., 0., 1.,
        1., 0., 1., 1.],
       [1., 0., 1., 0., 1., 0., 0., 0., 1., 0., 1., 1., 1., 0., 0., 1.,
        1., 0., 1., 1.],
       [1., 0., 1., 0., 1., 0., 0., 0., 1., 0., 1., 1., 1., 0., 0., 1.,
        1., 0., 1., 1.],
       [1., 0., 1., 0., 1., 0., 0., 0., 1., 0., 1., 1., 1., 0., 0., 1.,
        1., 0., 1., 1.],
       [1., 0., 1., 0., 1., 0., 0., 0., 1., 0., 1., 1., 1., 0., 0., 1.,
        1., 0., 1., 1.],
       [1., 0., 1., 0., 1., 0., 0., 0., 1., 0., 1., 1., 1., 0., 0., 1.,
        1., 0., 1., 1.],
       [1., 0., 1., 0., 1., 0., 0., 0., 1., 0., 1., 1., 1., 0., 0., 1.,
        1., 0., 1., 1.],
       [1., 0., 1., 0., 1., 0., 0., 0., 1., 0., 1., 1., 1., 0., 0., 1.,
        1., 0., 1., 1.],
       [1., 0., 1., 0., 1., 0., 0., 0., 1., 0., 1., 1., 1., 0., 0., 1.,
        1., 0., 1., 1.],
       [1., 0., 1., 0., 1., 0.

In [159]:
a1_output = Extract(G_of_noise)
convert_float_matrix_to_int_list(a1_output)

TypeError: '>=' not supported between instances of 'list' and 'float'

In [160]:
a1_output

[tensor(0., grad_fn=<SelectBackward>),
 tensor(0., grad_fn=<SelectBackward>),
 tensor(0., grad_fn=<SelectBackward>),
 tensor(0., grad_fn=<SelectBackward>),
 tensor(0., grad_fn=<SelectBackward>),
 tensor(0., grad_fn=<SelectBackward>),
 tensor(0., grad_fn=<SelectBackward>),
 tensor(0., grad_fn=<SelectBackward>),
 tensor(0., grad_fn=<SelectBackward>),
 tensor(0., grad_fn=<SelectBackward>),
 tensor(0., grad_fn=<SelectBackward>),
 tensor(0.0204, grad_fn=<SelectBackward>),
 tensor(0., grad_fn=<SelectBackward>),
 tensor(0., grad_fn=<SelectBackward>),
 tensor(0., grad_fn=<SelectBackward>),
 tensor(0., grad_fn=<SelectBackward>)]

In [145]:
convert_float_matrix_to_int_list(G_of_noise[[1][0:5]])

[66962]

In [153]:
convert_float_matrix_to_int_list(G_of_noise[[1]])

[173833]

In [None]:
def Extract(lst):
    return [item[0] for item in lst]

In [218]:
G_of_noise[0][0:2]

tensor([0.9994, 0.0026], grad_fn=<SliceBackward>)

In [278]:
convert_float_matrix_to_int_list(G_of_noise)

[506459,
 506459,
 506459,
 506459,
 506459,
 506459,
 506459,
 506459,
 506459,
 506459,
 506459,
 506459,
 506459,
 506459,
 506459,
 506459]

In [120]:
convert_float_matrix_to_int_list(G_of_noise[[0]])

[198034]

In [None]:
rank0_binary.sample(16)

### Training Steps

In [306]:
for i in range(training_steps):
        # zero the gradients on each iteration
    generator_optimizer.zero_grad()

    # Create noisy input for generator
    # Need float type instead of int
    noise = torch.randint(0, 2, size=(batch_size, input_length)).float()
    
    # Generate examples of even real data
    true_labels = [1] * batch_size
    true_data = rank0_binary.sample(16).values
    true_labels = torch.tensor(true_labels).float()
    true_data = torch.tensor(true_data).float()
    
    # Train the generator
    # We invert the labels here and don't train the discriminator because we want the generator
    # to make things the discriminator classifies as true.
    G_of_noise = generator(noise)
    D_of_G_of_noise = discriminator(G_of_noise)
    generator_loss = loss(D_of_G_of_noise, true_labels)
    generator_loss.backward()
    generator_optimizer.step()

    # Train the discriminator on the true/generated data
    discriminator_optimizer.zero_grad()
    true_discriminator_out = discriminator(true_data)
    true_discriminator_loss = loss(true_discriminator_out, true_labels)

    # add .detach() here think about this
    generator_discriminator_out = discriminator(G_of_noise.detach())
    generator_discriminator_loss = loss(generator_discriminator_out, torch.zeros(batch_size))
    discriminator_loss = (true_discriminator_loss + generator_discriminator_loss) / 2
    discriminator_loss.backward()
    discriminator_optimizer.step()
    if i % print_output_every_n_steps == 0:
        print(extract(G_of_noise))
        print(G_of_noise.round())
        print(noise)
        #print(discriminator_loss.)

[[0, -0.0, 0, -4, 124], [0, -0.0, 0, -4, 124], [0, -0.0, 0, -4, 124], [0, -0.0, 0, -4, 124], [0, -0.0, 0, -4, 124], [0, -0.0, 0, -4, 124], [0, -0.0, 0, -4, 124], [0, -0.0, 0, -4, 124], [0, -0.0, 0, -4, 124], [0, -0.0, 0, -4, 124], [0, -0.0, 0, -4, 124], [0, -0.0, 0, -4, 124], [0, -0.0, 0, -4, 124], [0, -0.0, 0, -4, 124], [0, -0.0, 0, -4, 124], [0, -0.0, 0, -4, 124]]
tensor([[0., 1., 0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0., 1., 1., 1., 1., 1.,
         0., 0.],
        [0., 1., 0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0., 1., 1., 1., 1., 1.,
         0., 0.],
        [0., 1., 0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0., 1., 1., 1., 1., 1.,
         0., 0.],
        [0., 1., 0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0., 1., 1., 1., 1., 1.,
         0., 0.],
        [0., 1., 0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0., 1., 1., 1., 1., 1.,
         0., 0.],
        [0., 1., 0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0., 1., 1., 1., 1., 1.,
         0., 0.],
        [0., 1., 0., 0., 1., 0., 0., 

In [None]:
g