In [1]:
import numpy as np 
import pandas as pd
import torch


print(torch.cuda.is_available())
print(torch.cuda.device_count())
print(torch.cuda.current_device())
if torch.cuda.is_available():
  print(torch.cuda.get_device_name(0))

True
4
0
NVIDIA RTX A6000


In [7]:
import pandas as pd

df = pd.read_csv("../Feynman_with_units/III.17.37", 
                 sep=r"\s+",  # whitespace separator
                 header=None) # no header row
print(df.shape)


(1000000, 4)


In [8]:
df.head()

Unnamed: 0,0,1,2,3
0,3.11615,4.26126,4.853011,4.977278
1,4.396022,1.976419,3.177826,-4.286656
2,4.555049,3.215873,2.320554,-5.427313
3,1.334108,2.601124,4.351005,0.107158
4,3.796019,3.435712,3.874036,-5.901289


In [9]:
subset_df=df.head(1000)

In [14]:
import math

def encode_p1000(number):
    # Determine the sign token
    sign_token = '-' if number < 0 else '+'
    
    # Work with the absolute value for scientific notation
    abs_num = abs(number)
    
    # Handle zero as a special case
    if abs_num == 0:
        return [sign_token, "000", "E+0"]
    
    # Compute exponent: floor(log10(abs_num))
    exponent = int(math.floor(math.log10(abs_num)))
    
    # Normalize the number to get the mantissa in [1, 10)
    normalized = abs_num / (10 ** exponent)
    
    # Scale the mantissa to the range [0, 1000)
    scaled_mantissa = normalized * 100
    
    # Round to the nearest integer
    rounded_mantissa = int(round(scaled_mantissa))
    
    # Adjust in case rounding pushes the value to 1000
    if rounded_mantissa >= 1000:
        rounded_mantissa = 100
        exponent += 1
    
    # Format the mantissa as a three-digit string
    mantissa_token = f"{rounded_mantissa:03d}"
    
    # Create the exponent token with sign
    exponent_sign = '+' if exponent >= 0 else '-'
    exponent_token = f"E{exponent_sign}{abs(exponent)}"
    
    return [sign_token, mantissa_token, exponent_token]

# Example usage:
number = -1234.56
encoded = encode_p1000(number)
print("Encoded representation:", encoded)

Encoded representation: ['-', '123', 'E+3']


In [15]:
import math

def encode_p1000(number):
    # Determine the sign token
    sign_token = '-' if number < 0 else '+'
    
    # Work with the absolute value for scientific notation
    abs_num = abs(number)
    
    # Handle zero as a special case
    if abs_num == 0:
        return [sign_token, "000", "E+0"]
    
    # Compute exponent: floor(log10(abs_num))
    exponent = int(math.floor(math.log10(abs_num)))
    
    # Normalize the number to get the mantissa in [1, 10)
    normalized = abs_num / (10 ** exponent)
    
    # Optionally, round normalized to a fixed precision (e.g., 3 significant digits)
    normalized = round(normalized, 3)
    
    # Map normalized from [1, 10) to [0, 9)
    shifted = normalized - 1  # now in [0, 9)
    
    # Scale the shifted value to [0, 999]
    # Multiply by (999 / 9) = 111
    mantissa = int(round(shifted * (999 / 9)))
    
    # Ensure mantissa is within [0, 999]
    if mantissa > 999:
        mantissa = 999
    
    # Format the mantissa as a three-digit string
    mantissa_token = f"{mantissa:03d}"
    
    # Create the exponent token with sign (e.g., "E+3" or "E-2")
    exponent_token = f"E{'+' if exponent >= 0 else ''}{exponent}"
    
    return [sign_token, mantissa_token, exponent_token]

# Example usage:
number = -1234.56
encoded = encode_p1000(number)
print("Encoded representation:", encoded)


Encoded representation: ['-', '026', 'E+3']


In [16]:
import numpy as np

def batch_encode_p1000(numbers):
    """
    Encode a batch of numbers using the P1000 scheme.
    
    Parameters:
      numbers (np.ndarray): Array of numbers to encode.
      
    Returns:
      sign_tokens (np.ndarray): Array of sign tokens ('+' or '-').
      mantissa_tokens (np.ndarray): Array of mantissa tokens as zero-padded strings.
      exponent_tokens (np.ndarray): Array of exponent tokens as strings (e.g., 'E+3').
    """
    # Convert input to numpy array
    numbers = np.array(numbers, dtype=float)  # Ensure floating-point
    
    # Sign tokens: vectorized assignment based on the sign of the numbers
    sign_tokens = np.where(numbers < 0, '-', '+')
    
    # Work with absolute values
    abs_numbers = np.abs(numbers)
    
    # Handle zeros separately to avoid log10 issues
    non_zero = abs_numbers > 0
    exponent = np.zeros_like(abs_numbers, dtype=int)
    normalized = np.zeros_like(abs_numbers, dtype=float)
    
    # Compute exponent where numbers are non-zero
    exponent[non_zero] = np.floor(np.log10(abs_numbers[non_zero])).astype(int)
    
    # Compute normalized values for non-zero numbers - using float powers to avoid integer power error
    normalized[non_zero] = abs_numbers[non_zero] / np.power(10.0, exponent[non_zero])
    
    # Scale the normalized values to get mantissa in [0, 1000)
    # Multiply by 100 since normalized values are in [1, 10)
    mantissas = np.round(normalized * 100).astype(int)
    
    # Correct any mantissa that rounds to 1000
    overflow = mantissas >= 1000
    if np.any(overflow):
        mantissas[overflow] = 100
        exponent[overflow] += 1
    
    # Format mantissas as 3-digit strings
    mantissa_tokens = np.array([f"{m:03d}" for m in mantissas])
    
    # Format exponent tokens as strings (e.g., 'E+3' or 'E-2')
    exponent_tokens = np.array([f"E{'+' if e >= 0 else ''}{e}" for e in exponent])
    
    return sign_tokens, mantissa_tokens, exponent_tokens

# Example usage:
numbers = [-1234.56, 0, 56789.0, 3.14159, -0.00123]
signs, mantissas, exponents = batch_encode_p1000(numbers)

# Display encoded outputs
for num, s, m, e in zip(numbers, signs, mantissas, exponents):
    print(f"{num} -> [{s}, {m}, {e}]")

-1234.56 -> [-, 123, E+3]
0 -> [+, 000, E+0]
56789.0 -> [+, 568, E+4]
3.14159 -> [+, 314, E+0]
-0.00123 -> [-, 123, E-3]


In [17]:
import numpy as np

def batch_encode_p1000(numbers):
    """
    Encode a batch of numbers using the P1000 scheme.
    
    Parameters:
      numbers (np.ndarray): Array of numbers to encode.
      
    Returns:
      sign_tokens (np.ndarray): Array of sign tokens ('+' or '-').
      mantissa_tokens (np.ndarray): Array of mantissa tokens as zero-padded strings.
      exponent_tokens (np.ndarray): Array of exponent tokens as strings (e.g., 'E+3').
    """
    # Convert input to numpy array with float type
    numbers = np.array(numbers, dtype=float)
    
    # Determine sign tokens: '-' for negative, '+' for non-negative
    sign_tokens = np.where(numbers < 0, '-', '+')
    
    # Work with absolute values for further calculations
    abs_numbers = np.abs(numbers)
    
    # Initialize arrays for exponent and normalized values
    exponent = np.zeros_like(abs_numbers, dtype=int)
    normalized = np.zeros_like(abs_numbers, dtype=float)
    
    # Identify non-zero numbers to avoid log10 issues with 0
    non_zero = abs_numbers > 0
    
    # Compute exponent as floor(log10(abs(number))) for non-zero numbers
    exponent[non_zero] = np.floor(np.log10(abs_numbers[non_zero])).astype(int)
    
    # Compute normalized values for non-zero numbers: number / 10^exponent
    normalized[non_zero] = abs_numbers[non_zero] / np.power(10.0, exponent[non_zero])
    
    # Isolate the fractional part of the normalized value by subtracting 1
    shifted = normalized - 1  # Now in [0, 9)
    
    # Scale the shifted value to the range [0, 999]
    factor = 999 / 9  # equals 111
    mantissas = np.round(shifted * factor).astype(int)
    
    # Correct any mantissa that might exceed 999 due to rounding
    overflow = mantissas > 999
    if np.any(overflow):
        mantissas[overflow] = 999
        exponent[overflow] += 1
    
    # Format the mantissas as three-digit zero-padded strings
    mantissa_tokens = np.array([f"{m:03d}" for m in mantissas])
    
    # Format exponent tokens as strings (e.g., 'E+3' or 'E-2')
    exponent_tokens = np.array([f"E{'+' if e >= 0 else ''}{e}" for e in exponent])
    
    return sign_tokens, mantissa_tokens, exponent_tokens

# Example usage:
numbers = [-1234.56, 0, 56789.0, 3.14159, -0.00123]
signs, mantissas, exponents = batch_encode_p1000(numbers)

# Display encoded outputs
for num, s, m, e in zip(numbers, signs, mantissas, exponents):
    print(f"{num} -> [{s}, {m}, {e}]")


-1234.56 -> [-, 026, E+3]
0 -> [+, -111, E+0]
56789.0 -> [+, 519, E+4]
3.14159 -> [+, 238, E+0]
-0.00123 -> [-, 026, E-3]


In [19]:
import math

def decode_p1000(sign_token, mantissa_token, exponent_token):
    """
    Decode a single number from its P1000 encoded triplet.
    
    Parameters:
      sign_token (str): '+' or '-'
      mantissa_token (str): three-digit string representing the mantissa
      exponent_token (str): string in the form 'E+3' or 'E-2'
      
    Returns:
      float: the decoded number.
    """
    # Special case: if encoded as zero
    if mantissa_token == "000" and exponent_token == "E+0":
        return 0.0
    
    # Parse the mantissa token as integer
    mantissa_int = int(mantissa_token)
    
    # Parse the exponent value (remove the 'E' and convert to integer)
    exponent = int(exponent_token[1:])
    
    # Reverse the scaling: we previously scaled by 111 (999/9)
    # The fractional part is then recovered by dividing by 111
    fractional_part = mantissa_int / 111.0
    
    # Add back the 1 we subtracted during encoding to get the normalized value
    normalized = 1 + fractional_part
    
    # Reconstruct the original number magnitude
    number = normalized * (10 ** exponent)
    
    # Apply the sign
    if sign_token == '-':
        number = -number
    
    return number

# Example usage:
encoded_examples = [
    ('-', "026", "E+3"),  # expected to correspond to a value near -1234.56
    ('+', "000", "E+0"),  # zero
    ('+', "XYZ", "E+3")   # Example: replace XYZ with an actual token from your encoding
]

# For the first example:
decoded_number = decode_p1000('-', "026", "E+3")
print("Decoded number for ['-', '026', 'E+3']:", decoded_number)


Decoded number for ['-', '026', 'E+3']: -1234.2342342342342


In [20]:
import numpy as np

def batch_decode_p1000(sign_tokens, mantissa_tokens, exponent_tokens):
    """
    Decode a batch of numbers encoded using the P1000 scheme.
    
    Parameters:
      sign_tokens (np.ndarray): Array of sign tokens ('+' or '-').
      mantissa_tokens (np.ndarray): Array of mantissa tokens as zero-padded strings.
      exponent_tokens (np.ndarray): Array of exponent tokens as strings (e.g., 'E+3').
      
    Returns:
      np.ndarray: Array of decoded numbers.
    """
    decoded_numbers = []
    for s, m, e in zip(sign_tokens, mantissa_tokens, exponent_tokens):
        decoded_numbers.append(decode_p1000(s, m, e))
    return np.array(decoded_numbers)

# Example batch decoding:
signs = np.array(['-', '+', '+'])
mantissas = np.array(["026", "000", "123"])  # Use appropriate mantissa tokens for your examples
exponents = np.array(["E+3", "E+0", "E+3"])
decoded_batch = batch_decode_p1000(signs, mantissas, exponents)
print("Batch decoded numbers:", decoded_batch)


Batch decoded numbers: [-1234.23423423     0.          2108.10810811]
