In [2]:
# This program is designed to provide a heat map and a maxima graph for S1 from the data in a .json file.

# This cell is composed of the necessary imports and class formations for the program.

# Used for math operations:
import numpy as np

# Used for plotting data:
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import axes3d
from scipy.optimize import curve_fit

# Used to read outside files:
from msgspec.json import decode
from msgspec import Struct
from typing import Dict

# Used for making optional arguments with specified data types:
from typing import Optional as Opt

# The following classes allow for specifying a path for decoding the .json file in getData below:

class Nval(Struct):
    # Record stored under one n value in the .json file.
    trialData: list # Assigns to Nval an attribute trialData of type list (the list of trials for this n value).
    meanOfMaxima: float # Assigns to Nval an attribute meanOfMaxima of type float.
            
class Info(Struct):
    # Assigns to Info an attribute n of type Dict from str to Nval.
    # The keys are the available n values written as strings.
    n: Dict[str, Nval]

class Access(Struct):
    # Top-level shape that getData decodes the .json file into.
    info: Info # Assigns to Access an attribute info of type Info.

# Data Intake

In [3]:
'''This cell reads and decodes a .json file to efficiently access its information. 
getData takes in a .json file of the type where s is fixed, as in template.json and only 
decodes the part of the file whose class is Access. Access has an attribute called 
'info' of type Info, which has an attribute 'n' that is a dictionary whose keys are strings, 
specifically the available nvalues, and whose values have type Nval, and therefore have as 
attributes a list of trials called 'trialData' and a float called meanOfMaxima. 
This method allows only the path we want: info > n > str(nval) > trialData > trial.'''

def getData(file: str) -> Access:
    '''Read the .json file at path 'file' and decode it into an Access object.

    The file must be of the type where s is fixed. It is opened in binary mode
    and its bytes are handed to msgspec's decode with type = Access.'''
    with open(file, mode='rb') as source:
        raw = source.read()
    # Decoding happens on the bytes already read, so the file can be closed first.
    return decode(raw, type=Access)
        
# In another module, FullData_S1.pynb, we will specify:
# nvalsSTR = list(data.info.n.keys())
# nvals = [int(val) for val in nvalsSTR]
# nvalsORD = sorted(nvals)

In [4]:
'''This cell breaks the nvalues up into quartiles that allow us to obtain sample points that 
are not all clustered together. getQuartile takes in a list 'nvals' of available n values (as integers) 
in the .json file, and a quantile marker q (as a float), and returns the smallest number in nvals 
greater than or equal to the floor of the quantile q.'''

def getQuartile(nvals: list[int], q: float) -> int:
    '''Return the smallest number in nvals that is greater than or equal to
    the floor of the quantile q of nvals.

    nvals: the available n values (as integers); need not be sorted.
    q: the quantile marker handed to np.quantile.
    Raises ValueError if nvals is empty.'''
    if len(nvals) == 0:
        raise ValueError("nvals must contain at least one n value")
    # We specify int and take the floor because np.quantile may return a float.
    lower_bound = int(np.floor(np.quantile(nvals, q)))
    # One pass over nvals instead of counting up by 1 and rescanning the list
    # for every candidate; the floor of a quantile never exceeds max(nvals),
    # so at least one value always qualifies.
    return int(min(val for val in nvals if val >= lower_bound))

# getQuartiles takes in nvals and returns a list of quartiles associated to the quantiles q listed below.

def getQuartiles(nvals: list[int]) -> list[int]:
    '''Return the values getQuartile picks from nvals for the quantile
    markers 0.25, 0.5, 0.75 and 1, in that order.'''
    markers = (0.25, 0.5, 0.75, 1)
    return [getQuartile(nvals, marker) for marker in markers]

In [5]:
'''This cell creates theta values that will be used to parametrically create a scatter plot of our trials,
which we can then approximate by curve fitting. getTheta takes in a positive integer 'n' and returns 
a list 'thetaVals' of theta values on the circle.'''

def getTheta(n: int) -> list[float]:
    '''Return the n+1 angles 2*pi*k/n for k = 0, ..., n.

    The first angle is 0 and the last is 2*pi, so the list starts and ends
    at the same point of the circle.'''
    full_turn = 2*np.pi
    return [(full_turn*step)/n for step in range(n+1)]

'''getThetas takes in 'nvals' and a list 'quartiles' of quartiles, and returns a list of 4 thetaVals 
corresponding to the quartiles.'''

def getThetas(nvals: list[int], quartiles: list[int]) -> list[list[float]]:
    '''Return one list of theta values per quartile, in the order of 'quartiles'.

    nvals: not used here; kept so existing calls keep working.
    quartiles: the n values to build theta lists for (four of them when
    called from createHeatMap).
    Each entry of the result is getTheta(quartile).'''
    return [getTheta(quartile) for quartile in quartiles]

In [6]:
'''This cell retrieves the trial data for our scatter plot. getTrialData takes in 'data', 'nvals', 'quartiles',
and a list 'trialNums' of four positive numbers, t1-t4, where ti is the index of the trial one wants to
run for the data associated to the i'th quartile. It returns a list of trials as a list of list of floats.'''

def getTrialData(data: Access, nvals: list[int], quartiles: list[int], trialNums: list[int]) -> list[list[float]]:
    '''Return, for each quartile, a closed copy of the selected trial.

    data.info.n accesses the dictionary 'n' that is an attribute of 'info' that is an attribute of 'data'.
    The value stored under the key str(quartiles[i]) has an attribute called 'trialData', which is a
    list of trials. From this list we select the trial whose index is trialNums[i], so the ith element
    of the result is the trialNums[i]'th trial associated to the n value equal to the ith quartile.

    nvals: not used here; kept so existing calls keep working.
    trialNums: must provide an index for every entry of 'quartiles'.'''
    trialsByN = data.info.n
    trialData = []
    for i, quartile in enumerate(quartiles):
        # Copy the trial so the decoded data is never modified.
        trial = list(trialsByN[str(quartile)].trialData[trialNums[i]])
        # We append the first data point of the trial to the trial so that it ends where it begins.
        # This will allow our figures to be closed (i.e., circles).
        trial.append(trial[0])
        trialData.append(trial)
    return trialData


# Heat Map

In [7]:
'''This cell creates the heat map with createHeatMap, which has 3 required arguments and 4 optional ones.
'data' and 'nvals' are required. As is 't', which will be the default trial number for t1-t4 (an int).
If one wants, they may specify other ints for any or all of t1-t4. '''

def createHeatMap(data: Access, nvals: list[int], t: int,
                    t1: Opt[int] = None, t2: Opt[int] = None, t3: Opt[int] = None, t4: Opt[int] = None) -> None:
    '''Draw four 3D heat-map circles, one per quartile of nvals, and show the figure.

    data: the decoded .json file.
    nvals: the available n values (as integers).
    t: the default trial number used for every quartile.
    t1-t4: optional trial numbers that override t for the 1st-4th quartile.'''
    quartiles = getQuartiles(nvals) # Creates the quartiles.
    # The i'th line is a list of qi + 1 zeroes, where qi is the i'th quartile.
    # These lines are the z-values for the circles, ensuring that they all rest on the plane z = 0.
    lines = [[0] * (qi + 1) for qi in quartiles]
    thetas = getThetas(nvals, quartiles) # Creates the thetas.
    # Creates the trial nums, using t wherever no specific ti was given:
    trialNums = [t if ti is None else ti for ti in (t1, t2, t3, t4)]
    # Gets the trial data associated to the trialNums:
    trialData = getTrialData(data, nvals, quartiles, trialNums)
    fig = plt.figure(figsize = (30, 30)) # Creates a 30 inch by 30 inch figure.
    # Creates a list of four axes for 3d graphs in the 4 quadrants (a 2 x 2 grid) of the figure.
    # A subplot position needs rows, columns and index; passing only two numbers is rejected by matplotlib.
    axes = [fig.add_subplot(2, 2, 1 + i, projection='3d') for i in range(4)]
    # Creates the four 3d graphs.
    for ax, theta, trial, line, quartile in zip(axes, thetas, trialData, lines, quartiles):
        xs = np.cos(theta)
        ys = np.sin(theta)
        # Plots a 3D height graph with x-, y-, and z-values cos(theta), sin(theta), and the trial values.
        # 'alpha' specifies the opacity of the graph and 'maroon' specifies its color.
        ax.plot3D(xs, ys, trial, alpha=0.25, c='maroon')
        # Plots a black circle with the same x- and y-values, but with z-values constantly 0.
        ax.plot3D(xs, ys, line, alpha=.75, c='black')
        # Plots a heat map with the 'coolwarm' colormap on the black circle according to the trial values.
        ax.scatter3D(xs, ys, line, cmap='coolwarm', c=trial, alpha=1, s=100)
        # Labels the graph with the n value of this quartile.
        ax.set_title("n value: {}".format(quartile))
    plt.show() # Displays the graph.

# Maxima Graph

In [7]:
'''This cell creates the maxima graph. func1 is just a simple logarithm function that takes in floats 'x' and 'a'
and returns a float 'a*np.log(x)'. createMaximaGraph only requires 'data', 'nvals', 'nvalsSTR', and 'nvalsORD'. 
We will have gotten 'nvalsSTR' from the file (see first cell), and used it to create 'nvals'
and 'nvalsORD', so they just have to be passed on to the function, like 'data'.'''

# func1 is the type of equation we will want to use to approximate our Maxima Graph from a scatter plot.

def func1(x: float, a: float) -> float:
    '''Return a times the natural logarithm of x.

    This is the model handed to curve_fit; np.log also accepts a sequence
    of x values, in which case the result is an array.'''
    log_x = np.log(x)
    return a * log_x


# createMaximaGraph creates the Maxima Graph. It relies on data, nvals, nvalsSTR, and nvalsORD, defined above.

def createMaximaGraph(data: Access, nvals: list[int], nvalsSTR: list[str], nvalsORD: list[int]) -> None:
    '''Plot the mean of maxima against n together with two a*ln(n) regression lines.

    data: the decoded .json file.
    nvalsSTR: the n values as the string keys of data.info.n.
    nvals: the same n values as integers, in the same order as nvalsSTR.
    nvalsORD: the values of nvals in sorted order.
    Prints the fitted 'a' of the full-data regression.'''
    # Gets the mean maxima for each n value, in the order of nvalsSTR (and therefore of nvals):
    MaxMean = [data.info.n[key].meanOfMaxima for key in nvalsSTR]
    # Re-orders the means to follow nvalsORD, so every x value is fitted against its own mean.
    # A dictionary lookup replaces a repeated nvals.index search.
    meanByN = dict(zip(nvals, MaxMean))
    MaxMeanORD = [meanByN[n] for n in nvalsORD]
    # Takes the first half of the sorted data in order to create a predictive regression line:
    half = len(nvalsORD) // 2
    nShort = nvalsORD[:half]
    MM = MaxMeanORD[:half] # MM contains the means corresponding to values in nShort.
    # Creates a regression line with half the data:
    pops, _ = curve_fit(func1, nShort, MM, maxfev=500000)
    # Creates a regression line from all the data:
    popt, _ = curve_fit(func1, nvalsORD, MaxMeanORD, maxfev=500000)
    # Defines the plot style. Newer matplotlib releases only ship the seaborn style under the
    # name 'seaborn-v0_8', so use that name when it is available and the old one otherwise.
    styleName = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
    plt.style.use(styleName)
    plt.figure(figsize = (8,8)) # Sets the plot size.
    # Plots the data:
    plt.scatter(x=nvals,y=MaxMean,s=50)
    # Plots the regression with the full data set:
    plt.plot(nvalsORD, func1(nvalsORD, *popt), c="red")
    # Plots the regression with half the data set:
    plt.plot(nvalsORD, func1(nvalsORD, *pops), c="green")
    # Gives numbers for line of best fit equation:
    print("For [(a)*(ln(n))] you have a values: ")
    print(popt)