In [9]:
# This program is designed to generate a 2D maxima graph for a fixed s from a folder of .csv files.

# This cell contains the imports the program needs.

# Used for plotting data:
import matplotlib.pyplot as plt

# Used to read files:
import pandas as pd

In [1]:
'''This cell builds the list of data-file paths expected in a folder, given the path of that folder.
Windows devices and Mac/Linux devices are handled separately because they use different path
separators (backslash versus forward slash). Given the path of the folder, getFile returns the path of
the file associated with a given value n and range of s values. getFiles returns the paths of all files
with the given s range for n between 'nStart' and 'nEnd' with step size 'nStep'. getFileWindows and
getFilesWindows do the same using the Windows separator.'''

def getFile(address: str, sStart: str, sEnd: str, n: str) -> str:
    """Return the path of the .csv file for a given n and s range (Mac/Linux).

    The file name has the form '<n>_s<sStart>-<sEnd>.csv' and is appended to
    'address' with a forward slash. All arguments must already be strings.
    """
    fileName = "".join([n, '_s', sStart, '-', sEnd, '.csv'])
    return '/'.join([address, fileName])


def getFiles(address: str, sStart: str, sEnd: str, nStart: int, nEnd: int, nStep: int) -> "list[str] | bool":
    """Return the paths of the .csv files for n = nStart, nStart + nStep, ... (Mac/Linux).

    Each path is built by getFile from 'address', the s range and one n value.
    The n values never exceed 'nEnd'; 'nEnd' itself is included only when the
    step lands on it exactly.

    Returns the list of paths, or False (after printing a message) when
    'nStart' is greater than 'nEnd' or when the range produces no n values,
    e.g. for a negative 'nStep'. An 'nStep' of 0 raises ValueError from range().
    """
    # Imported locally because sys is not imported at the top of the program.
    import sys

    if nStart > nEnd:
        print("n range: start: {0} cannot be greater than end: {1}.".format(str(nStart), str(nEnd)))
        return False

    # Stop at nEnd + 1 (not nEnd + nStep) so that no n larger than nEnd is generated
    # when nStep does not evenly divide nEnd - nStart.
    files = [getFile(address, sStart, sEnd, str(n)) for n in range(nStart, nEnd + 1, nStep)]
    if not files:
        # 'file' belongs to print(), not to str.format(), so the message goes to stderr.
        print("n range: start: {0}; stop: {1}; step: {2} produces no n values.".format(
            str(nStart), str(nEnd), str(nStep)), file=sys.stderr)
        return False
    return files

def getFileWindows(address: str, sStart: str, sEnd: str, n: str) -> str:
    """Return the path of the .csv file for a given n and s range (Windows).

    The file name has the form '<n>_s<sStart>-<sEnd>.csv' and is appended to
    'address' with a backslash. All arguments must already be strings.
    """
    fileName = "".join([n, '_s', sStart, '-', sEnd, '.csv'])
    return '\\'.join([address, fileName])


def getFilesWindows(address: str, sStart: str, sEnd: str, nStart: int, nEnd: int, nStep: int) -> "list[str] | bool":
    """Return the paths of the .csv files for n = nStart, nStart + nStep, ... (Windows).

    Each path is built by getFileWindows from 'address', the s range and one n
    value. The n values never exceed 'nEnd'; 'nEnd' itself is included only
    when the step lands on it exactly.

    Returns the list of paths, or False (after printing a message) when
    'nStart' is greater than 'nEnd' or when the range produces no n values,
    e.g. for a negative 'nStep'. An 'nStep' of 0 raises ValueError from range().
    """
    # Imported locally because sys is not imported at the top of the program.
    import sys

    if nStart > nEnd:
        print("n range: start: {0} cannot be greater than end: {1}".format(str(nStart), str(nEnd)))
        return False

    # Stop at nEnd + 1 (not nEnd + nStep) so that no n larger than nEnd is generated
    # when nStep does not evenly divide nEnd - nStart.
    files = [getFileWindows(address, sStart, sEnd, str(n)) for n in range(nStart, nEnd + 1, nStep)]
    if not files:
        # 'file' belongs to print(), not to str.format(), so the message goes to stderr.
        print("n range: start: {0}; stop: {1}; step: {2} produces no n values.".format(
            str(nStart), str(nEnd), str(nStep)), file=sys.stderr)
        return False
    return files

In [2]:
'''This cell retrieves the relevant data from the files in the folder. getPoint takes in a file and a specified
s value, and returns the mean and n value in the row of that given s.'''


def getPoint(file: str, sStart: float, s: float) -> "tuple[int, float] | str":
    """Return the (n, expected mean) pair stored in 'file' on the row for parameter s.

    The file is read with pandas as a three-column table (nValue, sValue,
    expectedMean). The row is located by position only: it is assumed that the
    s values increase from 'sStart' in steps of .001; the sValue column is not
    checked against 's'.

    Returns:
        (n, mean) taken from the located row, or the string 'nonexistent' when
        the file cannot be read, or the string 'missing s' when the computed
        row is not in the table.
    """
    colnames = ['nValue', 'sValue', 'expectedMean']
    try:
        df = pd.read_csv(file, names=colnames)
    except (OSError, ValueError):
        # OSError covers missing or unreadable files; pandas' empty-data and
        # parser errors, as well as decoding errors, are ValueError subclasses.
        return 'nonexistent'
    try:
        # Row associated to the given s if the stepsize is .001. round() is used
        # rather than truncation because float error can leave the product just
        # below the intended integer (e.g. (0.3 - 0.1) * 1000 is 199.99999999999997).
        # NOTE(review): the + 1 offset presumably skips a header line that is read
        # as data row 0 -- confirm against the files.
        row = round((s - sStart) * 1000) + 1
        n = df.at[row, 'nValue']
        mean = df.at[row, 'expectedMean']
    except (KeyError, ValueError, OverflowError):
        # KeyError: no such row; ValueError/OverflowError: s - sStart is NaN or infinite.
        return 'missing s'
    return n, mean

In [3]:
'''This cell creates the 2D graph, whose x-axis is n values, and whose y-axis is E[max^n_s], given a folder of
.csv files; 'sStart', the first value in the range of s values; and a specified parameter 's'.'''

def createMaximaGraph(files: list[str], sStart: float, s: float) -> None:
    """Scatter-plot the expected mean against n for a fixed parameter s.

    For each path in 'files', getPoint is asked for the (n, mean) pair on the
    row for 's'; the pairs become the x and y values of the scatter plot.
    Files that getPoint reports as 'nonexistent' or 'missing s' are left out
    of the plot and listed in printed messages afterwards.

    'files' may also be False or empty (getFiles returns False on a bad n
    range); in that case an empty plot is drawn.
    """
    nVals = []  # n's for the x-axis.
    mVals = []  # Means for the y-axis.
    nonexistent = []  # Files that could not be read.
    missingS = []  # Files with no row for the given s.
    # 'files or []' avoids iterating over the False that getFiles returns on error.
    for file in files or []:
        # Read each file once; every getPoint call re-reads the whole .csv file.
        point = getPoint(file, sStart, s)
        if point == "nonexistent":
            nonexistent.append(file)
        elif point == "missing s":
            missingS.append(file)
        else:
            n, m = point
            nVals.append(n)
            mVals.append(m)
    plt.figure(figsize=(8, 8))  # Sets the plot size.
    plt.scatter(x=nVals, y=mVals, s=50)  # Creates the scatter plot.
    if nonexistent:
        print("The following files could not be located:")
        for file in nonexistent:
            print("{}\n".format(file))
    if missingS:
        print("The following files have no rows with parameter s: {}.".format(s))
        for file in missingS:
            print("{}\n".format(file))

In [24]:
# address = '/Users/alexanderneuschotz/Downloads/Circle'
# files = getFiles(address, '0.0', '0.5', 500, 1000, 10)

# createMaximaGraph(files, 0.0, 0.25)