# Meanders, buckets and signatures

## Load

In [None]:
import matplotlib.pyplot as pyplot
import numpy
import pandas

# Column names imposed on the data, in file order; they replace the names read from the CSV.
_columns: list[str] = ['oeisID','n','boundary','buckets','arcCodes','arcCodeBitWidth','crossingsBitWidth']

# Read the CSV and rename its columns in a single chained expression.
df: pandas.DataFrame = pandas.read_csv('nBoundaryArc.csv', engine='pyarrow').set_axis(_columns, axis='columns')

## Add columns

In [None]:
# Distance from boundary to n, plus parity flags (1 when odd, 0 when even).
df['nLess_boundary'] = df['n'] - df['boundary']
df['odd_boundary'] = df['boundary'] & 1
df['odd_n'] = df['n'] & 1
df['odd_nLess_boundary'] = df['nLess_boundary'] & 1

# Row selectors for the two sequences, and the parity of n shared by the formulas below.
_isA000682 = df['oeisID'] == 'A000682'
_isA005316 = df['oeisID'] == 'A005316'
_nModulo2 = df['n'] % 2

# How many arcCodes each sequence starts with.
df.loc[_isA000682, 'initialArcCodes'] = (df['n'] // 2) + 2
df.loc[_isA005316, 'initialArcCodes'] = 1

# Mean bit width of the initial arcCodes. For A005316 this is 4 when n is odd and 5 when n is even.
df.loc[_isA000682, 'initialArcCodesBitWidthMean'] = 4 + _nModulo2 + df['n']
df.loc[_isA005316, 'initialArcCodesBitWidthMean'] = 4 - _nModulo2 + 1

# Mean count of set bits in the initial arcCodes. For A005316 this is 4 when n is odd and 3 when n is even.
df.loc[_isA000682, 'initialArcCodesBitPopulationMean'] = 4 + _nModulo2 + df['n']
df.loc[_isA005316, 'initialArcCodesBitPopulationMean'] = 4 + _nModulo2 - 1

# Totals are the per-arcCode means scaled by the number of initial arcCodes.
df['initialArcCodesBitWidthTotal'] = df['initialArcCodes'] * df['initialArcCodesBitWidthMean']
df['initialArcCodesBitPopulationTotal'] = df['initialArcCodes'] * df['initialArcCodesBitPopulationMean']

# Within each (oeisID, n), order rows from the largest boundary down to the smallest.
df: pandas.DataFrame = df.sort_values(by=['oeisID', 'n', 'boundary'], ascending=[True, True, False])
def addColumnsGrowing(groupBy: pandas.DataFrame) -> pandas.DataFrame:
    """Flag the rows whose `buckets` and `arcCodes` exceed the value in the preceding row.

    Adds two boolean columns to `groupBy` in place: `bucketsGrowing` and `arcCodesGrowing`. A row
    is flagged `True` when its value is strictly greater than the previous row's value, or when it
    has no previous row to compare against (the first row).

    Parameters
    ----------
    groupBy : pandas.DataFrame
        Rows in the order they should be compared; must contain `buckets` and `arcCodes`.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in, with the two columns added.
    """
    for columnSource, columnGrowing in (('buckets', 'bucketsGrowing'), ('arcCodes', 'arcCodesGrowing')):
        difference: pandas.Series = groupBy[columnSource].diff()
        # `diff()` is NaN where there is no previous row, and comparing NaN with `gt(0)` already
        # yields False, so a `fillna(True)` placed after the comparison can never fire. Test for
        # the missing difference explicitly so the first row is flagged True.
        groupBy[columnGrowing] = difference.gt(0) | difference.isna()
    return groupBy
# Run `addColumnsGrowing` on each (oeisID, n) group by iterating the groups and concatenating the
# results. `DataFrameGroupBy.apply` is deprecated for functions that rely on the grouping columns
# (pandas 2.2), and newer pandas releases leave 'oeisID' and 'n' out of what `apply` returns, which
# would break later selections on those columns. Each group is copied before it is modified, and
# row order is unchanged because `df` is already sorted by the grouping keys.
_dataframesGrouped: list[pandas.DataFrame] = [
    addColumnsGrowing(dataframeGroup.copy()) for _groupKey, dataframeGroup in df.groupby(['oeisID', 'n'])
]
# `pandas.concat` rejects an empty list, so an empty `df` is passed through directly.
df = pandas.concat(_dataframesGrouped) if _dataframesGrouped else addColumnsGrowing(df.copy())

# Data playground

In [None]:
# Narrow `df` to A005316 rows flagged `bucketsGrowing` that have odd n and even boundary.
# The parity columns hold 0/1 integers, so they are compared explicitly instead of being used
# directly as mask operands.
_selector = (
    df['bucketsGrowing'].eq(True)
    & df['oeisID'].eq('A005316')
    & df['odd_n'].eq(1)
    & df['odd_boundary'].eq(0)
)
df = df[_selector]

# Facts (?)

In [None]:
from typing import Any
from mapFolding.reference.A005316facts import (
	bucketsIf_boundary_EVEN_by_nLess_boundary, bucketsIf_boundary_ODD_by_nLess_boundary)
from pprint import pprint

# NOTE I strongly suspect that a better analysis starts by bifurcating on whether n is odd, then bifurcating each of
# those branches on whether boundary is odd, and finally ordering by nLess_boundary. But I have not made those tables yet.

# Keyword arguments shared by every `pprint` call in this cell.
parameters_pprint: dict[str, Any] = {
    'indent': 4,
    'width': 80,
    'compact': False,
    'sort_dicts': False,
    'underscore_numbers': True,
}

# Print the odd-boundary table first, then the even-boundary table.
for bucketsBy_nLess_boundary in (bucketsIf_boundary_ODD_by_nLess_boundary, bucketsIf_boundary_EVEN_by_nLess_boundary):
    pprint(bucketsBy_nLess_boundary, **parameters_pprint)

### My perspective
I examined these values so I could solve the pragmatic problem of allocating memory during computation. Therefore, I have tended to focus on the maxima of buckets and signatures at each boundary, and I have treated n as a second-tier independent variable. I will likely change this to treat n as the first-tier variable and return boundary to its role as a "subsection" of n.

### Initial conditions
A005316, closed meanders, always starts with 1 arcCode. A000682, in contrast, always starts with 2 *or more* arcCodes: `arcCodes = (n // 2) + 2`. This difference makes it easier to analyze A005316 than A000682.

Furthermore, there are only 2 possible initial values of the 1 arcCode. If n is odd, arcCode = 15. If n is even, arcCode = 22. This simplicity is an advantage when analyzing A005316.

### Total non-unique buckets is a function of n, boundary, and ?

If we compare the total number of non-unique signatures (bucketsTotal) at the same boundary across all values of n we can compute, we see three general trends. For a given boundary, as (n - boundary) increases:
1. bucketsTotal increases through a specific series of values.
2. At roughly boundary = n/2, bucketsTotal increases at an ever decreasing rate.
3. At roughly boundary = n/3 (that is, when n - boundary is about 2 × boundary), bucketsTotal stops increasing.

### More facts
- For example, for n >= 4 and boundary=1, bucketsTotal=3. Always. (The maximums also apply to A000682: in this case, for n >= 5 and boundary=1, bucketsTotal=3.)
- From n-boundary>=1 to boundary<=(n+1)//2, the value of buckets has two or four series: at least a series for odd and even values of boundary, and those might be bifurcated into odd and even values of n.
- In each series, it seems easier to describe the growth by dividing the series into odd and even values of n-boundary.
- If boundary is odd, bucketsTotal is maxed out at (boundary*2+2 <= n-boundary)
- If boundary is even, bucketsTotal is maxed out at (boundary*2+1 <= n-boundary)
- Equivalently, for either parity of boundary, bucketsTotal is maxed out when boundary ≤ (n - 1 - (boundary % 2)) // 3

# Visualize

In [None]:
# pyright: basic
# Draw one line per boundary: log1p(buckets) plotted against n - boundary.
fig, axes = pyplot.subplots(1, 1, figsize=(14, 9), sharex=True)
for groupByValue, dataframeGroupBy in df.groupby('boundary'):
    axes.plot(dataframeGroupBy['nLess_boundary'], numpy.log1p(dataframeGroupBy['buckets']), label=f"{groupByValue}")
# for groupByValue, dataframeGroupBy in dfB.groupby('boundary'):
#     axes[1].plot(dataframeGroupBy['nLess_boundary'], numpy.log1p(dataframeGroupBy['buckets']), label=f"{groupByValue}")
axes.set_ylabel('log1p(buckets)')
# The x values are `nLess_boundary` (n - boundary), not n, so the label names what is plotted.
axes.set_xlabel('n - boundary')
# Each legend entry is a boundary value; the title says so.
axes.legend(title='boundary', ncol=4, fontsize=8)
pyplot.tight_layout()
pyplot.show()

# Tasks

## Make the unique signatures dictionary. 

```python
signatures: dict[str, dict[int, dict[int, int]]]
```

			{oeisID: {n: {boundary: arcCodesTotal}}}

In [None]:
from pprint import pprint
# Drop the columns the dictionary does not use, then order rows so boundary ascends within each n.
df: pandas.DataFrame = df.drop(columns=['crossingsBitWidth', 'arcCodeBitWidth', 'buckets']).sort_values(['oeisID', 'n', 'boundary'])

# Build {oeisID: {n: {boundary: arcCodes}}} by walking the groups explicitly.
ImaDictionary = {
    oeisID: {
        n: dataframe_n.set_index('boundary')['arcCodes'].to_dict()
        for n, dataframe_n in dataframe_oeisID.groupby('n')
    }
    for oeisID, dataframe_oeisID in df.groupby('oeisID')
}

pprint(ImaDictionary, indent=0, width=120, compact=True, sort_dicts=False)

# Dumping ground for miscellaneous cells

### Pivot the data

In [None]:
# NOTE This is a little old. Watch out, for example, for accidentally collating data from A000682 and A005316.
# Pivot the data to create new columns for each value of 'boundary'
# Flatten the MultiIndex columns and append "_boundaryX" to column names
# Reset the index to make 'nLess_boundary' a column and sort by 'nLess_boundary'
def pivotOn_nLess_boundary(dataframeTarget: pandas.DataFrame) -> pandas.DataFrame:
	"""Spread `n` and `buckets` into one column per `boundary`, with one row per `nLess_boundary`.

	Parameters
	----------
	dataframeTarget : pandas.DataFrame
		Must contain the columns `nLess_boundary`, `boundary`, `n`, and `buckets`.

	Returns
	-------
	pandas.DataFrame
		A frame whose first column is `nLess_boundary`, followed by columns named
		`n_boundary<boundary>` and `buckets_boundary<boundary>`. Rows are sorted by
		`nLess_boundary` ascending and the index is renumbered from 0.
	"""
	dataframeWide: pandas.DataFrame = dataframeTarget.pivot(index='nLess_boundary', columns='boundary', values=['n', 'buckets'])
	# Collapse each two-level (value name, boundary) column label into a single string.
	dataframeWide.columns = [f"{measure}_boundary{boundary}" for measure, boundary in dataframeWide.columns]
	# Turn `nLess_boundary` back into a column, sort on it, and renumber the index in one step.
	return dataframeWide.reset_index().sort_values(by='nLess_boundary', ascending=True, ignore_index=True)
# Replace `df` with its pivoted form. The pivot reads the 'buckets' column, so run this cell
# before the unique-signatures cell, which drops 'buckets' from `df` (or reload the data first).
df: pandas.DataFrame = pivotOn_nLess_boundary(df)

### Useful boolean selectors

In [None]:
# nLess_boundary: int = state.n - state.boundary
# boundaryIsOdd: bool = bool(state.boundary & 1)
# nLess_boundaryIsOdd: bool = bool(nLess_boundary & 1)
# boundaryIsEven: bool = not boundaryIsOdd

# bucketsTotalAtMaximum: bool = state.boundary <= ((state.n - 1 - (state.boundary % 2)) // 3)
# bucketsTotalGrowsExponentially: bool = state.boundary > nLess_boundary
# bucketsTotalGrowsLogarithmically: bool = state.boundary > ((state.n - (state.n % 3)) // 3)