# Example of using the group generation functions

In [1]:
import os

import numpy as np

#note: must restart kernel if CoxeterArtinGroupGeneration has been updated
from CoxeterArtinGroupGeneration import getTimestamp, DataGenerator, loadDataset, readDataset, plotFrequencies

In [5]:
# ---- Run parameters -------------------------------------------------------
# Timestamp used to tag this generation job (author's note: format YYYY-MM-DD).
timestamp = getTimestamp()

# 3x3 symmetric Coxeter matrix with 1s on the diagonal; handed to the generator.
coxeterMatrix = np.array([[1, 3, 2],
                          [3, 1, 3],
                          [2, 3, 1]])

# Word-length bounds; the fixed length is tied to the upper bound.
min_wordLen, max_wordLen = 12, 26
fixed_wordLen = max_wordLen

# ---- Generator setup ------------------------------------------------------
dg = DataGenerator(
    coxeterMatrix,
    mode="coxeter",
    dataDir="datasets",
    timestamp=timestamp,
)
# Group type and timestamp are also assigned explicitly on the instance.
dg.mode = "coxeter"
dg.timestamp = timestamp

# Total dataset size and training fraction (0.3 -> 30/70 train/test split),
# followed by the word-length configuration.
dg.datasetSize = 15_000
dg.train_size = 0.3
dg.setSizes(min_wordLen, max_wordLen, fixed_wordLen)

# ---- Build the dataset ----------------------------------------------------
# Folder name is derived from the settings above (per the original note this
# call also updates the generator's folder path); it can be replaced by a
# manually chosen path when calling makeDataset.
folderName = dg.generateFolderName()
print(f"Unique folder name for dataset: {folderName}")
trainDF, testDF = dg.makeDataset(userDatasetPath=folderName, random_state=1)

Unique folder name for dataset: 0 | mode 'coxeter' | range 12 - 26 | pad 26 | size 15,000 | split 30 70
Training set size: 4500
Testing set size: 10500


# View Frequencies

In [6]:
# Load the generated word files located under dg.datasetPath; the file names
# come from the generator's trivialFile / nonTrivialFile attributes.
# (`os` is imported in the notebook's top import cell.)
trivialWords = readDataset(os.path.join(dg.datasetPath, dg.trivialFile))
nontrivialWords = readDataset(os.path.join(dg.datasetPath, dg.nonTrivialFile))

In [7]:
# Plot frequencies for each word set, trivial first, then non-trivial.
for words, label in ((trivialWords, "Trivial"), (nontrivialWords, "Non-Trivial")):
    plotFrequencies(words, wordType=label)