# Example of using the group generation functions

**Note**: Restart the kernel after editing `CoxeterArtinGroupGeneration.py`; otherwise the notebook keeps using the previously imported (cached) version of the module.

In [1]:
import logging
import os

import numpy as np

from CoxeterArtinGroupGeneration import getTimestamp, DataGenerator, loadDataset, readDataset, plotFrequencies, setup_logging

In [2]:
# Optional: configure logging through the module's helper.
# The level requested here is INFO (not DEBUG); the "[HH:MM:SS] INFO: ..."
# progress lines recorded under the generation cell are at this level.
# NOTE(review): presumably passing logging.DEBUG gives more verbose output -- confirm in setup_logging.
setup_logging(level=logging.INFO)

In [3]:
# Configure a DataGenerator for one Coxeter matrix, then build the train/test dataset.

# Passed to DataGenerator as BR.
# NOTE(review): presumably the field separator seen in the generated folder name -- confirm.
BR = "."    # break character
# get timestamp (for job)
timestamp = getTimestamp()  # original author's note: format YYYY-MM-DD (not verifiable from this notebook)

# Symmetric 3x3 Coxeter matrix: 1 on the diagonal, 3 everywhere else
# (three generators, every pair with m_ij = 3). This is the matrix of the
# affine group of type A2~, which matches the groupName set below.
coxeterMatrix = np.array([
    [1, 3, 3],
    [3, 1, 3],
    [3, 3, 1],
])
dg = DataGenerator(coxeterMatrix, dataDir="datasets", timestamp=timestamp, BR=BR)
# Label that shows up in the generated folder name.
dg.groupName = "A2_tilde"
# NOTE(review): presumably selects Coxeter (rather than Artin) relations -- confirm in DataGenerator.
dg.mode = 'coxeter'
# Same value that was already passed to the constructor above.
# NOTE(review): possibly redundant -- confirm whether the constructor stores it.
dg.timestamp = timestamp

# define word length, dataset size, splits
min_wordLen = 6
max_wordLen =  22
# Fixed length is set to the maximum word length; the recorded folder name reports it as "pad 22".
fixed_wordLen = max_wordLen
# 12,000 words requested in total; the recorded run yields 6,000 trivial words.
# NOTE(review): presumably an even trivial / non-trivial split -- confirm.
dg.datasetSize = 6000 * 2
# Fraction of the dataset used for training; the recorded run gives 3,600 train / 8,400 test.
dg.train_size = 0.3
# The size attributes above are set before this call.
# NOTE(review): setSizes may read them, so keep this order -- confirm.
dg.setSizes(min_wordLen, max_wordLen, fixed_wordLen)

# generate folder name for dataset using dataset features (updates folderPath)
folderName = dg.generateFolderName()
print(f"Unique folder name for dataset:\n{folderName}")
# define directory path (defined via generation or manually)
# Returns the train and test splits.
# NOTE(review): random_state=1 presumably seeds the split/shuffle for reproducibility -- confirm.
trainDF, testDF = dg.makeDataset(userDatasetPath=folderName, random_state=1)

Unique folder name for dataset:
0 . A2_tilde . 'coxeter' . 6-22 . pad 22 . size 12,000 . split 30 70


[22:45:32] INFO: Word size 6  done | Time alloted 5.00| Words Generated 6
[22:45:37] INFO: Word size 8  done | Time alloted 5.00| Words Generated 6
[22:45:42] INFO: Word size 10 done | Time alloted 5.00| Words Generated 42
[22:45:47] INFO: Word size 12 done | Time alloted 5.00| Words Generated 96
[22:45:52] INFO: Word size 14 done | Time alloted 5.00| Words Generated 365
[22:45:57] INFO: Word size 16 done | Time alloted 5.00| Words Generated 1186
[22:46:02] INFO: Word size 18 done | Time alloted 5.00| Words Generated 3455
[22:46:02] INFO: Word size 20 done | Time alloted 0.12| Words Generated 844


Training set size: 3600
Testing set size: 8400


# View Frequencies

In [4]:
# Read back the trivial and non-trivial word files, located by the
# generator's own datasetPath / trivialFile / nonTrivialFile attributes.
trivialWords = readDataset(os.path.join(dg.datasetPath, dg.trivialFile))
nontrivialWords = readDataset(os.path.join(dg.datasetPath, dg.nonTrivialFile))
# Quick sanity check on the number of trivial words loaded.
print("Total Trivial Words:", len(trivialWords))

Total Trivial Words: 6000


In [7]:
# Plot frequencies separately for each class of words loaded above;
# wordType is the label passed to the plotting helper for each call.
# NOTE(review): presumably used in the figure title -- confirm in plotFrequencies.
plotFrequencies(trivialWords, wordType="Trivial")
plotFrequencies(nontrivialWords, wordType="Non-Trivial")