In [1]:
import os, sys

# Parent of the notebook's working directory; presumably the project root that holds the importable packages.
PROJ_DIR = os.path.abspath('../')

# Register the directory once so that re-running this cell does not add duplicate entries.
project_dir_missing = PROJ_DIR not in sys.path
if project_dir_missing:
    sys.path.append(PROJ_DIR)

In [2]:
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split

from fundamental_domain_projections.dirichlet.dirichlet_dataset import *
from data.parse_data import *

2022-07-14 00:14:19.130222: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
# Base file name under which the Dirichlet projections are saved and later re-loaded.
PROJ_FILE_NAME = 'test_proj'
# Base name shared by the saved model (.h5) and its training-history CSV.
MODEL_NAME = 'test_model'

# Matrix dimensions handed to DirichletDataset; equal to the (4, 26) input shape of the network.
MATRIX_DIM = (4, 26)

In [12]:
# --- Projection and model files for complete dataset (~78k rows) ---

# PROJ_FILE_NAME = 'dirichlet_proj'
# MODEL_NAME = 'model_simple_nn'

### Calculate and save Dirichlet projections

In [4]:
# --- Input data ---
# Number of leading rows kept for a quick test run; set to None to use every row.
N_SAMPLES = 200

X, Y = parse_txt_file()
X, Y = X[:N_SAMPLES], Y[:N_SAMPLES]
assert len(X) == len(Y), 'inputs and labels must have the same number of rows'

# --- Dirichlet projections ---
# Building the dataset computes the projections and, because save_proj=True,
# also writes them to disk under PROJ_FILE_NAME.
X_proj = DirichletDataset(X, Y, MATRIX_DIM, save_proj=True, file_name=PROJ_FILE_NAME).X_proj

Distribution of first Hodge numbers:  [(6, 1), (7, 13), (8, 113), (9, 409), (10, 1148), (11, 2395), (12, 4164), (13, 5976), (14, 7635), (15, 8578), (16, 8848), (17, 8170), (18, 7489), (19, 6049), (20, 4900), (21, 3830), (22, 2699), (23, 1972), (24, 1387), (25, 942), (26, 593), (27, 411), (28, 294), (29, 192), (30, 109), (31, 58), (32, 37), (33, 23), (34, 15), (35, 7), (36, 5), (37, 1), (38, 2), (39, 1), (42, 1)]
Fixed point (x0) used:  [[ 1.1712386   5.39716452 -3.36204844 -7.62748005 -6.99759324  3.54881636
   4.12697184  1.07979268  5.2350386  -7.34003505  5.21495319 -6.79695338
   2.38286511  0.2248728   1.1146488  -7.41698295  2.23264106  0.55651819
  -1.47618912 -3.64904178 -4.29466839 -7.17929626 -3.865437    3.60472616
  -1.71685172  0.50640782]
 [ 3.03846043  2.3324441  -6.99972747  0.66465547 -3.62255934 -6.25241803
  -4.85942422 -1.61195055  5.86649164 -2.04314667  5.48655367 -4.89406694
   1.34003729  5.83557079  4.46544163 -1.90714389  1.85469414  4.33725819
  -6.39385325  

  if x0=='Daniel':
100%|█████████████████████████████████████████| 200/200 [00:00<00:00, 510.83it/s]


...finished Dirichlet projection calculation.
Saving projection to /Users/pragyasingh/PycharmProjects/LOGML2/orig_repo/kreuzer-skarke-ML/data/raw/test_proj


### Load Dirichlet projections

In [5]:
# Rebuild the dataset object from the saved projection file instead of recomputing it.
diric_proj = DirichletDataset(load_proj=True, file_name=PROJ_FILE_NAME)

# Pull out the raw inputs, the labels and the projected inputs for the cells below.
X = diric_proj.X
Y = diric_proj.Y
X_proj = diric_proj.X_proj

Loading projection from /Users/pragyasingh/PycharmProjects/LOGML2/orig_repo/kreuzer-skarke-ML/data/raw/test_proj.npz ...
...finished loading


### Train ML model

In [6]:
def get_nn(input_shape=(4, 26), num_classes=43, learning_rate=0.001):
    """Build and compile a small fully connected softmax classifier.

    Each sample is flattened and passed through three ReLU layers (100, 50
    and 50 units) followed by a softmax layer with ``num_classes`` units.

    Args:
        input_shape: Shape of one sample, excluding the batch dimension.
            Defaults to (4, 26), the value that used to be hard-coded.
        num_classes: Number of output units. The loss is sparse categorical
            cross-entropy, so labels are expected to be integer class
            indices in ``[0, num_classes)``. Defaults to 43.
        learning_rate: Learning rate of the Adam optimizer. Defaults to 0.001.

    Returns:
        A compiled ``tf.keras`` model that tracks accuracy as a metric.
    """
    input_shape = tuple(input_shape)

    # Number of features per sample once the input is flattened.
    flat_dim = 1
    for dim in input_shape:
        flat_dim *= dim

    inp = tf.keras.layers.Input(shape=input_shape)
    prep = tf.keras.layers.Reshape((flat_dim,))(inp)
    h1 = tf.keras.layers.Dense(100, activation='relu')(prep)
    h2 = tf.keras.layers.Dense(50, activation='relu')(h1)
    h3 = tf.keras.layers.Dense(50, activation='relu')(h2)
    out = tf.keras.layers.Dense(num_classes, activation='softmax')(h3)

    model = tf.keras.models.Model(inputs=inp, outputs=out)
    model.compile(
        loss='sparse_categorical_crossentropy',
        optimizer=tf.keras.optimizers.Adam(learning_rate),
        metrics=['accuracy'],
    )
    return model

In [7]:
model = get_nn()

# Fixed seed so the 50/50 train/test split is identical on every run.
SPLIT_SEED = 42

X_new, Y_new = {}, {}
X_new['train'], X_new['test'], Y_new['train'], Y_new['test'] = train_test_split(
    X_proj, Y, test_size=0.5, random_state=SPLIT_SEED
)

# `result.history` holds the per-epoch training and validation metrics.
result = model.fit(
    X_new['train'], Y_new['train'],
    epochs=20,
    validation_data=(X_new['test'], Y_new['test']),
)

2022-07-14 00:14:34.699670: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


### Save model and results

In [8]:
def save_results_as_csv(results_dict, file_name):
    """Write results to ``<parent dir>/data/saved_results/<file_name>.csv``.

    The parent directory is resolved relative to the current working
    directory. The output folder is created when it does not exist yet.

    Args:
        results_dict: Data accepted by ``pd.DataFrame`` (for example a dict
            mapping metric names to per-epoch values).
        file_name: Base name of the CSV file, without the ``.csv`` extension.
    """
    base_dir = os.path.abspath('../')
    results_dir = os.path.join(base_dir, 'data', 'saved_results')
    # to_csv does not create missing folders, so make sure the target exists.
    os.makedirs(results_dir, exist_ok=True)
    rawpath = os.path.join(results_dir, file_name + '.csv')
    print('Saving results as csv in  %s' % rawpath)
    df = pd.DataFrame(results_dict)
    df.to_csv(rawpath)

def saved_model(model, model_name):
    """Save ``model`` to ``<parent dir>/data/saved_models/<model_name>.h5``.

    The parent directory is resolved relative to the current working
    directory, and the destination path is printed before saving.

    Args:
        model: Model object handed to ``tf.keras.models.save_model``.
        model_name: Base name of the model file, without the ``.h5`` extension.
    """
    relative_target = 'data/saved_models/' + model_name + '.h5'
    target_path = os.path.join(os.path.abspath('../'), relative_target)
    print('Saving model to %s' % target_path)
    tf.keras.models.save_model(model, target_path)

In [9]:
# Persist the trained network and its per-epoch metrics under the same base name.
saved_model(model=model, model_name=MODEL_NAME)
save_results_as_csv(results_dict=result.history, file_name=MODEL_NAME)

Saving model to /Users/pragyasingh/PycharmProjects/LOGML2/orig_repo/kreuzer-skarke-ML/data/saved_models/test_model.h5
Saving results as csv in  /Users/pragyasingh/PycharmProjects/LOGML2/orig_repo/kreuzer-skarke-ML/data/saved_results/test_model.csv


### Load and re-use saved model

In [10]:
def load_model(model_name):
    """Load a saved model from ``<parent dir>/data/saved_models/<model_name>.h5``.

    The parent directory is resolved relative to the current working
    directory. The path and the model summary are printed before returning.

    Args:
        model_name: Base name of the model file, without the ``.h5`` extension.

    Returns:
        The model object returned by ``tf.keras.models.load_model``.
    """
    base_dir = os.path.abspath('../')
    rawpath = os.path.join(base_dir, 'data/saved_models/' + model_name + '.h5')
    print('Loading model from %s' % rawpath)
    model = tf.keras.models.load_model(rawpath)
    # summary() prints the table itself and returns None, so wrapping it in
    # print() would emit a stray "None" line after the table.
    model.summary()
    return model

In [11]:
model = load_model(MODEL_NAME)

# Fixed seed so the 50/50 train/test split is identical on every run.
# NOTE: the validation metrics below are only meaningful if this split matches
# the one the saved model was originally trained with; otherwise samples the
# model has already seen end up in the test set.
SPLIT_SEED = 42

X_new, Y_new = {}, {}
X_new['train'], X_new['test'], Y_new['train'], Y_new['test'] = train_test_split(
    X_proj, Y, test_size=0.5, random_state=SPLIT_SEED
)

# Continue training the loaded model for another 20 epochs.
result = model.fit(
    X_new['train'], Y_new['train'],
    epochs=20,
    validation_data=(X_new['test'], Y_new['test']),
)

Loading model from /Users/pragyasingh/PycharmProjects/LOGML2/orig_repo/kreuzer-skarke-ML/data/saved_models/test_model.h5
Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 4, 26)]           0         
                                                                 
 reshape (Reshape)           (None, 104)               0         
                                                                 
 dense (Dense)               (None, 100)               10500     
                                                                 
 dense_1 (Dense)             (None, 50)                5050      
                                                                 
 dense_2 (Dense)             (None, 50)                2550      
                                                                 
 dense_3 (Dense)             (None, 43)                2193      
      