In [None]:
from pathlib import Path


from classification.bert import run_bert
from classification.nn import preprocess_data
from computation.expert import expert_integration
from textbooks.data import Textbook

# Fine-tune model using expert data

In [None]:
# Locations of the parsed textbooks: one base textbook plus the textbooks
# that are integrated against it.
base_textbook_path = Path(
    "textbooks-parsed/2012_Book_ModernMathematicalStatisticsWi.json"
)
other_textbook_paths = (
    Path("textbooks-parsed/Walpole_Probability_and_Statistics.json"),
)

# Run the expert integration and keep only the `dataset` attribute of its result.
expert_integration_result = expert_integration(
    base_textbook=Textbook.from_json(base_textbook_path),
    other_textbooks=tuple(
        Textbook.from_json(textbook_path) for textbook_path in other_textbook_paths
    ),
)
expert_dataset = expert_integration_result.dataset

In [None]:
# Run the expert dataset through run_bert. It returns three values; only the
# first two (bound to X and y, presumably features and labels — confirm in
# classification.bert) are used, the third is discarded.
# NOTE(review): binding `_` in an IPython kernel shadows the automatic
# "last output" variable for the rest of the session.
X, y, _ = run_bert(expert_dataset)
# preprocess_data yields the number of classes together with the train/test
# partitions of X and y (split ratio and any label encoding are defined in
# classification.nn and are not visible from this notebook).
num_classes, X_train, X_test, y_train, y_test = preprocess_data(X, y)

In [None]:
from keras.layers import LSTM, SimpleRNN

from classification.nn import grid_search_neural_networks

# Hyper-parameter grid handed to grid_search_neural_networks:
# 4 unit counts x 4 dropout rates x 2 recurrent layer classes x 3 batch sizes
# = 96 combinations.
# The "model__" prefix presumably routes a value to the model-building
# function (scikit-learn style parameter naming) — confirm in classification.nn.
param_grid = {
    "model__units": [100, 125, 150, 200],
    "model__dropout_rate": [0.4, 0.6, 0.8, 0.9],
    "model__model_type": [LSTM, SimpleRNN],
    "batch_size": [32, 64, 128],
}

# NOTE(review): `reshape` is not defined or imported in any cell shown in this
# notebook; on a fresh kernel this line raises NameError unless it is provided
# elsewhere. Import it explicitly from the module that defines it.
best_model = grid_search_neural_networks(
    num_classes=num_classes,
    X_train=reshape(X_train),
    y_train=y_train,
    param_grid=param_grid,
)
# Winning parameter combination found by the search.
best_params = best_model.best_params_

# NOTE(review): `performance_metrics` is likewise not defined or imported in
# the visible cells. Also, X_train is passed through reshape() above while
# X_test is passed as-is here — confirm that performance_metrics reshapes its
# input itself, otherwise the evaluation input shape differs from training.
performance_metrics(best_model, X_test, y_test)

# Cross-validation using generated data

In [None]:
import os


def start_caffeinate():
    """Start the macOS ``caffeinate`` utility in the background.

    The trailing ``&`` makes the shell return immediately, so the notebook
    is not blocked while the machine is kept awake.

    Returns:
        The exit status reported by ``os.system`` for the shell command.
    """
    return os.system("caffeinate &")


def end_caffeinate():
    """Stop background ``caffeinate`` processes via ``pkill``.

    Note that ``pkill caffeinate`` matches by process name, so it also stops
    ``caffeinate`` instances that were not started from this notebook.

    Returns:
        The exit status reported by ``os.system`` for the shell command.
    """
    return os.system("pkill caffeinate")

In [2]:
from keras.layers import LSTM

from classification.cross_validation import cross_validate

# Hyper-parameters are hard-coded here (rather than read from the grid-search
# cell) so this cell can be run without repeating the search.
# NOTE(review): presumably these are the winning values of that search — confirm.
best_params = {
    "batch_size": 32,
    "model__dropout_rate": 0.8,
    "model__model_type": LSTM,
    "model__units": 150,
}

# Keep the machine awake for the duration of the run. The try/finally makes
# sure the background caffeinate process is stopped even when cross_validate
# raises or the cell is interrupted; otherwise it would keep running.
start_caffeinate()
try:
    cross_validate(
        base_textbook="2012_Book_ModernMathematicalStatisticsWi", params=best_params
    )
finally:
    end_caffeinate()