Skip to content

Commit

Permalink
Add initial "Predicting performance" plots
Browse files Browse the repository at this point in the history
  • Loading branch information
jbreffle committed Feb 17, 2024
1 parent be6e67b commit 0eee2ff
Show file tree
Hide file tree
Showing 10 changed files with 139 additions and 43 deletions.
70 changes: 41 additions & 29 deletions notebooks/4_nn_predict.ipynb

Large diffs are not rendered by default.

39 changes: 34 additions & 5 deletions src/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,16 @@
# Default directory for saved models and their loss arrays (used as the
# default `model_path` by the functions below). Created at import time if missing.
MODEL_PATH = pyprojroot.here("models")
MODEL_PATH.mkdir(parents=True, exist_ok=True)

# Default directory for the artifacts exported for the Streamlit app.
# Also created at import time if missing.
# NOTE(review): "MODE" looks like a typo for "MODEL"; the name is left as-is
# because other code may reference it.
STREAMLIT_MODE_PATH = pyprojroot.here("streamlit/streamlit-data")
STREAMLIT_MODE_PATH.mkdir(parents=True, exist_ok=True)


# Functions
def load_loss(model_name, model_path=MODEL_PATH):
    """Load the saved training and test loss arrays for a model.

    The loss files are keyed by the model hash, which is taken to be the
    second underscore-separated token of ``model_name`` with anything from
    the first ``.`` onward removed (e.g. ``"model_abc123.pt"`` -> ``"abc123"``).

    Args:
        model_name: File name of the saved model; must contain at least one
            underscore.
        model_path: Directory (path object supporting ``/``) that holds
            ``train_loss_<hash>.npy`` and ``test_loss_<hash>.npy``.

    Returns:
        Tuple ``(train_loss, test_loss)`` of the arrays read from disk.

    Raises:
        IndexError: If ``model_name`` contains no underscore.
        FileNotFoundError: If either loss file does not exist.
    """
    model_hash = model_name.split("_")[1].split(".")[0]
    # Only the hash-keyed files are read; the older name-keyed files
    # (train_loss_<model_name>.npy) are no longer consulted.
    train_loss = np.load(model_path / f"train_loss_{model_hash}.npy")
    test_loss = np.load(model_path / f"test_loss_{model_hash}.npy")
    return train_loss, test_loss


Expand All @@ -31,15 +34,41 @@ def load_model(model_name, model_path=MODEL_PATH):
return model


def save_for_streamlit(
    model,
    train_loss,
    test_loss,
    X_test,
    y_test,
    streamlit_model_path=STREAMLIT_MODE_PATH,
):
    """Export a model and its evaluation data for the Streamlit app.

    Writes ``streamlit_model.pt`` (the TorchScript-compiled model),
    ``streamlit_train_loss.npy``, ``streamlit_test_loss.npy``, ``X_test.npy``
    and ``y_test.npy`` into ``streamlit_model_path``.

    Args:
        model: Module to compile with ``torch.jit.script`` and save.
        train_loss: Training loss values, saved with ``np.save``.
        test_loss: Test loss values, saved with ``np.save``.
        X_test: Test inputs as a torch tensor.
        y_test: Test targets as a torch tensor.
        streamlit_model_path: Output directory (``str`` or path object).
            Created if it does not exist.

    Returns:
        None.
    """
    from pathlib import Path

    out_dir = Path(streamlit_model_path)
    # The module-level mkdir only covers the default location.
    out_dir.mkdir(parents=True, exist_ok=True)

    # TorchScript export, so the app can load the model without its class.
    scripted_model = torch.jit.script(model)
    scripted_model.save(str(out_dir / "streamlit_model.pt"))

    # np.save appends the ".npy" suffix to each of these names.
    np.save(out_dir / "streamlit_train_loss", train_loss)
    np.save(out_dir / "streamlit_test_loss", test_loss)
    # detach() first: converting a tensor that requires grad to NumPy raises.
    np.save(out_dir / "X_test", X_test.detach().cpu().numpy())
    np.save(out_dir / "y_test", y_test.detach().cpu().numpy())


def save_model(
    model,
    model_name,
    train_loss=None,
    test_loss=None,
    model_path=MODEL_PATH,
    streamlit_model_path=STREAMLIT_MODE_PATH,
    save_to_streamlit=False,
):
    """Save a model, and optionally its loss arrays, to disk.

    The model is written with ``torch.save`` to ``model_path / model_name``.
    Loss arrays are written next to it as ``train_loss_<hash>.npy`` and
    ``test_loss_<hash>.npy``, where ``<hash>`` is the second
    underscore-separated token of ``model_name`` with anything from the
    first ``.`` onward removed.

    Args:
        model: Object to serialize with ``torch.save``.
        model_name: File name for the saved model; must contain at least one
            underscore.
        train_loss: Optional training loss values; skipped when ``None``.
        test_loss: Optional test loss values; skipped when ``None``.
        model_path: Output directory (path object supporting ``/``).
        streamlit_model_path: Accepted for API compatibility.
        save_to_streamlit: Accepted for API compatibility.

    Returns:
        None.

    Raises:
        IndexError: If ``model_name`` contains no underscore.

    NOTE(review): ``streamlit_model_path`` and ``save_to_streamlit`` are not
    used by this function, so passing ``save_to_streamlit=True`` currently
    has no effect. Confirm whether an export step was intended here.
    """
    torch.save(model, model_path / model_name)
    model_hash = model_name.split("_")[1].split(".")[0]
    # np.save appends ".npy", matching the names load_loss-style readers expect.
    if train_loss is not None:
        np.save(model_path / f"train_loss_{model_hash}", train_loss)
    if test_loss is not None:
        np.save(model_path / f"test_loss_{model_hash}", test_loss)


Expand Down
11 changes: 7 additions & 4 deletions src/plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,13 @@ def model_feature_scatter(
ax=ax1,
**scatter_kwargs,
)
# Make sure the y-axis is the same for both plots, then hide from the second plot
# Make sure the y-axis is the same for both plots
ax1.set_ylim(ax0.get_ylim())
ax1.yaxis.set_visible(False)
ax1.set_yticklabels([])
for tic in ax1.yaxis.get_major_ticks():
tic.tick1line.set_visible(False)
tic.tick2line.set_visible(False)
ax1.set_ylabel("")
# Turn off legend for the second plot
ax1.get_legend().remove()

Expand All @@ -57,9 +61,8 @@ def model_feature_scatter0(
ax.scatter(x_values, true_values, c="tab:blue", **scatter_kwargs)
ax.scatter(x_values, predicted_values, c="tab:orange", **scatter_kwargs)
ax.set_xlabel(util.get_label_string(x_feature))
ax.set_ylabel("Value")
ax.set_ylabel(util.get_label_string("wpm"))
ax.legend(["True", "Predicted"])

return ax


Expand Down
2 changes: 2 additions & 0 deletions streamlit/Home.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,8 @@ def set_plt_style():
"axes.titlepad": 8,
"axes.labelsize": 8,
"axes.titlesize": 12,
"legend.frameon": False,
"legend.fontsize": 8,
}
plt.rcParams.update(params)
return
Expand Down
60 changes: 55 additions & 5 deletions streamlit/pages/3_Predicting_performance.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,35 @@
import os

import streamlit as st
import numpy as np
import matplotlib.pyplot as plt
import pyprojroot
import torch
from torch import Tensor

from src import plot

import Home

# Directory holding the exported model and arrays that this page loads.
MODEL_PATH = pyprojroot.here("streamlit/streamlit-data/")


@st.cache_resource
def load_model(model_path=MODEL_PATH):
    """Load the exported TorchScript model and its saved arrays.

    Args:
        model_path: Directory (``str`` or path object) containing
            ``streamlit_model.pt``, ``streamlit_train_loss.npy``,
            ``streamlit_test_loss.npy``, ``X_test.npy`` and ``y_test.npy``.

    Returns:
        Tuple ``(model, train_loss, test_loss, X_test, y_test)``. The losses
        are the NumPy arrays read from disk; ``X_test`` and ``y_test`` are
        float tensors placed on the same device as the model (CUDA when
        available, otherwise CPU).
    """
    # Choose the device first so the model can be deserialized straight onto it.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Without map_location, torch.jit.load tries to restore the module onto
    # the device it was saved from, which fails on a CPU-only host when the
    # model was exported from a GPU.
    model = torch.jit.load(
        os.path.join(model_path, "streamlit_model.pt"), map_location=device
    )
    # This page only runs inference, so disable training-only behaviour
    # such as dropout and batch-norm statistic updates.
    model.eval()

    # os.path.join is used throughout so a plain str path also works.
    train_loss = np.load(os.path.join(model_path, "streamlit_train_loss.npy"))
    test_loss = np.load(os.path.join(model_path, "streamlit_test_loss.npy"))
    X_test = np.load(os.path.join(model_path, "X_test.npy"))
    y_test = np.load(os.path.join(model_path, "y_test.npy"))

    X_test = torch.tensor(X_test, dtype=torch.float).to(device)
    y_test = torch.tensor(y_test, dtype=torch.float).to(device)

    return model, train_loss, test_loss, X_test, y_test


def main():
"""New page."""
Expand All @@ -25,19 +53,41 @@ def main():
st.divider()

st.subheader("Neural network model")
nb_url_1 = "https://github.com/jbreffle/monkeytype-analysis/blob/main/notebooks/4_nn_predict.ipynb"
st.write(
f"""
One way to predict performance is to use a neural network model.
"""
)
model, train_loss, test_loss, X_test, y_test = load_model()
# Plot losses over time
fig = plt.figure(figsize=(6, 3))
ax = plot.model_loss(train_loss, test_loss)
st.pyplot(fig, use_container_width=True, transparent=True)
# Plot actual vs predicted
test_predictions = model(X_test)
fig = plt.figure(figsize=(6, 3))
ax = plot.model_scatter(
y_test.cpu().detach().numpy(), test_predictions.cpu().detach().numpy()
)
st.pyplot(fig, use_container_width=True, transparent=True)
# Plot actual and predicted across feature values
fig = plt.figure(figsize=(6, 3))
fig, ax0, ax1 = plot.model_feature_scatter(
y_test.cpu(), test_predictions.detach().cpu().numpy(), X_test.cpu(), fig=fig
)
# Remove legend background
st.pyplot(fig, use_container_width=True, transparent=True)

st.divider()

nb_url_1 = "https://github.com/jbreffle/monkeytype-analysis/blob/main/notebooks/4_nn_predict.ipynb"
st.write(
f"""
Click here
[./notebooks/4_nn_predict.ipynb]({nb_url_1})
for plots that will eventually be included in this app.
for additional training and additional plots.
"""
)
# TODO
st.divider()

nb_url_2 = "https://github.com/jbreffle/monkeytype-analysis/blob/main/notebooks/5_nn_hyperopti.ipynb"
st.write(
f"""
Expand Down
Binary file added streamlit/streamlit-data/X_test.npy
Binary file not shown.
Binary file added streamlit/streamlit-data/streamlit_model.pt
Binary file not shown.
Binary file added streamlit/streamlit-data/streamlit_test_loss.npy
Binary file not shown.
Binary file added streamlit/streamlit-data/streamlit_train_loss.npy
Binary file not shown.
Binary file added streamlit/streamlit-data/y_test.npy
Binary file not shown.

0 comments on commit 0eee2ff

Please sign in to comment.