Add initial "Predicting performance" plots

jbreffle · Feb 17, 2024 · 0eee2ff · 0eee2ff
1 parent be6e67b
commit 0eee2ff
Show file tree

Hide file tree

Showing 10 changed files with 139 additions and 43 deletions.
diff --git a/notebooks/4_nn_predict.ipynb b/notebooks/4_nn_predict.ipynb
diff --git a/src/models.py b/src/models.py
@@ -15,13 +15,16 @@
 MODEL_PATH = pyprojroot.here("models")
 MODEL_PATH.mkdir(parents=True, exist_ok=True)
 
+# Default output directory for the files written by save_for_streamlit; it is
+# created at import time if it does not exist yet.
+# NOTE(review): "MODE" looks like a typo for "MODEL" - confirm before renaming.
+STREAMLIT_MODE_PATH = pyprojroot.here("streamlit/streamlit-data")
+STREAMLIT_MODE_PATH.mkdir(parents=True, exist_ok=True)
+
 
 # Functions
 def load_loss(model_name, model_path=MODEL_PATH):
-    """Load a model from disk."""
+    """Load the saved train and test loss arrays that belong to a model.
+
+    The file names are built from the second "_"-separated field of
+    ``model_name``, cut at its first "." (``train_loss_<hash>.npy`` and
+    ``test_loss_<hash>.npy`` inside ``model_path``).
+
+    Returns:
+        Tuple ``(train_loss, test_loss)`` as returned by ``np.load``.
+
+    Raises:
+        IndexError: If ``model_name`` contains no underscore.
+    """
     model_hash = model_name.split("_")[1].split(".")[0]
-    train_loss = np.load(model_path / f"train_loss_{model_name}.npy")
-    test_loss = np.load(model_path / f"test_loss_{model_name}.npy")
+    train_loss = np.load(model_path / f"train_loss_{model_hash}.npy")
+    test_loss = np.load(model_path / f"test_loss_{model_hash}.npy")
     return train_loss, test_loss
 
 
@@ -31,15 +34,41 @@ def load_model(model_name, model_path=MODEL_PATH):
     return model
 
 
+def save_for_streamlit(
+    model,
+    train_loss,
+    test_loss,
+    X_test,
+    y_test,
+    streamlit_model_path=STREAMLIT_MODE_PATH,
+):
+    """Export a model, its loss curves and its test set for the Streamlit app.
+
+    The model is compiled with ``torch.jit.script`` and written as
+    ``streamlit_model.pt``. The other arguments are written with ``np.save``
+    as ``streamlit_train_loss``, ``streamlit_test_loss``, ``X_test`` and
+    ``y_test`` in the same directory.
+
+    Args:
+        model: Module to script and save.
+        train_loss: Training-loss values, in any form ``np.save`` accepts.
+        test_loss: Test-loss values, in any form ``np.save`` accepts.
+        X_test: Test inputs as a tensor (any device).
+        y_test: Test targets as a tensor (any device).
+        streamlit_model_path: Output directory, as a path object.
+
+    Returns:
+        None.
+    """
+    # Scripted export of the model itself
+    model_streamlit = torch.jit.script(model)
+    model_streamlit.save(str(streamlit_model_path / "streamlit_model.pt"))
+    # Loss curves
+    np.save(streamlit_model_path / "streamlit_train_loss", train_loss)
+    np.save(streamlit_model_path / "streamlit_test_loss", test_loss)
+    # Test set: detach before converting, because a tensor that still requires
+    # grad cannot be turned into a NumPy array.
+    np.save(streamlit_model_path / "X_test", X_test.detach().cpu().numpy())
+    np.save(streamlit_model_path / "y_test", y_test.detach().cpu().numpy())
+    return None
+
+
 def save_model(
-    model, model_name, train_loss=None, test_loss=None, model_path=MODEL_PATH
+    model,
+    model_name,
+    train_loss=None,
+    test_loss=None,
+    model_path=MODEL_PATH,
+    streamlit_model_path=STREAMLIT_MODE_PATH,
+    save_to_streamlit=False,
 ):
-    """Save a model to disk."""
+    """Save a model, and optionally its loss curves, to disk.
+
+    The model is written with ``torch.save`` to ``model_path / model_name``.
+    Each loss argument that is not None is saved with ``np.save`` as
+    ``train_loss_<hash>`` / ``test_loss_<hash>``, where ``<hash>`` is the
+    second "_"-separated field of ``model_name``, cut at its first ".".
+
+    NOTE(review): ``streamlit_model_path`` and ``save_to_streamlit`` are
+    accepted but not read anywhere in this body.
+
+    Raises:
+        IndexError: If ``model_name`` contains no underscore.
+    """
     torch.save(model, model_path / model_name)
+    model_hash = model_name.split("_")[1].split(".")[0]
     if train_loss is not None:
-        np.save(model_path / f"train_loss_{model_name}", train_loss)
+        np.save(model_path / f"train_loss_{model_hash}", train_loss)
     if test_loss is not None:
-        np.save(model_path / f"test_loss_{model_name}", test_loss)
+        np.save(model_path / f"test_loss_{model_hash}", test_loss)
     return None
 
 

diff --git a/src/plot.py b/src/plot.py
@@ -35,9 +35,13 @@ def model_feature_scatter(
         ax=ax1,
         **scatter_kwargs,
     )
-    # Make sure the y-axis is the same for both plots, then hide from the second plot
+    # Make sure the y-axis is the same for both plots
     ax1.set_ylim(ax0.get_ylim())
-    ax1.yaxis.set_visible(False)
+    ax1.set_yticklabels([])
+    for tic in ax1.yaxis.get_major_ticks():
+        tic.tick1line.set_visible(False)
+        tic.tick2line.set_visible(False)
+    ax1.set_ylabel("")
     # Turn off legend for the second plot
     ax1.get_legend().remove()
 
@@ -57,9 +61,8 @@ def model_feature_scatter0(
     ax.scatter(x_values, true_values, c="tab:blue", **scatter_kwargs)
     ax.scatter(x_values, predicted_values, c="tab:orange", **scatter_kwargs)
     ax.set_xlabel(util.get_label_string(x_feature))
-    ax.set_ylabel("Value")
+    ax.set_ylabel(util.get_label_string("wpm"))
     ax.legend(["True", "Predicted"])
-
     return ax
 
 

diff --git a/streamlit/Home.py b/streamlit/Home.py
@@ -82,6 +82,8 @@ def set_plt_style():
         "axes.titlepad": 8,
         "axes.labelsize": 8,
         "axes.titlesize": 12,
+        "legend.frameon": False,
+        "legend.fontsize": 8,
     }
     plt.rcParams.update(params)
     return

diff --git a/streamlit/pages/3_Predicting_performance.py b/streamlit/pages/3_Predicting_performance.py
@@ -1,7 +1,35 @@
+import os
+
 import streamlit as st
+import numpy as np
+import matplotlib.pyplot as plt
+import pyprojroot
+import torch
+from torch import Tensor
+
+from src import plot
 
 import Home
 
+# Default directory that load_model reads the model and data files from.
+MODEL_PATH = pyprojroot.here("streamlit/streamlit-data/")
+
+
+@st.cache_resource
+def load_model(model_path=MODEL_PATH):
+    """Load the scripted model, loss curves and test set shown on this page.
+
+    Args:
+        model_path: Directory containing ``streamlit_model.pt``,
+            ``streamlit_train_loss.npy``, ``streamlit_test_loss.npy``,
+            ``X_test.npy`` and ``y_test.npy``. May be a ``str`` or a path
+            object.
+
+    Returns:
+        Tuple ``(model, train_loss, test_loss, X_test, y_test)``: the model in
+        eval mode on the selected device, the two loss arrays as returned by
+        ``np.load``, and the test inputs/targets as float tensors on that
+        same device.
+    """
+    # Select the device first so the model is remapped while loading; a model
+    # scripted in a CUDA session cannot otherwise be loaded on a CPU-only host.
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    model = torch.jit.load(
+        os.path.join(model_path, "streamlit_model.pt"), map_location=device
+    )
+    # The page only runs inference, so switch off training-only behaviour.
+    model.eval()
+    # os.path.join (rather than the "/" operator) keeps str paths working too.
+    train_loss = np.load(os.path.join(model_path, "streamlit_train_loss.npy"))
+    test_loss = np.load(os.path.join(model_path, "streamlit_test_loss.npy"))
+    X_test = np.load(os.path.join(model_path, "X_test.npy"))
+    y_test = np.load(os.path.join(model_path, "y_test.npy"))
+    X_test = torch.tensor(X_test, dtype=torch.float).to(device)
+    y_test = torch.tensor(y_test, dtype=torch.float).to(device)
+
+    return model, train_loss, test_loss, X_test, y_test
+
 
 def main():
     """New page."""
@@ -25,19 +53,41 @@ def main():
     st.divider()
 
     st.subheader("Neural network model")
-    nb_url_1 = "https://github.com/jbreffle/monkeytype-analysis/blob/main/notebooks/4_nn_predict.ipynb"
     st.write(
         f"""
         One way to predict performance is to use a neural network model.
+        """
+    )
+    model, train_loss, test_loss, X_test, y_test = load_model()
+    # Plot losses over time
+    fig = plt.figure(figsize=(6, 3))
+    ax = plot.model_loss(train_loss, test_loss)
+    st.pyplot(fig, use_container_width=True, transparent=True)
+    # Plot actual vs predicted
+    test_predictions = model(X_test)
+    fig = plt.figure(figsize=(6, 3))
+    ax = plot.model_scatter(
+        y_test.cpu().detach().numpy(), test_predictions.cpu().detach().numpy()
+    )
+    st.pyplot(fig, use_container_width=True, transparent=True)
+    # Plot actual and predicted across feature values
+    fig = plt.figure(figsize=(6, 3))
+    fig, ax0, ax1 = plot.model_feature_scatter(
+        y_test.cpu(), test_predictions.detach().cpu().numpy(), X_test.cpu(), fig=fig
+    )
+    # Remove legend background
+    st.pyplot(fig, use_container_width=True, transparent=True)
 
+    st.divider()
+
+    nb_url_1 = "https://github.com/jbreffle/monkeytype-analysis/blob/main/notebooks/4_nn_predict.ipynb"
+    st.write(
+        f"""
         Click here
         [./notebooks/4_nn_predict.ipynb]({nb_url_1})
-        for plots that will eventually be included in this app.
+        for additional training and additional plots.
         """
     )
-    # TODO
-    st.divider()
-
     nb_url_2 = "https://github.com/jbreffle/monkeytype-analysis/blob/main/notebooks/5_nn_hyperopti.ipynb"
     st.write(
         f"""

diff --git a/streamlit/streamlit-data/X_test.npy b/streamlit/streamlit-data/X_test.npy
diff --git a/streamlit/streamlit-data/streamlit_model.pt b/streamlit/streamlit-data/streamlit_model.pt
diff --git a/streamlit/streamlit-data/streamlit_test_loss.npy b/streamlit/streamlit-data/streamlit_test_loss.npy
diff --git a/streamlit/streamlit-data/streamlit_train_loss.npy b/streamlit/streamlit-data/streamlit_train_loss.npy
diff --git a/streamlit/streamlit-data/y_test.npy b/streamlit/streamlit-data/y_test.npy