In [1]:
import torch
from torch import nn
from sklearn.preprocessing import StandardScaler
from sklearn.utils import shuffle

import numpy as np
import polars as pl
import gsd.hoomd
import schmeud._schmeud as schmeud_rs
from schmeud._schmeud import statics
from schmeud import ml
from tqdm import tqdm

import glob
import os
import pathlib
import pickle
import signac
import freud
from numba import njit

from dataclasses import dataclass
from collections import defaultdict

import matplotlib.pyplot as plt
from scipy import stats
# import hoomd

In [2]:
from monk import workflow, utils

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Resolve config.yaml in the directory above the current working directory,
# load the workflow configuration from it, and display both for reference.
parent = pathlib.Path.cwd().parent / "config.yaml"
config = workflow.get_config(parent.as_posix())
(parent, config)

(PosixPath('/home/ian/Projects/work/monk/workflows/2d-osc-shear/config.yaml'),
 {'root': '/media/ian/Data2/monk/2d-osc-shear',
  'origin': '/media/ian/Data2/monk/2d-esl'})

In [4]:
# Open the signac project located at the `root` entry of the loaded config.
# The trailing expression displays the project-level document.
project: signac.Project = signac.get_project(root=config['root'])
project.doc

{'avail_seed': 18, 'dt': 0.005, 'step_unit': 200, 'equil_time': 100, 'min_periods': 20, 'dumps': 40, 'period_times': [30.0, 100.0, 300.0, 1000.0], 'max_shears': [0.01, 0.02, 0.03, 0.05, 0.08, 0.12, 0.04, 0.06, 0.07], '_status': {}}

In [5]:
def _load_pickle(path):
    """Return the object stored in the pickle file at ``path``.

    NOTE(security): unpickling can execute arbitrary code, so only files
    produced by this project should be loaded here. The recorded output of
    this cell links to scikit-learn's model-persistence caveats, so the
    stored objects are presumably scikit-learn estimators saved under a
    different library version (TODO confirm).
    """
    with open(path, "rb") as f:
        return pickle.load(f)


# Three pickled objects: one general and one per particle type, going by the
# file names (presumably SVC pipelines; verify against the training notebook).
pipe = _load_pickle("svc.pkl")
pipe0 = _load_pickle("svc_type0.pkl")
pipe1 = _load_pickle("svc_type1.pkl")

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [6]:
@dataclass(eq=True, frozen=True)
class Statepoint:
    """Immutable, hashable record identifying one experiment.

    Instances compare by value and cannot be modified after construction,
    so they can be used as dictionary keys.
    """

    # Maximum shear of the experiment.
    max_shear: float
    # Oscillation period of the experiment.
    period: float
    # Temperature of the experiment.
    temp: float
    # Preparation label of the job (the notebook uses "ESL" and "HTL").
    prep: str

In [7]:
def get_pr(soft, prep):
    """Evaluate the preparation-specific linear fit ``slope * soft + intercept``.

    Args:
        soft: Softness value(s); anything supporting ``*`` and ``+`` with a
            float (a scalar or a NumPy array, for example).
        prep: Preparation label, either ``"HTL"`` or ``"ESL"``.

    Returns:
        The fitted value(s), with the same shape as ``soft``.

    Raises:
        ValueError: If ``prep`` is neither ``"HTL"`` nor ``"ESL"``.
    """
    # (label, slope, intercept) for each supported preparation.
    linear_fits = (
        ("HTL", 0.14949216, 0.39594848),
        ("ESL", 0.19945844, 0.42320983),
    )
    for label, slope, intercept in linear_fits:
        if prep == label:
            return slope * soft + intercept
    raise ValueError(f"Unknown prep {prep}")

In [67]:
# Build `esl_dataset`: the feature table of one job at one state point.
#
# Only trajectories whose period, temperature and maximum shear (parsed from
# the file path) equal the targets below are loaded.
TARGET_PERIOD = 1000.0
TARGET_TEMP = 0.019836
TARGET_MAX_SHEAR = 0.04

datasets = []

for job in [project.open_job(id="a8f2c249ed85533b56b0729b7ab96d18")]:
    print(job)
    prep = job.sp["prep"]

    # An empty glob result simply produces no iterations.
    experiments = sorted(glob.glob(job.fn("longer_experiments/*/*/traj-fire_period-*.gsd")))
    for exper in experiments:
        max_shear = utils.extract_between(exper, "max-shear-", "/")
        period = utils.extract_between(exper, "period-", ".gsd")
        temp = utils.extract_between(exper, "temp-", "/")

        # Skip everything except the single state point of interest.
        state = (float(period), float(temp), float(max_shear))
        if state != (TARGET_PERIOD, TARGET_TEMP, TARGET_MAX_SHEAR):
            continue

        df_path = f"longer_experiments/max-shear-{max_shear}/temp-{temp}/auto-encoder-dataset-yieldstress_period-{period}.parquet"
        dataset = pl.read_parquet(job.fn(df_path))

        # Per-row feature vector layout:
        #   [strain, id == 0, id == 1, <stored sfs values>, prep == "ESL", soft]
        sf_len = dataset[0]["sfs"][0].shape[0]
        X = np.zeros((len(dataset), sf_len + 3 + 1 + 1), dtype=np.float32)
        X[:, 0] = dataset["strain"]
        X[:, 1] = (dataset["id"] == 0)
        X[:, 2] = (dataset["id"] == 1)
        X[:, 3:-2] = np.vstack(dataset["sfs"].to_numpy())
        X[:, -2] = (prep == "ESL")
        X[:, -1] = dataset["soft"]

        # Replace the stored `sfs` column with the augmented feature vectors.
        dataset = dataset.with_columns(sfs=X)
        datasets.append(dataset)

# Fail with a clear message instead of letting the concatenation of an empty
# list raise a less helpful error.
if not datasets:
    raise RuntimeError(
        f"No experiment matched period={TARGET_PERIOD}, temp={TARGET_TEMP}, "
        f"max_shear={TARGET_MAX_SHEAR} for job {job}"
    )

esl_dataset = pl.concat(datasets)

a8f2c249ed85533b56b0729b7ab96d18


In [68]:
# Build `htl_dataset`: the feature table of one job at one state point.
#
# Only trajectories whose period, temperature and maximum shear (parsed from
# the file path) equal the targets below are loaded.
TARGET_PERIOD = 1000.0
TARGET_TEMP = 0.019836
TARGET_MAX_SHEAR = 0.04

datasets = []

for job in [project.open_job(id="ecce68c50e28a33684826f28780bf6e9")]:
    print(job)
    prep = job.sp["prep"]

    # An empty glob result simply produces no iterations.
    experiments = sorted(glob.glob(job.fn("longer_experiments/*/*/traj-fire_period-*.gsd")))
    for exper in experiments:
        max_shear = utils.extract_between(exper, "max-shear-", "/")
        period = utils.extract_between(exper, "period-", ".gsd")
        temp = utils.extract_between(exper, "temp-", "/")

        # Skip everything except the single state point of interest.
        state = (float(period), float(temp), float(max_shear))
        if state != (TARGET_PERIOD, TARGET_TEMP, TARGET_MAX_SHEAR):
            continue

        df_path = f"longer_experiments/max-shear-{max_shear}/temp-{temp}/auto-encoder-dataset-yieldstress_period-{period}.parquet"
        # NOTE: `dataset` stays bound after this cell and is read by later
        # cells in this notebook, so the name is kept as-is.
        dataset = pl.read_parquet(job.fn(df_path))

        # Per-row feature vector layout:
        #   [strain, id == 0, id == 1, <stored sfs values>, prep == "ESL", soft]
        sf_len = dataset[0]["sfs"][0].shape[0]
        X = np.zeros((len(dataset), sf_len + 3 + 1 + 1), dtype=np.float32)
        X[:, 0] = dataset["strain"]
        X[:, 1] = (dataset["id"] == 0)
        X[:, 2] = (dataset["id"] == 1)
        X[:, 3:-2] = np.vstack(dataset["sfs"].to_numpy())
        X[:, -2] = (prep == "ESL")
        X[:, -1] = dataset["soft"]

        # Replace the stored `sfs` column with the augmented feature vectors.
        dataset = dataset.with_columns(sfs=X)
        datasets.append(dataset)

# Fail with a clear message instead of letting the concatenation of an empty
# list raise a less helpful error.
if not datasets:
    raise RuntimeError(
        f"No experiment matched period={TARGET_PERIOD}, temp={TARGET_TEMP}, "
        f"max_shear={TARGET_MAX_SHEAR} for job {job}"
    )

htl_dataset = pl.concat(datasets)

ecce68c50e28a33684826f28780bf6e9


In [69]:
# Detect the best accelerator PyTorch can see, for information only.
detected_device = (
    "cuda"
    if torch.cuda.is_available()
    else "mps"
    if torch.backends.mps.is_available()
    else "cpu"
)

# This notebook deliberately runs on the host CPU regardless of what was
# detected, so report both values instead of announcing the detected device
# as the one in use.
device = "cpu"
print(f"Detected {detected_device} device; using {device} device")

Using cuda device


In [70]:
# Preview the HTL table by name rather than through `dataset`, which is only
# a loop variable left over from the dataset-building cell.
htl_dataset.head()

frame,strain,id,soft,sfs
i32,f32,u32,f32,list[f32]
7999,0.04,1,-0.216464,"[0.04, 0.0, … -0.216464]"
7999,0.04,0,1.137346,"[0.04, 1.0, … 1.137346]"
7999,0.04,0,0.330652,"[0.04, 1.0, … 0.330652]"
7999,0.04,1,-0.055136,"[0.04, 0.0, … -0.055136]"
7999,0.04,0,-0.227072,"[0.04, 1.0, … -0.227072]"


In [71]:
class Encoder(nn.Module):
    """Fully connected encoder mapping a feature vector to the latent space.

    The stack has five hidden ``Linear`` + activation pairs: two of width
    ``base_channel_size`` followed by three of width ``base_channel_size // 2``.
    A final ``Linear`` without activation projects to ``latent_dim``.

    Args:
        num_input_channels: Number of input features per sample.
        base_channel_size: Width of the first two hidden layers.
        latent_dim: Size of the encoded output.
        act_fn: Zero-argument callable returning the activation module
            inserted after each hidden layer.
    """

    def __init__(self, num_input_channels: int, base_channel_size: int, latent_dim: int, act_fn: object = nn.ReLU):
        super().__init__()
        half_width = base_channel_size // 2
        # (in_features, out_features) of the hidden layers, in order.
        hidden_shapes = [
            (num_input_channels, base_channel_size),
            (base_channel_size, base_channel_size),
            (base_channel_size, half_width),
            (half_width, half_width),
            (half_width, half_width),
        ]
        stack = []
        for fan_in, fan_out in hidden_shapes:
            stack.append(nn.Linear(fan_in, fan_out))
            stack.append(act_fn())
        stack.append(nn.Linear(half_width, latent_dim))
        self.net = nn.Sequential(*stack)

    def forward(self, x):
        """Return the latent representation of ``x``."""
        return self.net(x)

class Decoder(nn.Module):
    """Fully connected decoder producing one sigmoid-squashed value per sample.

    The input width is ``latent_dim + 2``, i.e. the latent code plus two
    extra scalars supplied by the caller. Five hidden ``Linear`` + activation
    pairs of width ``base_channel_size`` are followed by a ``Linear`` to a
    single output and a ``Sigmoid``.

    Args:
        base_channel_size: Width of every hidden layer.
        latent_dim: Size of the latent code contained in the input.
        act_fn: Zero-argument callable returning the activation module
            inserted after each hidden layer.
    """

    def __init__(self, base_channel_size: int, latent_dim: int, act_fn: object = nn.ReLU):
        super().__init__()
        width = base_channel_size
        # Input width of each hidden layer; every hidden layer outputs `width`.
        hidden_inputs = [latent_dim + 2] + [width] * 4
        modules = []
        for fan_in in hidden_inputs:
            modules += [nn.Linear(fan_in, width), act_fn()]
        modules += [nn.Linear(width, 1), nn.Sigmoid()]
        self.net = nn.Sequential(*modules)

    def forward(self, x):
        """Return the decoded value in ``(0, 1)`` for each row of ``x``."""
        return self.net(x)


class AutoEncoder(nn.Module):
    """Encoder/decoder pair operating on rows with pass-through columns.

    For a 2-D input ``x``:

    * column ``0`` bypasses the encoder and is fed straight to the decoder
      (in this notebook that column holds the strain),
    * columns ``1 .. num_input_channels`` are encoded,
    * the last column bypasses the encoder and is appended to the latent code
      (in this notebook that column holds the softness).

    Args:
        num_input_channels: Number of columns handed to the encoder.
        base_channel_size: Hidden width shared by encoder and decoder.
        latent_dim: Size of the encoder output.
        act_fn: Zero-argument callable returning the activation module.
    """

    def __init__(self, num_input_channels: int, base_channel_size: int, latent_dim: int, act_fn: object = nn.ReLU):
        super().__init__()
        self.num_input_channels = num_input_channels
        self.encoder = Encoder(num_input_channels, base_channel_size, latent_dim, act_fn)
        self.decoder = Decoder(base_channel_size, latent_dim, act_fn)

    def forward(self, x):
        """Decode ``[first column, latent code, last column]`` for each row."""
        leading = x[:, 0].view(-1, 1)
        # encode() already yields [latent code, last column].
        return self.decoder(torch.cat((leading, self.encode(x)), 1))

    def encode(self, x):
        """Return ``[latent code, last column]`` for each row of ``x``."""
        trailing = x[:, -1].view(-1, 1)
        stop = 1 + self.num_input_channels
        latent = self.encoder(x[:, 1:stop])
        return torch.cat((latent, trailing), 1)

In [88]:
def _sfs_matrix(frame, width):
    """Stack the `sfs` column of ``frame`` into a ``(len(frame), width)`` float32 array."""
    matrix = np.zeros((len(frame), width), dtype=np.float32)
    matrix[:, :] = np.vstack(frame["sfs"].to_numpy())
    return matrix


# Width of one feature vector, taken from the first ESL row and used for both
# matrices.
sf_len = esl_dataset[0]["sfs"][0].shape[0]
data_len = len(esl_dataset["sfs"])

X = _sfs_matrix(esl_dataset, sf_len)
X_htl = _sfs_matrix(htl_dataset, sf_len)

X, X_htl

(array([[ 0.04      ,  1.        ,  0.        , ...,  0.26299247,
          1.        , -0.49922734],
        [ 0.04      ,  1.        ,  0.        , ...,  1.7951186 ,
          1.        ,  0.8167877 ],
        [ 0.04      ,  1.        ,  0.        , ...,  5.293931  ,
          1.        ,  0.62885255],
        ...,
        [ 0.04      ,  0.        ,  1.        , ...,  0.2964789 ,
          1.        , -0.8768886 ],
        [ 0.04      ,  1.        ,  0.        , ...,  1.347369  ,
          1.        ,  0.01675172],
        [ 0.04      ,  0.        ,  1.        , ...,  2.9410026 ,
          1.        , -0.13685314]], dtype=float32),
 array([[ 0.04      ,  0.        ,  1.        , ...,  1.7199786 ,
          0.        , -0.21646401],
        [ 0.04      ,  1.        ,  0.        , ...,  3.1636333 ,
          0.        ,  1.137346  ],
        [ 0.04      ,  1.        ,  0.        , ...,  0.4502731 ,
          0.        ,  0.33065194],
        ...,
        [ 0.04      ,  0.        ,  1. 

In [89]:

# Normalizing Data
# The scaler is loaded from disk rather than fitted here, and the same
# transform is applied to both feature matrices.
# NOTE(security): pickle.load can execute arbitrary code; only load scaler
# files produced by this project.
with open("scaler-v8.pkl", "rb") as f:
    scaler = pickle.load(f)
X_scaled = scaler.transform(X)
X_htl_scaled = scaler.transform(X_htl)

# scaler_y = StandardScaler()
# Y_scaled = Y.reshape(-1, 1)

# X_scaled, Y_scaled = shuffle(X_scaled, Y_scaled)

# Converting to PyTorch tensor
X_tensor = torch.FloatTensor(X_scaled)
X_htl_tensor = torch.FloatTensor(X_htl_scaled)
# Y_tensor = torch.FloatTensor(Y_scaled)

In [90]:
# Drop a model left over from an earlier run and release cached GPU memory.
# The lookup goes through globals() so the cell also runs on a fresh kernel,
# where `model` has not been defined yet (a bare `model` would raise NameError).
if globals().get("model") is not None:
    del model
    torch.cuda.empty_cache()

In [91]:
# Setting random seed for reproducibility
torch.manual_seed(12)

# `input_size` counts every column of X, including the pass-through columns
# that the model does not feed to its encoder.
input_size = X.shape[1]  # Number of input features
encoding_dim = 1  # Desired number of output dimensions
# model = AutoEncoder(input_size - 3, 512, encoding_dim).to(device)


In [92]:
# model.eval()

# NOTE(review): in the visible notebook `model` is first assigned in a later
# cell, so this cell raises NameError on Restart & Run All.
# NOTE(review): the encoder is called with a slice of a polars list element
# rather than a torch tensor, and the slice only drops the last value while
# the encoder is built for `input_size - 3` features. This looks like leftover
# scratch code; confirm before relying on it.
host_model = model.to("cpu")
host_model.encoder(dataset[0]["sfs"][0][:-1])

In [93]:
# Checkpoint file whose state dict is loaded into the AutoEncoder below.
LOAD_PATH = "autoencoder8-final.pth"

In [94]:
# Rebuild the architecture and restore the trained weights.
# The encoder consumes every column of X except the first one and the last
# two, hence `input_size - 3`.
model = AutoEncoder(input_size - 3, 256, encoding_dim)
# map_location keeps the load working on machines that lack the device the
# checkpoint was saved from; the model is used on the CPU in this notebook.
model.load_state_dict(torch.load(LOAD_PATH, map_location="cpu"))
# eval() returns the module, so the architecture is displayed as cell output.
model.eval()

AutoEncoder(
  (encoder): Encoder(
    (net): Sequential(
      (0): Linear(in_features=56, out_features=256, bias=True)
      (1): ReLU()
      (2): Linear(in_features=256, out_features=256, bias=True)
      (3): ReLU()
      (4): Linear(in_features=256, out_features=128, bias=True)
      (5): ReLU()
      (6): Linear(in_features=128, out_features=128, bias=True)
      (7): ReLU()
      (8): Linear(in_features=128, out_features=128, bias=True)
      (9): ReLU()
      (10): Linear(in_features=128, out_features=1, bias=True)
    )
  )
  (decoder): Decoder(
    (net): Sequential(
      (0): Linear(in_features=3, out_features=256, bias=True)
      (1): ReLU()
      (2): Linear(in_features=256, out_features=256, bias=True)
      (3): ReLU()
      (4): Linear(in_features=256, out_features=256, bias=True)
      (5): ReLU()
      (6): Linear(in_features=256, out_features=256, bias=True)
      (7): ReLU()
      (8): Linear(in_features=256, out_features=256, bias=True)
      (9): ReLU()
      (

In [95]:
# Inference only: disable autograd so no graph is recorded while the full
# datasets are pushed through the network.
with torch.no_grad():
    # `mid*` is the encode() output, `out*` the full forward pass.
    mid = model.encode(X_tensor).cpu().numpy()
    out = model(X_tensor).cpu().numpy()

    mid_htl = model.encode(X_htl_tensor).cpu().numpy()
    out_htl = model(X_htl_tensor).cpu().numpy()

In [100]:
# Encoded ESL rows: per AutoEncoder.encode, each row is the encoder output
# followed by the last column of the (scaled) input.
mid

array([[ 1.1472626 , -0.5649902 ],
       [ 0.5080203 ,  1.6695238 ],
       [ 0.40312007,  1.3504212 ],
       ...,
       [ 0.64361286, -1.2062362 ],
       [-1.2860947 ,  0.311111  ],
       [ 0.62218285,  0.05029925]], dtype=float32)

In [96]:
# Inspect the augmented per-row feature vectors stored in the ESL table.
esl_dataset["sfs"]

sfs
list[f32]
"[0.04, 1.0, … -0.499227]"
"[0.04, 1.0, … 0.816788]"
"[0.04, 1.0, … 0.628853]"
"[0.04, 0.0, … 0.166194]"
"[0.04, 0.0, … -0.351567]"
"[0.04, 0.0, … -0.935419]"
"[0.04, 0.0, … -1.210075]"
"[0.04, 1.0, … -0.21633]"
"[0.04, 1.0, … -0.271183]"
"[0.04, 0.0, … 0.218953]"


In [97]:
# Save the encode() outputs (`mid*`) and forward-pass outputs (`out*`) of both
# datasets into a single .npz archive.
np.savez("yieldstress-data.npz", mid=mid, out=out, mid_htl=mid_htl, out_htl=out_htl)

In [98]:
# Display the HTL table (its `sfs` column holds the augmented feature vectors).
htl_dataset

frame,strain,id,soft,sfs
i32,f32,u32,f32,list[f32]
7999,0.04,1,-0.216464,"[0.04, 0.0, … -0.216464]"
7999,0.04,0,1.137346,"[0.04, 1.0, … 1.137346]"
7999,0.04,0,0.330652,"[0.04, 1.0, … 0.330652]"
7999,0.04,1,-0.055136,"[0.04, 0.0, … -0.055136]"
7999,0.04,0,-0.227072,"[0.04, 1.0, … -0.227072]"
7999,0.04,1,-0.284574,"[0.04, 0.0, … -0.284574]"
7999,0.04,0,-0.597902,"[0.04, 1.0, … -0.597902]"
7999,0.04,0,-0.66942,"[0.04, 1.0, … -0.66942]"
7999,0.04,1,-1.072123,"[0.04, 0.0, … -1.072123]"
7999,0.04,1,-0.653534,"[0.04, 0.0, … -0.653534]"


In [99]:
# Display the ESL table (its `sfs` column holds the augmented feature vectors).
esl_dataset

frame,strain,id,soft,sfs
i32,f32,u32,f32,list[f32]
7999,0.04,0,-0.499227,"[0.04, 1.0, … -0.499227]"
7999,0.04,0,0.816788,"[0.04, 1.0, … 0.816788]"
7999,0.04,0,0.628853,"[0.04, 1.0, … 0.628853]"
7999,0.04,1,0.166194,"[0.04, 0.0, … 0.166194]"
7999,0.04,1,-0.351567,"[0.04, 0.0, … -0.351567]"
7999,0.04,1,-0.935419,"[0.04, 0.0, … -0.935419]"
7999,0.04,1,-1.210075,"[0.04, 0.0, … -1.210075]"
7999,0.04,0,-0.21633,"[0.04, 1.0, … -0.21633]"
7999,0.04,0,-0.271183,"[0.04, 1.0, … -0.271183]"
7999,0.04,1,0.218953,"[0.04, 0.0, … 0.218953]"
