# Run crYOLO training and picking inside CryoSPARC (with cryosparc-tools)
- Based on https://tools.cryosparc.com/examples/cryolo.html
- Modified only to assign values to the `power` and `ncc_score` pick statistics (see https://discuss.cryosparc.com/t/cryolo-particle-picking-problem-unable-to-see-power-histogram-and-picked-particles-during-inspect-picks/10838/2?u=frozenfas)

In [None]:
# **************************************************************************
# *
# * Authors:    Sean Connell (sean.connell@gmail.com)
# *
# * Structural Biology of Cellular Machines
# *
# **************************************************************************

In [None]:
# CS instance information
# NOTE: every "XXX" value below is a placeholder that must be filled in before
# the notebook is run.
cshost = "XXX"  # passed as `host` when the CryoSPARC connection is created
# Passed as `base_port` for the connection. The bare XXX is a placeholder, not a
# defined name — presumably it should be replaced by an integer port (confirm).
port = XXX
project = "P13"  # project identifier handed to cs.find_project()
cslicense = "XXX"  # passed as `license` for the connection
email = "XXX"  # login e-mail for the connection
pword = "XXX"  # login password for the connection

# training data
pxl_sz_A = 0.723  # used as `psize_A` for the low-pass filter in the preview plots
# Positional argument of the crYOLO `config` command
# (presumably the box size in pixels — confirm).
particle_blob_size = 480

# Job / output names connected as the "train_micrographs" and
# "train_particles" inputs of the external job.
train_mic_job = "J13"
train_mic_name = "split_0"
train_particle_job = "J14"
train_particle_name = "particles"

# picking data
# The "small" pair is connected as the "all_micrographs" input; the "large"
# pair is only referenced by a commented-out connect call.
pick_small_mic_job = "J17"
pick_small_mic_name = "split_0"
pick_large_mic_job = "J17"
pick_large_mic_name = "remainder"

# cryolo
cryolo_exe = "XXX"  # executable used for the `config` step
cryolo_train_exe = "XXX"  # NOTE(review): not referenced by any other cell shown here
cryolo_predict_exe = "XXX"  # executable used for the prediction step

confidence = 0.1  # passed to the prediction command as `-t`
# NOTE(review): `train` and `fine_tune` are not read by any other cell shown here.
train = True
fine_tune = True

general_model = "XXX"  # weights file passed to the prediction command as `-w`

filter_tmp = "XXX"  # passed to the config command as `--filtered_output`


In [None]:
from cryosparc.tools import CryoSPARC

# Open a session with the CryoSPARC instance described in the settings cell.
cs = CryoSPARC(host=cshost, base_port=port, license=cslicense, email=email, password=pword)

# Stop here if the instance cannot be reached. An explicit raise is used rather
# than `assert` because assertions are skipped when Python runs with -O, which
# would let the remaining cells run against a dead connection.
if not cs.test_connection():
    raise ConnectionError(f"Could not connect to CryoSPARC at {cshost}:{port}")




In [None]:
# Resolve the project handle. After this cell has run once, `project` holds the
# Project object rather than the UID string from the settings cell, so fall
# back to the object's `uid`; this keeps the cell safe to re-run.
# (Re-running still creates a fresh external job.)
project_uid = project if isinstance(project, str) else project.uid
project = cs.find_project(project_uid)

# External job in workspace "W3" that will hold the crYOLO results.
job = project.create_external_job("W3", title="crYOLO Picks")

# Inputs: micrographs + particle locations for training, micrographs to pick.
job.connect("train_micrographs", train_mic_job, train_mic_name, slots=["micrograph_blob"])
job.connect("train_particles", train_particle_job, train_particle_name, slots=["location"])
job.connect("all_micrographs", pick_small_mic_job, pick_small_mic_name, slots=["micrograph_blob"])
# Alternative (disabled): pick on the large micrograph set instead.
#job.connect("all_micrographs", pick_large_mic_job, pick_large_mic_name, slots=["micrograph_blob"])

# Output: picked particle locations together with their pick statistics.
job.add_output("particle", "predicted_particles", slots=["location", "pick_stats"])

In [None]:
# Start the external job created in the previous cell.
job.start()
# Alternative (disabled): queue the job instead of starting it directly
# ("slinky" is presumably a scheduler lane name — confirm).
#job.queue("slinky")

In [None]:
# Bare expression: the notebook shows the job's current status as cell output.
job.status

In [None]:
# Create the working folders inside the job directory: the micrographs to pick
# from, the training micrographs, and the training annotations.
for folder in ("full_data", "train_image", "train_annot"):
    job.mkdir(folder)

In [None]:
# Load the micrograph datasets connected to the job inputs.
all_micrographs = job.load_input("all_micrographs", ["micrograph_blob"])
train_micrographs = job.load_input("train_micrographs", ["micrograph_blob"])

# Symlink every micrograph into the job sub-folder it belongs to, keeping only
# the file name of the original path: picking set first, then training set.
for dataset, folder in ((all_micrographs, "full_data"), (train_micrographs, "train_image")):
    for mic in dataset.rows():
        source = mic["micrograph_blob/path"]
        file_name = source.rsplit("/", 1)[-1]
        project.symlink(source, f"{job.uid}/{folder}/{file_name}")

In [None]:
from io import StringIO
import numpy as np
# `numpy.core` is a private namespace (deprecated since NumPy 2.0); `numpy.rec`
# is the public module providing the same record-array helpers.
from numpy import rec as records
from cryosparc import star

job.mkdir("train_annot/STAR")
train_particles = job.load_input("train_particles", ["location"])

# Write one STAR file of pixel coordinates per training micrograph.
# NOTE: `micrograph_path`, `center_x` and `center_y` keep the values of the
# last iteration and are reused by the preview plot in the next cell.
for micrograph_path, particles in train_particles.split_by("location/micrograph_path").items():
    micrograph_name = micrograph_path.split("/")[-1]
    star_file_name = micrograph_name.rsplit(".", 1)[0] + ".star"

    # micrograph_shape is indexed as (height, width) per particle row.
    mic_w = particles["location/micrograph_shape"][:, 1]
    mic_h = particles["location/micrograph_shape"][:, 0]

    # Fractional centres -> pixel coordinates.
    center_x = particles["location/center_x_frac"]
    center_y = particles["location/center_y_frac"]
    location_x = center_x * mic_w
    location_y = center_y * mic_h

    # Build the STAR text in memory, then upload it into the job directory.
    outfile = StringIO()
    star.write(
        outfile,
        records.fromarrays([location_x, location_y], names=["rlnCoordinateX", "rlnCoordinateY"]),
    )
    outfile.seek(0)
    job.upload("train_annot/STAR/" + star_file_name, outfile)

In [None]:
%matplotlib inline


from cryosparc import mrc
from cryosparc.tools import downsample, lowpass2
import matplotlib.pyplot as plt

# Preview of the last micrograph handled by the training-annotation loop:
# download it, bin it by 3 and low-pass filter it for display.
header, mic = project.download_mrc(micrograph_path)
binned = downsample(mic, factor=3)
lowpassed = lowpass2(
    binned,
    psize_A=pxl_sz_A,
    cutoff_resolution_A=20,
    order=0.7,
)
height, width = lowpassed.shape

# Clip the grey scale to the 1st-99th percentile so outliers do not wash out contrast.
vmin, vmax = np.percentile(lowpassed, [1, 99])

fig, ax = plt.subplots(figsize=(7.5, 8), dpi=144)
ax.axis("off")
ax.imshow(lowpassed, cmap="gray", vmin=vmin, vmax=vmax, origin="lower")

# Overlay the training particle centres (fractional coordinates scaled to the
# displayed image size).
x_px = center_x * width
y_px = center_y * height
ax.scatter(x_px, y_px, c="yellow", marker="+")

fig.tight_layout()

In [None]:
import shlex

# Alternative (disabled): configuration that also sets the training folders and
# pretrained weights.
#cmd =  f"{cryolo_exe } config config_cryolo.json {particle_blob_size} --train_image_folder train_image --train_annot_folder train_annot --pretrained_weights {general_model} --filtered_output {filter_tmp}".split(" ")

# Build the argument vector as a list instead of splitting a formatted string
# on spaces: a path containing spaces (e.g. `filter_tmp`) then stays a single
# argument and stray double spaces no longer produce empty arguments.
# `shlex.split` still allows `cryolo_exe` to be a multi-word launcher command.
cmd = [
    *shlex.split(cryolo_exe),
    "config",
    "config_cryolo.json",
    str(particle_blob_size),
    "--filter", "LOWPASS",
    "--low_pass_cutoff", "0.1",
    "--filtered_output", str(filter_tmp),
]

# Run from the job directory so config_cryolo.json is written there.
job.subprocess(cmd, cwd=job.dir())

In [None]:
import shlex

# Build the argument vector as a list instead of splitting a formatted string
# on spaces, so a model path containing spaces stays a single argument.
# `shlex.split` still allows `cryolo_predict_exe` to be a multi-word launcher.
cmd = [
    *shlex.split(cryolo_predict_exe),
    "-c", "config_cryolo.json",
    "-w", str(general_model),
    "-i", "full_data",
    "-g", "0",
    "-o", "boxfiles",
    "-t", str(confidence),
    "--otf",
]

# Alternative (disabled): predict with a model file named cryolo_model.h5
# instead of the general model.
#cmd=f"{cryolo_predict_exe} -c config_cryolo.json -w cryolo_model.h5 -i full_data -g 0 -o boxfiles -t {confidence} --otf".split(" ")

# Output folder for the prediction results.
job.mkdir("boxfiles")

# Run the prediction from the job directory.
job.subprocess(
    cmd,
    cwd=job.dir(),
    mute=True,
    checkpoint=True,
)

In [None]:
# NOTE(review): not referenced by the code in this cell.
output_star_folder = "STAR"

# All picks are read from one combined STAR file, so load it a single time
# instead of once per micrograph.
starfile_path = "boxfiles/CRYOSPARC/cryosparc.star"
locations = star.read(job.dir() / starfile_path)[""]
pick_micrograph_names = locations["rlnMicrographName"]

# Build one output dataset per micrograph.
# NOTE: `micrograph_path`, `center_x` and `center_y` keep the values of the
# last iteration and are reused by the preview plot in the next cell.
all_predicted = []
for mic in all_micrographs.rows():
    micrograph_path = mic["micrograph_blob/path"]
    micrograph_name = micrograph_path.split("/")[-1]
    height, width = mic["micrograph_blob/shape"]

    # Select this micrograph's picks once and reuse the selection.
    mic_picks = locations[pick_micrograph_names == micrograph_name]

    # Pixel coordinates -> fractional coordinates.
    center_x = mic_picks["rlnCoordinateX"] / width
    center_y = mic_picks["rlnCoordinateY"] / height
    fig_merit = mic_picks["rlnAutopickFigureOfMerit"]

    predicted = job.alloc_output("predicted_particles", len(mic_picks))
    predicted["location/micrograph_uid"] = mic["uid"]
    predicted["location/micrograph_path"] = mic["micrograph_blob/path"]
    predicted["location/micrograph_shape"] = mic["micrograph_blob/shape"]
    predicted["location/center_x_frac"] = center_x
    predicted["location/center_y_frac"] = center_y
    # The figure of merit is stored in both pick statistics so that both
    # fields carry a value.
    predicted["pick_stats/ncc_score"] = fig_merit
    predicted["pick_stats/power"] = fig_merit

    all_predicted.append(predicted)

In [None]:
# Preview of the last micrograph handled by the prediction loop: download it,
# bin it by 3 and low-pass filter it for display.
header, mic = project.download_mrc(micrograph_path)
binned = downsample(mic, factor=3)
lowpassed = lowpass2(
    binned,
    psize_A=pxl_sz_A,
    cutoff_resolution_A=20,
    order=0.7,
)
height, width = lowpassed.shape

# Clip the grey scale to the 1st-99th percentile so outliers do not wash out contrast.
vmin, vmax = np.percentile(lowpassed, [1, 99])

fig, ax = plt.subplots(figsize=(7.5, 8), dpi=144)
ax.axis("off")
ax.imshow(lowpassed, cmap="gray", vmin=vmin, vmax=vmax, origin="lower")

# Overlay the predicted particle centres (fractional coordinates scaled to the
# displayed image size).
x_px = center_x * width
y_px = center_y * height
ax.scatter(x_px, y_px, c="cyan", marker="+")

fig.tight_layout()

In [None]:
from cryosparc.dataset import Dataset

# Concatenate the per-micrograph pick datasets and store them as the job's
# "predicted_particles" output.
merged_picks = Dataset.append(*all_predicted)
job.save_output("predicted_particles", merged_picks)

# Stop the external job.
job.stop()