## ⭐ Setup

### Package Handling

In [None]:
!pip install mrcfile

Collecting mrcfile
  Downloading mrcfile-1.4.3-py2.py3-none-any.whl (43 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/43.9 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.9/43.9 kB[0m [31m1.2 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: mrcfile
Successfully installed mrcfile-1.4.3


In [None]:
import os
import numpy as np
import mrcfile
from sklearn.model_selection import train_test_split

### Function

In [None]:
def save_mrc_to_npy(filenames, mrc_dir, save_dir, permissive=False):
  """Convert MRC files to NumPy ``.npy`` files.

  Each ``<name>.mrc`` found in ``mrc_dir`` is opened with ``mrcfile`` and its
  ``data`` array is saved as ``<save_dir>/<name>.npy``.

  Args:
    filenames: Iterable of file names (not full paths) inside ``mrc_dir``.
      Every name must have the ``.mrc`` extension.
    mrc_dir: Directory that contains the MRC files.
    save_dir: Directory the ``.npy`` files are written to. It is not created
      by this function.
    permissive: Forwarded unchanged to ``mrcfile.open``.

  Raises:
    AssertionError: If a file name does not have the ``.mrc`` extension.
  """
  for filename in filenames:
    basename, ext = os.path.splitext(filename)
    # Validate the name before touching the file, so a stray directory entry
    # fails fast instead of first being opened and parsed as an MRC file.
    assert ext == ".mrc", f"expected a .mrc file, got: {filename!r}"
    filepath = os.path.join(mrc_dir, filename)
    print("\rReading mrc:", filepath, end="", flush=True)
    with mrcfile.open(filepath, permissive=permissive) as mrc:
      print("\rConverting mrc to npy.", end="", flush=True)
      # np.save appends the ".npy" suffix to the extension-less base name.
      np.save(os.path.join(save_dir, basename), mrc.data)

## ⭐ Main

In [None]:
# @title Setting directory for mrc files.

# Directory whose entries are listed, split into train/test/val and converted
# to .npy by the cells below. NOTE(review): the default looks like a mounted
# Google Drive path, so Drive presumably has to be mounted first - confirm.
MRC_DIR = "/content/drive/MyDrive/research_xs/processed_micrographs" # @param {type:"string"}

In [None]:
# @title Train test split
random_state = 42 # @param {type:"integer"}
test_size = "0.2" # @param {type:"string"}
val_size = "0.1" # @param {type:"string"}

# Sort the listing so that, together with the seed, the split is repeatable.
filenames = sorted(os.listdir(MRC_DIR))

# The form delivers the fractions as strings; convert and range-check them.
test_size = float(test_size)
if not 0 <= test_size < 1:
  raise ValueError(f"`test_size` should be between 0.0 and 1.0, got: {test_size}")
val_size = float(val_size)
if not 0 <= val_size < 1:
  raise ValueError(f"`val_size` should be between 0.0 and 1.0, got: {val_size}")
train_size = 1 - test_size - val_size
if not 0 < train_size <= 1:
  raise ValueError(f"`val_size` + `test_size` should be between 0.0 and 1.0, got: {test_size + val_size}")

# Pass the seed through: without `random_state` every run shuffles differently
# and the train/test/val membership cannot be reproduced.
train_filenames, test_filenames = train_test_split(
  filenames,
  test_size=test_size,
  train_size=train_size,
  random_state=random_state,
)
train_filenames = sorted(train_filenames)
test_filenames = sorted(test_filenames)

# Files assigned to neither train nor test form the validation set. A set
# gives constant-time membership checks instead of scanning two lists per file.
assigned_filenames = set(train_filenames) | set(test_filenames)
val_filenames = [name for name in filenames if name not in assigned_filenames]

print("number of files:",
  f" train:\t{len(train_filenames)}",
  f" test:\t{len(test_filenames)}",
  f" val:\t{len(val_filenames)}", sep='\n')

number of files:
 train:	58
 test:	17
 val:	9


In [None]:
%%capture --no-display
# @title Generate np file from mrc files.
# @markdown note that directory for np files should not exist

SAVE_DIR = "/content/drive/MyDrive/research_xs/processed_micrographs_np" # @param {type:"string"}
PERMISSIVE = True # @param {type:"boolean"}

os.mkdir(SAVE_DIR)

os.mkdir(os.path.join(SAVE_DIR, "train"))
save_mrc_to_npy(train_filenames,
                mrc_dir=MRC_DIR,
                save_dir=os.path.join(SAVE_DIR, "train"),
                permissive=PERMISSIVE)
np.savetxt(os.path.join(SAVE_DIR, "train_filenames.txt"), train_filenames, fmt="%s")

os.mkdir(os.path.join(SAVE_DIR, "test"))
save_mrc_to_npy(test_filenames, mrc_dir=MRC_DIR,
                save_dir=os.path.join(SAVE_DIR, "test"),
                permissive=PERMISSIVE)
np.savetxt(os.path.join(SAVE_DIR, "test_filenames.txt"), test_filenames, fmt="%s")

os.mkdir(os.path.join(SAVE_DIR, "val"))
save_mrc_to_npy(val_filenames, mrc_dir=MRC_DIR,
                save_dir=os.path.join(SAVE_DIR, "val"),
                permissive=PERMISSIVE)
np.savetxt(os.path.join(SAVE_DIR, "val_filenames.txt"), val_filenames, fmt="%s")

Reading mrc: /content/drive/MyDrive/research_xs/processed_micrographs/Falcon_2012_06_12-14_33_35_0.mrc



Converting mrc to npy.