In this notebook we will read a Relion star file, subset it, and write it back to disk.

In [10]:
import pandas as pd
from StarIO import read_star, write_star

# Read a star file

In [2]:
# read_star returns a dictionary; its keys are inspected in the next cell.
df_dict = read_star("examples/run_it013_data.star")

In [3]:
# Bare last expression: Jupyter displays the keys without an explicit print(),
# matching the other key-listing cells in this notebook.
df_dict.keys()

dict_keys(['data'])


Here we have only one dataframe, stored under the generic key "data". To access the dataframe from the dictionary:

In [11]:
# Look up the single entry listed above, stored under the "data" key.
df = df_dict["data"]

In [12]:
# Display the dataframe; the rendered table below is abbreviated with "..." rows/columns.
df

Unnamed: 0,rlnMicrographName,rlnCoordinateX,rlnCoordinateY,rlnImageName,rlnDefocusU,rlnDefocusV,rlnDefocusAngle,rlnPhaseShift,rlnVoltage,rlnSphericalAberration,...,rlnCtfMaxResolution,rlnCtfFigureOfMerit,rlnGroupNumber,rlnClassNumber,rlnNormCorrection,rlnLogLikeliContribution,rlnMaxValueProbDistribution,rlnNrOfSignificantSamples,rlnGroupName,rlnRandomSubset
0,MotionCorr/job002/Micrographs/20191025_1252_A0...,834.545410,850.909058,000012@Extract/job012/Micrographs/20191025_125...,15376.422852,14312.515625,23.760529,0.0,200.0,2.7,...,4.569044,0.071087,1,1,0.857364,58808.724788,0.205808,14,group_78,2
1,MotionCorr/job002/Micrographs/20191025_1252_A0...,1243.636230,2961.817871,000015@Extract/job012/Micrographs/20191025_125...,15376.422852,14312.515625,23.760529,0.0,200.0,2.7,...,4.569044,0.071087,1,1,0.862628,59673.819842,0.235101,8,group_78,1
2,MotionCorr/job002/Micrographs/20191025_1252_A0...,3402.624023,1960.278687,000017@Extract/job012/Micrographs/20191025_125...,15376.422852,14312.515625,23.760529,0.0,200.0,2.7,...,4.569044,0.071087,1,1,0.871292,59321.599249,0.216328,22,group_78,2
3,MotionCorr/job002/Micrographs/20191025_1252_A0...,4074.545166,1930.908936,000001@Extract/job012/Micrographs/20191025_125...,15376.422852,14312.515625,23.760529,0.0,200.0,2.7,...,4.569044,0.071087,1,1,0.871992,59331.710372,0.679952,2,group_78,2
4,MotionCorr/job002/Micrographs/20191025_1252_A0...,1963.636353,1881.818115,000002@Extract/job012/Micrographs/20191025_125...,15376.422852,14312.515625,23.760529,0.0,200.0,2.7,...,4.569044,0.071087,1,1,0.876302,59315.162707,0.415812,2,group_78,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22749,MotionCorr/job002/Micrographs/20191025_1252_A0...,3747.272461,1079.999878,000016@Extract/job012/Micrographs/20191025_125...,5558.317383,4541.786621,28.256891,0.0,200.0,2.7,...,4.750596,0.058816,79,1,0.831493,61889.461934,0.076569,48,group_02,2
22750,MotionCorr/job002/Micrographs/20191025_1252_A0...,439.259766,2950.251953,000017@Extract/job012/Micrographs/20191025_125...,5558.317383,4541.786621,28.256891,0.0,200.0,2.7,...,4.750596,0.058816,79,1,0.816456,61117.169326,0.145702,72,group_02,1
22751,MotionCorr/job002/Micrographs/20191025_1252_A0...,2912.727051,1030.909058,000001@Extract/job012/Micrographs/20191025_125...,5558.317383,4541.786621,28.256891,0.0,200.0,2.7,...,4.750596,0.058816,79,1,0.833504,61835.396813,0.087845,25,group_02,2
22752,MotionCorr/job002/Micrographs/20191025_1252_A0...,1947.272583,3469.090576,000004@Extract/job012/Micrographs/20191025_125...,5558.317383,4541.786621,28.256891,0.0,200.0,2.7,...,4.750596,0.058816,79,1,0.834830,62320.116387,0.175700,7,group_02,1


# Take a simple subset (the top 5 particles)

In [13]:
# Keep only the first five rows (the "top 5 particles").
subset = df.head(5)

In [14]:
# Display the five-row subset to check it before writing it out.
subset

Unnamed: 0,rlnMicrographName,rlnCoordinateX,rlnCoordinateY,rlnImageName,rlnDefocusU,rlnDefocusV,rlnDefocusAngle,rlnPhaseShift,rlnVoltage,rlnSphericalAberration,...,rlnCtfMaxResolution,rlnCtfFigureOfMerit,rlnGroupNumber,rlnClassNumber,rlnNormCorrection,rlnLogLikeliContribution,rlnMaxValueProbDistribution,rlnNrOfSignificantSamples,rlnGroupName,rlnRandomSubset
0,MotionCorr/job002/Micrographs/20191025_1252_A0...,834.54541,850.909058,000012@Extract/job012/Micrographs/20191025_125...,15376.422852,14312.515625,23.760529,0.0,200.0,2.7,...,4.569044,0.071087,1,1,0.857364,58808.724788,0.205808,14,group_78,2
1,MotionCorr/job002/Micrographs/20191025_1252_A0...,1243.63623,2961.817871,000015@Extract/job012/Micrographs/20191025_125...,15376.422852,14312.515625,23.760529,0.0,200.0,2.7,...,4.569044,0.071087,1,1,0.862628,59673.819842,0.235101,8,group_78,1
2,MotionCorr/job002/Micrographs/20191025_1252_A0...,3402.624023,1960.278687,000017@Extract/job012/Micrographs/20191025_125...,15376.422852,14312.515625,23.760529,0.0,200.0,2.7,...,4.569044,0.071087,1,1,0.871292,59321.599249,0.216328,22,group_78,2
3,MotionCorr/job002/Micrographs/20191025_1252_A0...,4074.545166,1930.908936,000001@Extract/job012/Micrographs/20191025_125...,15376.422852,14312.515625,23.760529,0.0,200.0,2.7,...,4.569044,0.071087,1,1,0.871992,59331.710372,0.679952,2,group_78,2
4,MotionCorr/job002/Micrographs/20191025_1252_A0...,1963.636353,1881.818115,000002@Extract/job012/Micrographs/20191025_125...,15376.422852,14312.515625,23.760529,0.0,200.0,2.7,...,4.569044,0.071087,1,1,0.876302,59315.162707,0.415812,2,group_78,1


# Write the dataframe to a file

In [9]:
# Write the subset dataframe to a new star file under examples/.
write_star("examples/subset.star", subset)

Some other star files:

In [15]:
# Read a model star file; note that this rebinds df_dict, replacing the dictionary read earlier.
df_dict = read_star("examples/run_it013_half2_model.star")

In [16]:
# List the entries read from the model file (this one yields many keys, not just one).
df_dict.keys()

dict_keys(['model_general', 'model_classes', 'model_class_1', 'model_groups', 'model_group_1', 'model_group_2', 'model_group_3', 'model_group_4', 'model_group_5', 'model_group_6', 'model_group_7', 'model_group_8', 'model_group_9', 'model_group_10', 'model_group_11', 'model_group_12', 'model_group_13', 'model_group_14', 'model_group_15', 'model_group_16', 'model_group_17', 'model_group_18', 'model_group_19', 'model_group_20', 'model_group_21', 'model_group_22', 'model_group_23', 'model_group_24', 'model_group_25', 'model_group_26', 'model_group_27', 'model_group_28', 'model_group_29', 'model_group_30', 'model_group_31', 'model_group_32', 'model_group_33', 'model_group_34', 'model_group_35', 'model_group_36', 'model_group_37', 'model_group_38', 'model_group_39', 'model_group_40', 'model_group_41', 'model_group_42', 'model_group_43', 'model_group_44', 'model_group_45', 'model_group_46', 'model_group_47', 'model_group_48', 'model_group_49', 'model_group_50', 'model_group_51', 'model_group_

In [17]:
# Select a single entry by its key and display it.
df_dict["model_group_69"]

Unnamed: 0,rlnSpectralIndex,rlnResolution,rlnSigma2Noise
0,0,0.000000,0.000023
1,1,0.001097,0.000013
2,2,0.002195,0.000005
3,3,0.003292,0.000004
4,4,0.004389,0.000003
...,...,...,...
252,252,0.276510,0.000002
253,253,0.277607,0.000002
254,254,0.278704,0.000002
255,255,0.279802,0.000002


In [18]:
# Read an optimiser star file; df_dict is rebound again to the newly read dictionary.
df_dict = read_star("examples/run_it013_optimiser.star")

In [20]:
# List the entries read from the optimiser file.
df_dict.keys()

dict_keys(['optimiser_general'])

In [21]:
# Display the only entry in this file; the output below shows a single row (index 0).
df_dict["optimiser_general"]

Unnamed: 0,rlnOutputRootName,rlnModelStarFile,rlnModelStarFile2,rlnExperimentalDataStarFile,rlnOrientSamplingStarFile,rlnCurrentIteration,rlnNumberOfIterations,rlnDoSplitRandomHalves,rlnJoinHalvesUntilThisResolution,rlnAdaptiveOversampleOrder,...,rlnDoCorrectCtf,rlnDoRealignMovies,rlnDoIgnoreCtfUntilFirstPeak,rlnCtfDataArePhaseFlipped,rlnCtfDataAreCtfPremultiplied,rlnDoOnlyFlipCtfPhases,rlnRefsAreCtfCorrected,rlnFixSigmaNoiseEstimates,rlnFixSigmaOffsetEstimates,rlnMaxNumberOfPooledParticles
0,Refine3D/job020/run,Refine3D/job020/run_it013_half1_model.star,Refine3D/job020/run_it013_half2_model.star,Refine3D/job020/run_it013_data.star,Refine3D/job020/run_it013_sampling.star,13,999,1,40.0,1,...,0,0,0,0,0,0,0,0,0,9
