## Tests for StructuralFeatures class on RNAP structure ##

In [134]:
import sbmlcore, pandas
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


Create/load in an initial dataframe of mutations 
N.B. These mutations must be named using residues that actually exist in the wild-type 
and numbered according to your later specified resid offsets!

In [173]:
b = {'segid': ['A', 'A', 'A', 'B', 'C', 'C'], 'mutation': ['I3D','S4K', 'Q5V', 'R6D', 'S450F', 'D435F']} #N.B. Mutation must include offset
df = pandas.DataFrame.from_dict(b)
df
df.dtypes

segid       object
mutation    object
dtype: object

Use the StructuralDistances class to load in the file (first argument), <br/>
identify the groups of atoms you care about (second argument, uses MDAnalysis syntax) - distances will be calculated from the centre of mass of this whole selection, <br/>
name the resulting column of distances to each Ca in the structure (third argument, string of your choice), <br/>
define any offsets for the different chains (fourth argument, must be a dictionary of the form {'segid': int, ...}). <br/>
N.B. Distances are from the centre of mass of the specified atom selection to each C-alpha. 

In [174]:
a = sbmlcore.StructuralDistances('tests/5uh6.pdb','resname MG', 'Mg_distance', offsets = {'A': 1, 'B': 0, 'C': -6})

Now add feature of interest to existing dataframe

In [175]:
df = a.add_feature(df)
df

AssertionError: Too many NaNs! Have you defined your offsets correctly?

In [143]:
print("This is no rows:", len(df['Mg_distance'])) #gives length of column 'Mg_distance'

half_data = len(df['Mg_distance'])//2 # // divides and rounds DOWN to nearest int
print("This is half the no rows, rounded down:", half_data)

total_nans = df['Mg_distance'].isna().sum()
print("This is total NaNs:", total_nans)

if total_nans >= half_data:
    print("Too many NaNs!")

This is no rows: 6
This is half the no rows, rounded down: 3
This is total NaNs: 0


In [144]:
df.loc[1, "Mg_distance"]

59.36286708720095

In [145]:
df.dtypes
df['Mg_distance'].dtypes

dtype('float64')

In [154]:
for i in df['Mg_distance']:
    print(type(i))
    assert isinstance(i, float), "Distances must be floats!"

<class 'float'>
<class 'float'>
<class 'float'>
<class 'float'>
<class 'float'>
<class 'float'>


In [165]:
a = sbmlcore.StructuralDistances('tests/5uh6.pdb','index 26082', 'Zn1_distance', offsets = {'A': 0, 'B': 0, 'C': -6})

In [166]:
df = a.add_feature(df)
df

This is no rows: 6
This is half the no rows, rounded down: 3
This is total NaNs: 0


Unnamed: 0,segid,mutation,Mg_distance,Zn1_distance
0,A,I3D,58.457644,78.0487
1,A,S4K,59.362867,80.462392
2,A,Q5V,59.359265,81.153935
3,B,R6D,43.019331,66.699269
4,C,S450F,26.846155,58.280002
5,C,D435F,24.022115,61.736525


In [18]:
a = sbmlcore.StructuralDistances('tests/5uh6.pdb','index 26083', 'Zn2_distance', offsets = {'A': 0, 'B': 0, 'C': -6})

In [19]:
df = a.add_feature(df)
df

Unnamed: 0,segid,mutation,Zn_distance,Zn1_distance,Zn2_distance
0,A,I3D,89.118035,78.0487,109.942803
1,A,S4K,90.249681,80.462392,110.049008
2,A,Q5V,89.621451,81.153935,108.50354
3,B,R6D,61.392973,66.699269,73.378286
4,C,S450F,48.693866,58.280002,60.337364
5,C,D435F,48.628675,61.736525,56.683942


In [81]:
rfp = sbmlcore.StructuralDistances('tests/5uh6.pdb','resname RFP', 'RFP distance', offsets = {'A': 0, 'B': 0, 'C': -5})

In [82]:
df = rfp.add_feature(df)
df

Unnamed: 0,segid,mutation,Zn_distance,55,RFP distance
0,A,I3D,89.118035,76.681586,76.681586
1,A,S4K,90.249681,76.972108,76.972108
2,A,Q5V,89.621451,76.589376,76.589376
3,B,R6D,61.392973,55.914038,55.914038
4,C,S450F,48.693866,,
5,C,D435F,48.628675,,


In [27]:
df['RFP_distance']

0    76.681586
1    76.972108
2    76.589376
3    55.914038
4          NaN
5          NaN
Name: RFP_distance, dtype: float64

In [110]:
mg1 = sbmlcore.StructuralDistances('tests/5uh6.pdb','resname MG1', 'Mg_distance', offsets = {'A': 0, 'B': 0, 'C': -6})

AssertionError: Atom selection does not exist! Is your selection using the correct MDAnalysis syntax?

In [111]:
df = mg1.add_feature(df)
df

AssertionError: You've already added that feature!

In [70]:
import MDAnalysis
u = MDAnalysis.Universe("tests/5uh6.pdb")

In [79]:
offsets = {'A': 0, 'B': 0, 'C': -6}
for chain in offsets:
    chain_nooffset = u.select_atoms('segid ' + 'A')
    print(chain_nooffset)
    chainGroup = u.select_atoms('segid ' + chain)
    #print(chainGroup)
    chainGroup.residues.resids = chainGroup.residues.resids + offsets[chain]
    print(chainGroup.residues.resids)
    print(type(chainGroup.residues.resids))

<AtomGroup [<Atom 1: N of type N of resname ILE, resid 3 and segid A and altLoc >, <Atom 2: CA of type C of resname ILE, resid 3 and segid A and altLoc >, <Atom 3: C of type C of resname ILE, resid 3 and segid A and altLoc >, ..., <Atom 1702: CG of type C of resname ASN, resid 226 and segid A and altLoc >, <Atom 1703: OD1 of type O of resname ASN, resid 226 and segid A and altLoc >, <Atom 1704: ND2 of type N of resname ASN, resid 226 and segid A and altLoc >]>
[  3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18  19  20
  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36  37  38
  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53  54  55  56
  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71  72  73  74
  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89  90  91  92
  93  94  95  96  97  98  99 100 101 102 103 104 105 106 107 108 109 110
 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128
 129 130 131 132 133 134

In [91]:
dist = MDAnalysis.lib.distances.distance_array(u.select_atoms("resname MG").positions, u.select_atoms("resname ZN").positions)

In [92]:
dist

array([[41.85556056, 61.94065611]])

In [93]:
dist[0]

array([41.85556056, 61.94065611])

In [94]:
amino_acid_3to1letter = {'CYS': 'C', 'ASP': 'D', 'SER': 'S', 'GLN': 'Q', 'LYS': 'K',
     'ILE': 'I', 'PRO': 'P', 'THR': 'T', 'PHE': 'F', 'ASN': 'N',
     'GLY': 'G', 'HIS': 'H', 'LEU': 'L', 'ARG': 'R', 'TRP': 'W',
     'ALA': 'A', 'VAL':'V', 'GLU': 'E', 'TYR': 'Y', 'MET': 'M'}

In [95]:
def one_letter(row):
            return(amino_acid_3to1letter[row.resname])

In [107]:
mg = sbmlcore.StructuralDistances('tests/5uh6.pdb','resname MG', 'Mg_distance', offsets = {'A': 0, 'B': 0, 'C': -6})
Ca_all = u.select_atoms("name CA")


distances = MDAnalysis.lib.distances.distance_array(reference_com, Ca_all.positions)

Ca_data = {'segid': Ca_all.segids, 'resid': Ca_all.resids,
                   'resname': Ca_all.resnames, distance_name: distances[0]}

NameError: name 'reference_com' is not defined