In [62]:
from __future__ import print_function
import sys
import glob
import pandas as pd
import numpy as np

# Compute the average value
def output_averaged_B_factor(sum_Bf, n, parts_for_Bf):
    """Return the mean B-factor ``sum_Bf / n``, or ``None`` when ``n`` is 0.

    ``sum_Bf`` is the accumulated B-factor total and ``n`` the number of
    values that were summed. ``parts_for_Bf`` is a label for the group being
    averaged; it is accepted for call compatibility but is not used in the
    computation.
    """
    if n == 0:
        # Nothing was counted for this group, so there is no average.
        return None
    return sum_Bf / n

# Take a file name and return the averaged temperature factors (B-factors) as a DataFrame
def calc_averaged_B_factors(pdb_name):
    """Build a one-row DataFrame of mean B-factors for one PDB file.

    Every line starting with ``ATOM`` or ``HETATM`` is assigned to exactly
    one of three groups:

    * waters  -- residue name (``line[17:20]``) is ``WAT`` or ``HOH``,
      regardless of record type;
    * protein -- any remaining ``ATOM`` record;
    * ligand  -- any remaining ``HETATM`` record.

    Args:
        pdb_name: Path of the PDB file to read. The ``PDBID`` column is taken
            from ``pdb_name[-8:-4]``, so the path is expected to end with a
            4-character ID followed by a 4-character extension such as
            ``.pdb``.

    Returns:
        A ``pandas.DataFrame`` with a single row and the columns ``PDBID``,
        ``Bf_protein``, ``Bf_waters`` and ``Bf_ligand``. A group that has no
        atoms gets ``None`` as its average.

    Raises:
        ValueError: If the B-factor field of an ATOM/HETATM line cannot be
            converted to ``float`` (for example when the line is too short).
    """
    sum_protein_Bf = 0.
    sum_WAT_Bf = 0.
    sum_HETATM_Bf = 0.
    n_protein = 0
    n_WAT = 0
    n_HETATM = 0

    # The context manager closes the file even if parsing raises.
    with open(pdb_name) as fl:
        for line in fl:
            if not line.startswith(('ATOM', 'HETATM')):
                continue

            residue = line[17:20]
            # The tempFactor field is columns 61-66 (1-based), i.e. the slice
            # [60:66]. Starting at index 61 would drop the leading digit of
            # any B-factor >= 100.00.
            Bf = float(line[60:66])

            if residue in ['WAT', 'HOH']:
                sum_WAT_Bf += Bf
                n_WAT += 1
            elif line.startswith('ATOM'):
                sum_protein_Bf += Bf
                n_protein += 1
            else:
                sum_HETATM_Bf += Bf
                n_HETATM += 1

    ls = [
        pdb_name[-8:-4],
        output_averaged_B_factor(sum_protein_Bf, n_protein, 'protein'),
        output_averaged_B_factor(sum_WAT_Bf, n_WAT, 'waters'),
        output_averaged_B_factor(sum_HETATM_Bf, n_HETATM, 'ligand (HETATMs)'),
    ]

    df2 = pd.DataFrame([ls], columns=['PDBID', 'Bf_protein', 'Bf_waters', 'Bf_ligand'])

    return df2

# Build the DataFrame
# Collect one single-row frame per PDB file and concatenate them once.
# DataFrame.append was removed in pandas 2.0, and appending inside a loop
# re-copies the accumulated frame on every iteration.
gl = glob.glob('../TYK2/*.pdb')
frames = [calc_averaged_B_factors(pdb_name) for pdb_name in gl]
if frames:
    df = pd.concat(frames, ignore_index=True)
else:
    # pd.concat raises on an empty list; keep the empty, column-only frame
    # that the loop-based version produced when no files matched.
    df = pd.DataFrame(columns=['PDBID', 'Bf_protein', 'Bf_waters', 'Bf_ligand'])
print(df)

# Write the table to CSV
df.to_csv('../TYK2/TYK2_B_factor.csv')

   PDBID  Bf_protein  Bf_waters  Bf_ligand
0   3LXN   19.263524  21.285909  28.421538
1   3LXP   19.752938  34.739368  22.536923
2   3NYX   30.461047  22.274286  20.869024
3   3NZ0   26.595353  29.820751  17.924348
4   3ZON   25.379496  27.537561  18.320000
5   4E1Z   47.126519  41.334146  34.373824
6   4E20   48.004676  47.940421  35.392593
7   4GFO   48.281508  42.182051  64.332619
8   4GIH   44.055532  43.383529  35.382609
9   4GII   52.950350  44.655385  46.874400
10  4GJ2   42.130938  37.401809  40.004231
11  4GJ3   41.232762  35.112879  35.225357
12  4GVJ   45.099150  41.474607  33.745455
13  4OLI   59.718621  50.743750  64.221364
14  4PY1   44.805945  44.653136  50.383810
15  4WOV   30.636511  39.928444  43.264309
16  5C01   29.295238  32.166898  26.401026
17  5C03   25.827630  32.629295  27.773983
18  5F1Z   32.987997  30.204710  30.246154
19  5F20   56.528855  61.036774  67.564167
20  5TKD   28.708724  35.686984  24.643711
21  5WAL   37.166332  32.424891  24.202609
