# In [2]:  (Jupyter notebook cell marker left over from the export)
import re
import pandas as pd
import numpy as np
from typing import List

def process_MD_out(file: str,
    terms: List = ['NSTEP', 'TIME', 'TEMP', 'PRESS', 'Etot',
             'EKtot', 'EPtot', 'EELEC', 'VDWAALS']):
    '''
    Parse an AMBER molecular-dynamics .out file into a DataFrame.

    For every label in ``terms`` each line of the file is scanned; when
    the label occurs in a line, the first whitespace-delimited token that
    follows the next ``=`` sign is converted to ``float`` and collected.
    Lines where the label is missing, where no ``=`` follows it, or where
    the token is not a number are skipped.

    Parameters
    ----------
    file : str
        Path of the .out file (read with latin1 encoding).
    terms : list of str
        Labels to extract. They are matched literally and
        case-sensitively, so labels containing characters such as
        parentheses (e.g. ``'TIME(PS)'``) work as written. The default
        list is only read, never modified.

    Returns
    -------
    pandas.DataFrame
        One float column per term, in the order given, with the last
        row removed.

    Raises
    ------
    ValueError
        Raised by pandas when the terms did not all yield the same
        number of values.
    '''
    with open(file, encoding="latin1") as f:
        lines = f.readlines()

    results = {}
    for term in terms:
        values = []
        skip = len(term)
        for line in lines:
            # Plain substring search: the label is data, not a regex.
            start = line.find(term)
            if start < 0:
                continue
            # Text right after the label looks like "(PS) =   2.000  TEMP(K) = ...";
            # the value is the first token between the first and second '='.
            fields = line[start + skip:].split('=', 2)
            if len(fields) < 2:
                continue
            tokens = fields[1].split()
            if not tokens:
                continue
            try:
                values.append(float(tokens[0]))
            except ValueError:
                # Label found in a non-numeric context; ignore this line.
                continue
        results[term] = values

    # The final record is discarded.
    # NOTE(review): presumably this drops a trailing summary block rather
    # than a real MD step -- confirm against an actual output file.
    df = pd.DataFrame(results)[:-1]
    return df

def process_MIN_out(file: str,
    terms: List = ['NSTEP', 'ENERGY', 'RMS', 'NAME', 'NUMBER']):
    '''
    Parse an AMBER minimisation .out file into a DataFrame.

    Every line containing the text ``ENERGY`` is treated as a table
    header, and the line immediately after it is taken as the data row
    (lower-cased and split on whitespace). A header that is the very last
    line of the file has no data row and is ignored.

    Parameters
    ----------
    file : str
        Path of the .out file (read with latin1 encoding).
    terms : list of str
        NOTE(review): currently unused -- the returned columns are fixed.
        Kept so existing callers that pass it keep working.

    Returns
    -------
    pandas.DataFrame
        Columns ``NSTEP, ENERGY, RMS, GMAX, NUMBER`` (``NAME`` is
        dropped), converted to numbers; values that cannot be parsed
        become NaN.
    '''
    with open(file, encoding="latin1") as f:
        lines = f.readlines()

    header = ['NSTEP', 'ENERGY', 'RMS', 'GMAX', 'NAME', 'NUMBER']

    # Pair each line with its successor; zip stops before the final line,
    # so a trailing header cannot index past the end of the file.
    rows = [
        following.lower().split()
        for line, following in zip(lines, lines[1:])
        if 'ENERGY' in line
    ]

    # Convert the numeric columns (everything except NAME) to numbers.
    # The conversion is applied row by row, as before, so the resulting
    # dtypes are unchanged.
    num_cols = [col for col in header if col != 'NAME']
    df_min = pd.DataFrame(rows, columns=header)[num_cols].\
            apply(pd.to_numeric, errors='coerce', axis=1)
    return df_min

def _get_seq(ranges, x, sep = ' '):
	from itertools import chain
	lista = [list( range(valor[0], valor[1] + 1) ) for valor in ranges]
	# Se obtiene la lista de residuos, incluida en formato de cadena de texto
	seq_residues = list(chain.from_iterable(lista))
	seq_residues_str = sep.join(str(e) for e in seq_residues)
	if x == 'str':
		final_seq = seq_residues_str
	elif x == 'list':
		final_seq = seq_residues
	else: 
		final_seq = "Especifica el tipo de retorno: 'str' o 'list'"
	return(final_seq)


def get_pocket_residues(x='str', sep=' '):
    '''
    Return the hard-coded pocket residue numbers.

    x and sep are handed to _get_seq unchanged: x='str' gives the numbers
    joined by sep, x='list' gives a list of ints.
    '''
    # Pocket (4FKW and its ligand at 7 A): 8-19, 30-33, 64-65, 79-90, 129-134, 143-146
    # resid 8 to 19 30 to 33 64 65 79 to 90 129 to 134 143 to 146
    pocket_ranges = [
        [8, 19],
        [30, 33],
        [64, 65],
        [79, 90],
        [129, 134],
        [143, 146],
    ]
    return _get_seq(pocket_ranges, x, sep)

def get_pisani_residues(x='str', sep=' '):
    '''
    Return the hard-coded "Pisani" residue numbers.

    x and sep are handed to _get_seq unchanged: x='str' gives the numbers
    joined by sep, x='list' gives a list of ints.
    '''
    # Inclusive [first, last] residue ranges.
    pisani_ranges = [
        [4, 12],
        [17, 24],
        [29, 34],
        [46, 55],
        [66, 71],
        [76, 81],
        [87, 93],
        [101, 120],
        [121, 135],
        [140, 150],
        [182, 194],
        [277, 282],
    ]
    return _get_seq(pisani_ranges, x, sep)