# Create Sequence File
- Input a DNA or RNA sequence, and this notebook will create a PDB file using tleap
- Software Requirement: 
  - tleap from AmberTools
  

### REMEMBER: change the Jupyter kernel to **`Python (AptaLoop)`**. See our README file for detailed information.

In [None]:
import os, subprocess

# Relative path to the project's data directory.
data_dir = "../../data"

# Re-enter the current working directory; this leaves the directory unchanged.
current_dir = os.getcwd()
os.chdir(current_dir)

## 1. Create fasta Sequence
Ask the user for a sequence and write it to a FASTA file.

In [None]:
def is_valid_DNA(seq):
    """
    Check if the input string is a valid DNA sequence.

    A valid sequence is non-empty and consists only of the uppercase
    letters A, T, C and G.

    Parameters:
        seq (str): A sequence string
    Returns:
        bool: whether the sequence is valid DNA or not
    """
    # all() is vacuously True for an empty string, so reject it explicitly.
    return bool(seq) and all(nucleotide in "ATCG" for nucleotide in seq)

def is_valid_RNA(seq):
    """
    Check if the input string is a valid RNA sequence.

    A valid sequence is non-empty and consists only of the uppercase
    letters A, U, C and G.

    Parameters:
        seq (str): A sequence string
    Returns:
        bool: whether the sequence is valid RNA or not
    """
    # all() is vacuously True for an empty string, so reject it explicitly.
    return bool(seq) and all(nucleotide in "AUCG" for nucleotide in seq)

# Keep prompting until the user supplies a usable sequence.
# On exit, `sequence` holds the normalised sequence and `molecule_type`
# is either "DNA" or "RNA".
while True:
    # Normalise the input so lowercase letters and surrounding whitespace
    # are accepted; the validators only recognise uppercase letters.
    sequence = input("Enter the DNA/RNA sequence: ").strip().upper()

    # The validators use all(), which is True for an empty string, so an
    # empty entry must be rejected here before they are consulted.
    if not sequence:
        print("The sequence is empty. Please try again.")
        continue

    # DNA is checked first, so a sequence made only of A, C and G
    # (valid for both alphabets) is classified as DNA.
    if is_valid_DNA(sequence):
        molecule_type = "DNA"
        print(f"You've entered a valid DNA sequence: {sequence}")
        break
    elif is_valid_RNA(sequence):
        molecule_type = "RNA"
        print(f"You've entered a valid RNA sequence: {sequence}")
        break
    else:
        print("The sequence is neither valid DNA nor RNA. Please try again.")

In [None]:
def write_fasta(seq, filename):
    """
    Write the sequence into a fasta file under given name.

    The file contains the fixed header line ``>Aptamer_Sequence`` followed
    by the sequence on a single line. An existing file is overwritten.

    Parameters:
        seq (str): A sequence string
        filename(str): A file name
    """
    with open(filename, 'w') as f:
        f.write('>Aptamer_Sequence\n')
        f.write(seq + '\n')
    # Report the file that was actually written, once it has been closed.
    print(f"The sequence is output into {filename} file")

# Replace 'sequence.fasta' with your preferred output filename
write_fasta(sequence, 'sequence.fasta')

## 2. Create PDB sequence
This part of the notebook writes a tleap input script for the sequence and runs tleap to produce a PDB file.

In [None]:
def modify_sequence_for_OL15(seq, mol_type):
    """
    Make the sequence match OL15 based on sequence type

    Each nucleotide letter is replaced by a residue name followed by a
    space: a ``D`` prefix for DNA and an ``R`` prefix for RNA, e.g.
    ``"AT"`` becomes ``"DA DT "``. Characters outside the nucleotide
    alphabet of the chosen type are left unchanged.

    Parameters:
        seq (str): A sequence string
        mol_type (str): either 'DNA' or 'RNA'
    Returns:
        str: modified sequence
    Raises:
        ValueError: if mol_type is neither 'DNA' nor 'RNA'
    """
    # NOTE(review): every position, including both ends, gets the same
    # residue name. Confirm whether tleap expects dedicated 5'/3' terminal
    # residue names here, and whether the R-prefixed RNA names match the
    # units defined by the RNA leaprc that is sourced.
    residue_names = {
        "DNA": {"A": "DA ", "T": "DT ", "C": "DC ", "G": "DG "},
        "RNA": {"A": "RA ", "U": "RU ", "C": "RC ", "G": "RG "},
    }
    if mol_type not in residue_names:
        # Fail loudly instead of returning None, which a caller could
        # otherwise format into its output as the text "None".
        raise ValueError(
            f"mol_type must be 'DNA' or 'RNA', got {mol_type!r}"
        )
    # A single translate pass maps each letter independently, so inserted
    # residue names can never be rewritten by a later substitution.
    return seq.translate(str.maketrans(residue_names[mol_type]))

# Convert the one-letter sequence into the space-separated residue names
# that go inside the braces of the tleap `sequence` command.
modified_sequence = modify_sequence_for_OL15(sequence, molecule_type)

sequence_file = "create_sequence.leap"

# Choose the force-field line by molecule type; anything other than "DNA"
# falls through to the RNA line.
# NOTE(review): confirm that a leaprc named "leaprc.RNA.OL15" exists in the
# installed AmberTools; this script assumes it does.
leaprc_line = (
    "source leaprc.DNA.OL15\n"  # For DNA with OL15 parameters
    if molecule_type == "DNA"
    else "source leaprc.RNA.OL15\n"  # For RNA with OL15 parameters
)

# Assemble the full tleap script: load parameters, build the strand,
# save it as sequence.pdb, then quit.
leap_script = "".join(
    [
        leaprc_line,
        f"seq = sequence {{{modified_sequence}}}\n",
        "savepdb seq sequence.pdb\n",
        "quit\n",
    ]
)

with open(sequence_file, "w") as file:
    file.write(leap_script)

print("The LEAP setup file for aptamer is created")

In [None]:
# Run tleap on the generated script. check=True raises
# subprocess.CalledProcessError when tleap exits with a non-zero status,
# so a failed run is not silently ignored.
subprocess.run(["tleap", "-f", "create_sequence.leap"], check=True)