In [None]:
import os
import subprocess
from Bio import SeqIO
from Bio.Align.Applications import ClustalOmegaCommandline

def parse_fasta_file(fasta_file):
    """Read a FASTA file into a dict mapping each record id to its sequence.

    Args:
        fasta_file: Path of the FASTA file to read.

    Returns:
        dict of ``record.id -> str(record.seq)``. When two records share an
        id, the later one replaces the earlier one.
    """
    with open(fasta_file, 'r') as handle:
        return {entry.id: str(entry.seq) for entry in SeqIO.parse(handle, 'fasta')}

def calculate_conservation_score(aligned_sequence):
    """Score every alignment column by the frequency of its commonest residue.

    Args:
        aligned_sequence: Sequence of aligned strings (one per row), all of
            the same length, using '-' for gaps.

    Returns:
        A list with one float per column: the count of the most common
        non-gap symbol divided by the number of non-gap symbols in that
        column (1.0 when all non-gap symbols agree). A column made only of
        gaps scores 0.0. An empty input yields an empty list.
    """
    if not aligned_sequence:
        return []

    seq_length = len(aligned_sequence[0])
    conservation_score = []
    for i in range(seq_length):
        non_gaps = [row[i] for row in aligned_sequence if row[i] != '-']
        if not non_gaps:
            # Nothing to compare in an all-gap column; max() on an empty
            # collection would raise, so report no conservation instead.
            conservation_score.append(0.0)
            continue
        counts = {}
        for nt in non_gaps:
            counts[nt] = counts.get(nt, 0) + 1
        conservation_score.append(max(counts.values()) / len(non_gaps))
    return conservation_score

def main(fasta_file):
    """Align the sequences in ``fasta_file`` and print per-sequence conservation.

    Runs Clustal Omega on ``fasta_file`` (writing ``aligned.fasta`` in the
    current directory), scores every alignment column across all aligned
    sequences, and prints, for each input sequence, the scores of the columns
    where that sequence has a residue (gap columns are skipped).

    Args:
        fasta_file: Path of the unaligned FASTA file.
    """
    # Parse the FASTA file
    sequences = parse_fasta_file(fasta_file)

    # Align the sequences using Clustal Omega. force=True allows overwriting
    # an aligned.fasta left behind by a previous run of this notebook.
    clustalomega_cline = ClustalOmegaCommandline(
        infile=fasta_file, outfile='aligned.fasta', verbose=True, auto=True, force=True
    )
    clustalomega_cline()

    # Parse the resulting alignment
    aligned_sequences = {}
    with open('aligned.fasta', 'r') as f:
        for record in SeqIO.parse(f, 'fasta'):
            aligned_sequences[record.id] = str(record.seq)

    # Conservation is a property of a column across *all* sequences, so score
    # the whole alignment once rather than each sequence in isolation.
    column_scores = calculate_conservation_score(list(aligned_sequences.values()))

    # Report, for each sequence, the scores at its own (non-gap) positions
    for seq_name in sequences:
        aligned_sequence = aligned_sequences[seq_name]
        conservation_score = [
            score
            for base, score in zip(aligned_sequence, column_scores)
            if base != '-'
        ]
        print(f'{seq_name}: {conservation_score}')

if __name__ == '__main__':
    # Run the pipeline on the lncRNA FASTA file in the working directory.
    main(fasta_file='lncRNA.fa')

    

In [None]:
from Bio import SeqIO, AlignIO
from Bio.Align.Applications import ClustalOmegaCommandline
import subprocess
import os

def calculate_conservation_score(alignment):
    """Return one score per alignment column: row count / distinct symbols.

    Every character found in a column is counted as a symbol; no character
    (including '-') is treated specially. A column in which all rows agree
    therefore scores the number of rows, and a column in which every row
    differs scores 1.0.

    Args:
        alignment: Object exposing ``get_alignment_length()`` and iterating
            over records that have an indexable ``seq`` attribute.

    Returns:
        list of floats, one per column.
    """
    scores_per_column = []
    for col_idx in range(alignment.get_alignment_length()):
        symbols = [row.seq[col_idx] for row in alignment]
        scores_per_column.append(len(symbols) / len(set(symbols)))
    return scores_per_column

def main(fasta_file):
    """Align ``fasta_file`` with Clustal Omega and print per-sequence scores.

    The alignment is written to ``aligned.fasta`` in the current directory.
    Column scores are computed once for the whole alignment; for each record
    the scores of the columns where that record has a residue (not a gap)
    are printed.

    Args:
        fasta_file: Path of the unaligned FASTA file.
    """
    # Run Clustal Omega to align the sequences. force=True allows overwriting
    # an aligned.fasta left behind by a previous run of this notebook.
    clustalomega_cline = ClustalOmegaCommandline(
        infile=fasta_file, outfile='aligned.fasta', verbose=True, auto=True, force=True
    )
    clustalomega_cline()

    # Parse the resulting alignment
    alignment = AlignIO.read('aligned.fasta', 'fasta')

    # Calculate the conservation score of every column
    conservation_scores = calculate_conservation_score(alignment)

    # Scores are per column, not per record, so they cannot be zipped against
    # the records directly; project them onto each record's non-gap positions.
    for record in alignment:
        residue_scores = [
            score
            for base, score in zip(str(record.seq), conservation_scores)
            if base != '-'
        ]
        print(f'{record.id}: {residue_scores}')

if __name__ == '__main__':
    # Input FASTA expected in the working directory.
    fasta_file = 'lncRNA.fa'
    main(fasta_file=fasta_file)


In [None]:
from Bio.Align.Applications import ClustalOmegaCommandline
from Bio import AlignIO
from Bio.Seq import Seq
from Bio.Alphabet import generic_dna
import subprocess
import os

def parse_fasta_file(fasta_file):
    """
    Parse a fasta file and return a dictionary of sequence names and sequences

    The name of a record is its whole header line without the leading '>'
    and surrounding whitespace. Sequence lines are concatenated with
    surrounding whitespace removed. Blank lines are ignored wherever they
    occur. If a header is repeated, the later record replaces the earlier.

    Raises:
        ValueError: if sequence data appears before the first '>' header.
    """
    parts_by_name = {}
    seq_name = None
    with open(fasta_file, 'r') as f:
        for line in f:
            stripped = line.strip()
            if not stripped:
                # Blank lines carry no data; skipping them also keeps a
                # leading blank line from being treated as sequence.
                continue
            if line.startswith('>'):
                seq_name = line[1:].strip()
                parts_by_name[seq_name] = []
            elif seq_name is None:
                raise ValueError(
                    f'{fasta_file}: sequence data found before the first header line'
                )
            else:
                parts_by_name[seq_name].append(stripped)
    # Join once per record instead of growing a string line by line.
    return {name: ''.join(parts) for name, parts in parts_by_name.items()}

def calculate_conservation_score(alignment):
    """
    Calculate conservation score for each position in an alignment

    For each column the frequency ``f`` of every distinct character is taken
    over the number of rows, ``f * (1 - f)`` is accumulated over those
    characters, and the column score is one minus that total. Every
    character in the column is counted; none is treated specially.

    ``alignment`` must support ``len()``, ``alignment[0]`` and the
    column slice ``alignment[:, i]``.
    """
    row_total = len(alignment)
    n_columns = len(alignment[0])
    per_column = []
    for col_idx in range(n_columns):
        tally = {}
        for symbol in alignment[:, col_idx]:
            tally[symbol] = tally.get(symbol, 0) + 1
        impurity = 0
        for occurrences in tally.values():
            share = occurrences / row_total
            impurity += share * (1 - share)
        per_column.append(1 - impurity)
    return per_column

# Specify the path to the input fasta file
fasta_file = 'lncRNA.fa'

# Run Clustal Omega to align the sequences. force=True allows overwriting an
# aligned.fasta left behind by a previous run, so the cell can be re-executed.
clustalomega_cline = ClustalOmegaCommandline(infile=fasta_file, outfile='aligned.fasta', verbose=True, auto=True, force=True)
stdout, stderr = clustalomega_cline()

# Parse the resulting alignment
alignment = AlignIO.read('aligned.fasta', 'fasta')

# Calculate the conservation score for each position in the alignment
conservation_score = calculate_conservation_score(alignment)

# Print the conservation score for each sequence: only the columns where the
# sequence has a residue are reported, numbered by 0-based position within
# that (ungapped) sequence, so the output differs between sequences.
for seq_record in alignment:
    print(f'{seq_record.id}:')
    position = 0
    for base, score in zip(str(seq_record.seq), conservation_score):
        if base == '-':
            continue
        print(f'Position {position}: {score:.2f}')
        position += 1

    print()
