In [5]:
import pandas as pd
from collections import defaultdict

def calculate_transition_matrix(sequence):
    """Estimate first-order Markov transition probabilities from a sequence.

    Every pair of adjacent items ``(current, next)`` is counted, and each
    count is divided by the total number of transitions leaving ``current``.

    Args:
        sequence: Any iterable of hashable states -- a string of characters,
            a list, a tuple, or a one-shot iterator/generator. It is
            traversed exactly once.

    Returns:
        A plain ``dict`` mapping each state that has at least one successor
        to a ``dict`` of ``{next_state: probability}``. The probabilities
        in each inner dict sum to 1 (up to float rounding). Only transitions
        that were actually observed appear. An input with fewer than two
        items yields an empty dict.
    """
    symbols = iter(sequence)
    try:
        previous = next(symbols)
    except StopIteration:
        # Empty input: there are no transitions to count.
        return {}

    # counts[current][next] -> number of times `next` directly followed `current`
    counts = defaultdict(lambda: defaultdict(int))
    for current in symbols:
        counts[previous][current] += 1
        previous = current

    transition_matrix = {}
    for state, successors in counts.items():
        total = sum(successors.values())
        # Normalise the row so the outgoing probabilities sum to 1.
        transition_matrix[state] = {
            successor: count / total for successor, count in successors.items()
        }

    return transition_matrix

# Read the sequence and normalise it. All whitespace is removed first (a stray
# space or tab from a paste would otherwise be counted as a state of its own),
# then the text is upper-cased so "a" and "A" refer to the same state.
sequence = "".join(input("Enter the DNA sequence: ").split()).upper()

transition_matrix = calculate_transition_matrix(sequence)

# Square table over every distinct symbol in the input, in sorted order,
# pre-filled with 0.0 so that transitions which never occurred read as zero.
states = sorted(set(sequence))
matrix_df = pd.DataFrame(0.0, index=states, columns=states)

# Copy the observed probabilities into the table. A symbol with no recorded
# outgoing transition (one that occurs only as the final character) keeps an
# all-zero row.
for current_state, next_states in transition_matrix.items():
    for next_state, prob in next_states.items():
        matrix_df.loc[current_state, next_state] = prob

print("\nMarkov Transition Matrix (Table Format):")
print(matrix_df)

Enter the DNA sequence:  AAACATAGGGATCATTCCGCGCGTAAGTTTT



Markov Transition Matrix (Table Format):
          A         C         G         T
A  0.333333  0.111111  0.222222  0.333333
C  0.333333  0.166667  0.500000  0.000000
G  0.142857  0.285714  0.285714  0.285714
T  0.250000  0.250000  0.000000  0.500000
