# Amino acid composition

In [4]:
from collections import Counter

def calculate_residue_composition(sequence):
  """
  Tally the standard amino acid residues in a sequence and express each tally
  as a percentage of all valid residues.

  Characters that are not one of the 20 upper-case one-letter codes are
  skipped and do not contribute to the total.

  Args:
      sequence: A string representing the amino acid sequence.

  Returns:
      A tuple ``(residue_composition, amino_acid_counts)``:
          - residue_composition: dict mapping residue -> percentage (0-100)
            of the valid residues, in order of first appearance.
          - amino_acid_counts: Counter mapping residue -> number of occurrences.
  """
  # The 20 standard one-letter codes (upper case only)
  allowed = frozenset("ACDEFGHIKLMNPQRSTVWY")

  # Count residue by residue, silently dropping anything unrecognised
  amino_acid_counts = Counter()
  for residue in sequence:
    if residue in allowed:
      amino_acid_counts[residue] += 1

  # Denominator: number of residues that survived the filter
  valid_total = sum(amino_acid_counts.values())

  # Convert every tally into a percentage of the valid residues
  percentages = {}
  for residue, occurrences in amino_acid_counts.items():
    percentages[residue] = (occurrences / valid_total) * 100

  return percentages, amino_acid_counts

# Example usage: the "O" and trailing "X" characters are not among the 20
# valid residue letters, so they are excluded from the counts and the total.
sequence = "PLEASEREPRESENTTHEINFORMATIONX"

# The function returns (percentages, counts) in that order.
residue_composition, amino_acid_counts = calculate_residue_composition(sequence)

# Print the raw counts first, then the percentage of valid residues per letter
print("Residue composition (count):", amino_acid_counts)
print("Residue composition (frequency):", residue_composition)



Residue composition (count): Counter({'E': 6, 'R': 3, 'N': 3, 'T': 3, 'P': 2, 'A': 2, 'S': 2, 'I': 2, 'L': 1, 'H': 1, 'F': 1, 'M': 1})
Residue composition (frequency): {'P': 7.4074074074074066, 'L': 3.7037037037037033, 'E': 22.22222222222222, 'A': 7.4074074074074066, 'S': 7.4074074074074066, 'R': 11.11111111111111, 'N': 11.11111111111111, 'T': 11.11111111111111, 'H': 3.7037037037037033, 'I': 7.4074074074074066, 'F': 3.7037037037037033, 'M': 3.7037037037037033}


# Hydrophobic and hydrophilic amino acid composition

In [5]:
from collections import Counter

def calculate_residue_composition(sequence):
  """
  Calculate the residue composition of an amino acid sequence together with
  its hydrophobic / hydrophilic totals.

  Characters that are not one of the 20 upper-case one-letter codes are
  ignored and do not contribute to the total used for percentages.

  Args:
      sequence: A string representing the amino acid sequence.

  Returns:
      A 6-tuple:
          - residue_composition: dict mapping residue -> percentage (0-100)
            of the valid residues.
          - amino_acid_counts: Counter mapping residue -> occurrence count.
          - hydrophobic_count: number of residues in the hydrophobic set.
          - hydrophobic_freq: hydrophobic_count as a percentage of valid residues.
          - hydrophilic_count: number of residues in the hydrophilic set.
          - hydrophilic_freq: hydrophilic_count as a percentage of valid residues.
      When the sequence contains no valid residues, both dictionaries are
      empty, the counts are 0 and the frequencies are 0.0.
  """
  # Define valid amino acids and properties (modify if needed)
  valid_amino_acids = set("ACDEFGHIKLMNPQRSTVWY")
  hydrophobic_residues = set("ACFILMPVW")
  hydrophilic_residues = set("DEHKNQRSTY")  # Adjust based on your hydrophobicity scale
  # NOTE: glycine ("G") is valid but belongs to neither set above, so for
  # sequences containing G the two class frequencies will not add up to 100.

  # Filter out invalid characters and count occurrences
  amino_acid_counts = Counter(aa for aa in sequence if aa in valid_amino_acids)

  # Get the total number of valid amino acids
  total_count = sum(amino_acid_counts.values())

  def percentage(count):
    # An empty or all-invalid sequence has total_count == 0; report 0.0
    # rather than raising ZeroDivisionError.
    return (count / total_count) * 100 if total_count else 0.0

  # Calculate residue composition (percentage per residue)
  residue_composition = {aa: percentage(count) for aa, count in amino_acid_counts.items()}

  # Counter returns 0 for residues that never occurred, so plain indexing is safe
  hydrophobic_count = sum(amino_acid_counts[aa] for aa in hydrophobic_residues)
  hydrophilic_count = sum(amino_acid_counts[aa] for aa in hydrophilic_residues)

  # Calculate hydrophobic and hydrophilic frequencies
  hydrophobic_freq = percentage(hydrophobic_count)
  hydrophilic_freq = percentage(hydrophilic_count)

  # Return results
  return residue_composition, amino_acid_counts, hydrophobic_count, hydrophobic_freq, hydrophilic_count, hydrophilic_freq

def main():
  """
  Run calculate_residue_composition on a sample sequence and print a short
  report: per-residue counts and percentages, then the hydrophobic and
  hydrophilic totals.
  """
  sequence = "PLEASEREPRESENTTHEINFORMATION"

  # Unpack the six results returned for the sample sequence
  (composition, counts,
   n_hydrophobic, pct_hydrophobic,
   n_hydrophilic, pct_hydrophilic) = calculate_residue_composition(sequence)

  # Each entry holds the positional arguments of one print() call
  report = (
      ("Residue composition (count):", counts),
      ("Residue composition (frequency):", composition),
      ("Hydrophobic count:", n_hydrophobic),
      ("Hydrophobic frequency:", pct_hydrophobic, "%"),
      ("Hydrophilic count:", n_hydrophilic),
      ("Hydrophilic frequency:", pct_hydrophilic, "%"),
  )
  for fields in report:
    print(*fields)

# Run the demo only when this code is executed directly, not when it is imported.
if __name__ == "__main__":
  main()


Residue composition (count): Counter({'E': 6, 'R': 3, 'N': 3, 'T': 3, 'P': 2, 'A': 2, 'S': 2, 'I': 2, 'L': 1, 'H': 1, 'F': 1, 'M': 1})
Residue composition (frequency): {'P': 7.4074074074074066, 'L': 3.7037037037037033, 'E': 22.22222222222222, 'A': 7.4074074074074066, 'S': 7.4074074074074066, 'R': 11.11111111111111, 'N': 11.11111111111111, 'T': 11.11111111111111, 'H': 3.7037037037037033, 'I': 7.4074074074074066, 'F': 3.7037037037037033, 'M': 3.7037037037037033}
Hydrophobic count: 9
Hydrophobic frequency: 33.33333333333333 %
Hydrophilic count: 18
Hydrophilic frequency: 66.66666666666666 %


In [16]:
from collections import Counter

def calculate_residue_composition(sequence):
  """
  Calculate the residue composition of an amino acid sequence together with
  its hydrophobic / hydrophilic / neutral totals.

  Characters that are not one of the 20 upper-case one-letter codes are
  ignored and do not contribute to the total used for percentages.

  Args:
      sequence: A string representing the amino acid sequence.

  Returns:
      An 8-tuple:
          - residue_composition: dict mapping residue -> percentage (0-100)
            of the valid residues.
          - amino_acid_counts: Counter mapping residue -> occurrence count.
          - hydrophobic_count, hydrophobic_freq: count and percentage of
            residues in the hydrophobic set.
          - hydrophilic_count, hydrophilic_freq: count and percentage of
            residues in the hydrophilic set.
          - neutral_count, neutral_freq: count and percentage of residues in
            the neutral set.
      When the sequence contains no valid residues, both dictionaries are
      empty, the counts are 0 and the frequencies are 0.0.
  """
  # Define valid amino acids and properties (modify if needed).
  # The three classes below partition the 20 valid residues.
  valid_amino_acids = set("ACDEFGHIKLMNPQRSTVWY")
  hydrophobic_residues = set("ALVICMF")
  hydrophilic_residues = set("RKEDNQH")
  neutral_residues = set("PSTGWY")

  # Filter out invalid characters and count occurrences
  amino_acid_counts = Counter(aa for aa in sequence if aa in valid_amino_acids)

  # Get the total number of valid amino acids
  total_count = sum(amino_acid_counts.values())

  def percentage(count):
    # An empty or all-invalid sequence has total_count == 0; report 0.0
    # rather than raising ZeroDivisionError.
    return (count / total_count) * 100 if total_count else 0.0

  # Calculate residue composition (percentage per residue)
  residue_composition = {aa: percentage(count) for aa, count in amino_acid_counts.items()}

  # Counter returns 0 for residues that never occurred, so plain indexing is safe
  hydrophobic_count = sum(amino_acid_counts[aa] for aa in hydrophobic_residues)
  hydrophilic_count = sum(amino_acid_counts[aa] for aa in hydrophilic_residues)
  neutral_count = sum(amino_acid_counts[aa] for aa in neutral_residues)

  # Calculate class frequencies as percentages of the valid residues
  hydrophobic_freq = percentage(hydrophobic_count)
  hydrophilic_freq = percentage(hydrophilic_count)
  neutral_freq = percentage(neutral_count)

  # Return results
  return residue_composition, amino_acid_counts, hydrophobic_count, hydrophobic_freq, hydrophilic_count, hydrophilic_freq, neutral_count, neutral_freq

def analyze_sequence_list(sequences):
  """
  Run calculate_residue_composition on every sequence in a list and collect
  the results under generated labels.

  Args:
      sequences: A list of strings representing amino acid sequences.

  Returns:
      A dictionary keyed by "Sequence 1", "Sequence 2", ... (1-based position
      in the input list) whose values are the result tuples produced by
      calculate_residue_composition for the corresponding sequence.
  """
  # Labels are derived from the 0-based index, shifted to start at 1
  return {
      f"Sequence {i+1}": tuple(calculate_residue_composition(sequence))
      for i, sequence in enumerate(sequences)
  }

def main():
  """
  Analyze two sample sequences with analyze_sequence_list and print, for each
  one, its residue counts and the hydrophobic / hydrophilic / neutral totals.
  """
  sequences = [
      "PLEASEREPRESENTTHEINFORMATION",
      "GLOBINISAFUNCTIONALPROTEIN",
  ]

  # (label, position in the result tuple, trailing print arguments)
  report_layout = (
      ("Residue composition (count):", 1, ()),
      ("Hydrophobic count:", 2, ()),
      ("Hydrophobic frequency:", 3, ("%",)),
      ("Hydrophilic count:", 4, ()),
      ("Hydrophilic frequency:", 5, ("%",)),
      ("Neutral count:", 6, ()),
      ("Neutral freq:", 7, ("%",)),
  )

  # One report section per analyzed sequence
  for sequence_id, data in analyze_sequence_list(sequences).items():
    print(f"\n** {sequence_id} Analysis **")
    for label, position, suffix in report_layout:
      print(label, data[position], *suffix)

      

# Run the demo only when this code is executed directly, not when it is imported.
if __name__ == "__main__":
  main()



** Sequence 1 Analysis **
Residue composition (count): Counter({'E': 6, 'R': 3, 'N': 3, 'T': 3, 'P': 2, 'A': 2, 'S': 2, 'I': 2, 'L': 1, 'H': 1, 'F': 1, 'M': 1})
Hydrophobic count: 7
Hydrophobic frequency: 25.925925925925924 %
Hydrophilic count: 13
Hydrophilic frequency: 48.148148148148145 %
Neutral count: 7
Neutral freq: 25.925925925925924 %

** Sequence 2 Analysis **
Residue composition (count): Counter({'I': 4, 'N': 4, 'L': 2, 'A': 2, 'T': 2, 'G': 1, 'S': 1, 'F': 1, 'C': 1, 'P': 1, 'R': 1, 'E': 1})
Hydrophobic count: 10
Hydrophobic frequency: 47.61904761904761 %
Hydrophilic count: 6
Hydrophilic frequency: 28.57142857142857 %
Neutral count: 5
Neutral freq: 23.809523809523807 %
