### Downloading the model

Download the folder containing the BERT model from Google Drive

In [None]:
import gdown

# Google Drive folder that holds the BERT model files.
url = "https://drive.google.com/drive/folders/12JCOBAEeAcym9U_HWQ3HMpebgln6Ys3W"

# Download the whole folder; cookies are disabled for this request.
gdown.download_folder(url=url, use_cookies=False)

Specify the path to the BERT model

In [None]:
import os

# Directory that contains the downloaded BERT model.
# Resolved against the current working directory instead of hard-coding Colab's
# "/content" prefix, so the notebook also runs outside Colab. With Colab's
# default working directory ("/content") the value is still "/content/models".
# NOTE(review): assumes the download step placed a folder named "models" in the
# current working directory - confirm against the shared Drive folder name.
model_path = os.path.abspath("models")

### Installing required packages and libraries

Install the transformers and accelerate packages

In [None]:
!pip install transformers
!pip install accelerate

Import the necessary libraries

In [None]:
from transformers import pipeline

### Declaring a function that predicts intensity from a peptide sequence

The function predict_umami_intensity takes as input a list of strings `list[str]`. The output is a list of intensities `list[float]`. Example usage:

For one peptide:
```
predict_umami_intensity(["ALPAEV"])
```
For several peptides:
```
predict_umami_intensity(["ALPAEV", "AAAA", "AAEEAA"])
```


In [None]:
# Loaded pipelines keyed by model path, so the model is read from disk only once
# per path instead of on every call.
_PIPELINE_CACHE = {}


def predict_umami_intensity(peptides: list[str]) -> list[float]:
    """Predict the umami intensity of each peptide sequence.

    Every sequence is rewritten with a space between consecutive characters
    (e.g. "EEFD" -> "E E F D") before being handed to a `text-classification`
    pipeline loaded from the module-level `model_path`. The pipeline is created
    with `function_to_apply="none"`, and the `"score"` field of each prediction
    is returned unchanged.

    Args:
        peptides: Peptide sequences as strings of one-letter residue codes.

    Returns:
        One score per input peptide, in input order. An empty input yields an
        empty list without loading the model.
    """
    spaced_peptides = [" ".join(pept) for pept in peptides]
    if not spaced_peptides:
        # Nothing to score: avoid loading the model and calling the pipeline
        # with an empty batch.
        return []

    pipe = _PIPELINE_CACHE.get(model_path)
    if pipe is None:
        # Building the pipeline loads the model, which is expensive; do it once
        # per model path and reuse it on later calls.
        pipe = pipeline('text-classification', model=model_path, function_to_apply="none")
        _PIPELINE_CACHE[model_path] = pipe

    result = pipe(spaced_peptides)
    return [pred["score"] for pred in result]

You can test your peptide here:

In [None]:
# Example call with a single peptide; as the last expression of the cell, the
# returned list of scores is displayed as the cell output.
predict_umami_intensity(["EEFD"])

### Constructing a heat map

Install the Python libraries needed for plotting

In [None]:
!pip install seaborn
!pip install pandas

Import the necessary Python libraries

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd

In [None]:
# Sequence fragments; every ordered pair of fragments is concatenated into one peptide.
aminoacid_dictionary = ["EE", "EG", "EK", "YG"]
aminoacid_dictionary_length = len(aminoacid_dictionary)

# One record per ordered pair: [first fragment, second fragment, concatenated peptide].
peptides = [
    [head, tail, head + tail]
    for head in aminoacid_dictionary
    for tail in aminoacid_dictionary
]

# Position of the concatenated peptide string inside each record.
str_peptide_index = 2
peptides_for_ml = [record[str_peptide_index] for record in peptides]

print(peptides_for_ml)

# Score all concatenated peptides in one call, then format to two decimals.
c_values = predict_umami_intensity(peptides_for_ml)
c_formatted_values = ['%.2f' % score for score in c_values]

# Attach the formatted score to its record as a fourth element.
for position, record in enumerate(peptides):
    record.append(c_formatted_values[position])

# Long-form table: one row per (first fragment, second fragment) pair.
d = {
    'aminoacid_1': [record[0] for record in peptides],
    'aminoacid_2': [record[1] for record in peptides],
    'score': [float(record[3]) for record in peptides],
}

df = pd.DataFrame(d)

# Reshape from long to wide form: rows are the first fragment, columns the second.
result = df.pivot(index="aminoacid_1", columns="aminoacid_2", values="score")

sns.heatmap(result, annot=True, fmt="g", cmap='viridis')
plt.show()