# MHC-Fine Colab

## Setup

In [1]:
# !git clone https://bitbucket.org/abc-group/mhc-fine.git
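# %cd mhc-fine/   (use %cd, not !cd: "!cd" runs in a temporary subshell and does not change the notebook's working directory)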

In [2]:
import torch
# Warn early (without stopping the notebook) when PyTorch reports no usable CUDA device.
cuda_available = torch.cuda.is_available()
if not cuda_available:
    print("Please check your setup of GPU.")

In [3]:
from src import preprocess, model
import pandas as pd
import os
import gdown

In [4]:
# Fetch the pretrained weights once; later runs reuse the file already on disk.
model_path = "data/model/mhc_fine_weights.pt"
if not os.path.exists(model_path):
    # Google Drive file ID of the published weights file.
    file_id = "1gz8uF8DKE0CzyX_WeDGOX7xP69LjpaZT"
    # Make sure the destination folder exists instead of relying on gdown to
    # create missing parent directories.
    os.makedirs(os.path.dirname(model_path), exist_ok=True)
    gdown.download(f"https://drive.google.com/uc?id={file_id}", model_path)
    # Fail here with a clear message rather than later, when the model is built.
    if not os.path.exists(model_path):
        raise FileNotFoundError(
            f"Model weights could not be downloaded to {model_path}; "
            "check the network connection and the Google Drive quota, then re-run this cell."
        )

In [5]:
# Give the bundled MSA generator binary (a3m_generation/msa_run) execute permission.
# The path is relative, so this cell must run from the repository root.
# NOTE(review): presumably msa_run is what preprocess.get_a3m launches — confirm in src/preprocess.py.
!chmod +x a3m_generation/msa_run

## Input your data

If this is not your first time running the notebook, change this ID to keep the MSA data you queried in the previous run; otherwise it will be overwritten.

In [6]:
# Identifier for this run: used below as the folder name under data/msa/ and passed to inference.
unique_id = "6VRN_A"
# Amino-acid sequence passed as the protein chain to MSA generation and to preprocessing.
protein_sequence = "GSHSMRYFFTSVSRPGRGEPRFIAVGYVDDTQFVRFDSDAASQRMEPRAPWIEQEGPEYWDGETRKVKAHSQTHRVDLGTLRGYYNQSEAGSHTVQRMYGCDVGSDWRFLRGYHQYAYDGKDYIALKEDLRSWTAADMAAQTTKHKWEAAHVAEQLRAYLEGTCVEWLRRYLENGKETLQRT"
# Amino-acid sequence passed as the peptide chain to preprocessing.
peptide_sequence = "HMTEVVRHC"

## Get the MSA data

In [7]:
# The aggregated MSA for this run lives at <cwd>/data/msa/<unique_id>/mmseqs/aggregated.a3m.
a3m_path = os.path.join(
    os.getcwd(), "data", "msa", unique_id, "mmseqs", "aggregated.a3m"
)

# Generate the alignment only when nothing exists at that path yet;
# an existing file is reused as-is.
a3m_is_cached = os.path.exists(a3m_path)
if not a3m_is_cached:
    preprocess.get_a3m(protein_sequence, a3m_path, unique_id)

## Preprocess the data

In [8]:
# Build the model input from the protein sequence, the peptide sequence and the MSA file at a3m_path.
# The result is what gets passed to my_model.inference later in the notebook.
np_sample = preprocess.preprocess_for_inference(protein_sequence, peptide_sequence, a3m_path)

Reading a3m file...
Processing protein chain...
GSHSMRYFFTSVSRPGRGEPRFIAVGYVDDTQFVRFDSDAASQRMEPRAPWIEQEGPEYWDGETRKVKAHSQTHRVDLGTLRGYYNQSEAGSHTVQRMYGCDVGSDWRFLRGYHQYAYDGKDYIALKEDLRSWTAADMAAQTTKHKWEAAHVAEQLRAYLEGTCVEWLRRYLENGKETLQRT
Processing peptide chain...
HMTEVVRHC
Mering features...


## Run AlphaFold, display metrics and save prediction

In [9]:
# Instantiate the project's model wrapper from src/model.py.
# NOTE(review): presumably this loads the weights downloaded to model_path — confirm in src/model.py.
my_model = model.Model()

In [10]:
# Run prediction on the preprocessed sample, tagged with unique_id.
# Per the recorded output, this writes ./output/<unique_id>.pdb and returns a dict of
# pLDDT metrics ('mean_plddt', 'mean_masked_plddt'), shown as the cell result.
my_model.inference(np_sample, unique_id)

Running inference...
Writing predicted structure:  ./output/6VRN_A.pdb


{'mean_plddt': 98.18183135986328, 'mean_masked_plddt': 96.92658318413629}