## Summary

1. Create a folder inside the `${DATA_DIR}/user_input` data directory (i.e. `/home/kimlab1/database_data/elaspic/user_input` in the case of our local setup).<br>Set `JOB_ID` to the name of the folder you created.

1. Place a PDB file into that folder and set `STRUCTURE_FILE` to its file name.

---

## Imports

In [None]:
from dotenv import load_dotenv

load_dotenv("../.env", override=True)

In [None]:
import io
import os
import random
import tempfile
from pathlib import Path

import pandas as pd
import requests
import sqlalchemy as sa
from kmbio import PDB
from kmtools import structure_tools

In [None]:
# Show wide/long DataFrames in full.
# Use the fully-qualified option names: pandas matches option patterns as
# regular expressions, and the bare "max_columns"/"max_rows" patterns become
# ambiguous (raising OptionError) in pandas versions that also define
# "styler.render.max_columns"/"styler.render.max_rows".
pd.set_option("display.max_columns", 1_000)
pd.set_option("display.max_rows", 1_000)

## Parameters

In [None]:
NOTEBOOK_DIR = Path("03_submit_mutations").resolve()
NOTEBOOK_DIR.mkdir(exist_ok=True)

NOTEBOOK_DIR

In [None]:
JOB_ID = "mdm2-peptide"

JOB_ID

In [None]:
STRUCTURE_FILE = "3eqs.pdb"

STRUCTURE_FILE

In [None]:
# Resolve the shared user-input directory; strict=True makes a missing
# directory fail here instead of further down the notebook.
USER_INPUT_DIR = (Path(os.environ["DATA_DIR"]) / "user_input").resolve(strict=True)

# Per-job folder named after JOB_ID; created if it does not exist yet.
JOB_DIR = USER_INPUT_DIR / JOB_ID
JOB_DIR.mkdir(exist_ok=True)

JOB_DIR

In [None]:
ELASPIC_REST_API_URL = "http://192.168.6.241:8080/api/v1/"

ELASPIC_REST_API_URL

In [None]:
# Database connection settings are read from the environment.
DB_USER, DB_PASSWORD, DB_HOST, DB_PORT = (
    os.environ[key] for key in ("DB_USER", "DB_PASSWORD", "DB_HOST", "DB_PORT")
)

# NOTE(review): the credentials are interpolated verbatim into the URL.
# SQLAlchemy expects special characters in a password to be URL-encoded —
# confirm DB_PASSWORD is stored in that form.
db_url = f"mysql://{DB_USER}:{DB_PASSWORD}@{DB_HOST}:{DB_PORT}/elaspic_webserver"
engine = sa.create_engine(db_url)

## Helper functions

## Workspace

### Load structure

In [None]:
structure_ref = PDB.load(JOB_DIR.joinpath(STRUCTURE_FILE))

In [None]:
PDB.view_structure(structure_ref)

In [None]:
# structure = PDB.Structure(STRUCTURE_FILE.split('.')[0])
# model = PDB.Model(0)
# structure.add(model)
# chain2 = structure_ref[0]["A"].copy()
# chain2.id = "B"
# chain1 = structure_ref[0]["B"].copy()
# chain1.id = "A"
# model.add(chain1)
# model.add(chain2)

In [None]:
structure = structure_ref

In [None]:
PDB.view_structure(structure)

In [None]:
PDB.save(structure, JOB_DIR.joinpath("input.pdb"))

### Get sequence

In [None]:
# Extract the sequence of chain "A" from structure[0].
# NOTE(review): with if_unknown="replace" and an empty unknown_residue_marker,
# unrecognised residues are presumably dropped from the sequence — confirm
# against kmtools.structure_tools.get_chain_sequence.
sequence = structure_tools.get_chain_sequence(
    structure[0]["A"], if_unknown="replace", unknown_residue_marker=""
)

sequence

### Get mutations

In [None]:
# Flatten the structure into a table and keep the chain "A" rows whose
# residue_id_1 lies in the inclusive range 51-61.
df = structure.to_dataframe()
in_chain_a = df["chain_id"] == "A"
in_window = df["residue_id_1"].between(51, 61)
interface_slice_df = df[in_chain_a & in_window]

interface_slice_df.head()

In [None]:
# Build one mutation string per distinct residue of the interface slice,
# formatted as <wild-type code><residue_idx + 1><mutant code>.
unique_residues = interface_slice_df[
    ["chain_id", "residue_idx", "residue_resname"]
].drop_duplicates()

mutations = []
for row in unique_residues.itertuples():
    wild_type = structure_tools.constants.AAA_DICT[row.residue_resname]
    # Every residue is mutated to F, except E, which is mutated to G.
    # NOTE(review): a wild-type F therefore yields an F->F entry — confirm
    # that this is intended.
    mutant = "G" if wild_type == "E" else "F"
    mutations.append(f"{wild_type}{row.residue_idx + 1}{mutant}")

mutations

### Remove finished mutations

In [None]:
# Fetch the rows stored for this job (protein_id == JOB_ID) that already have
# a ddg value.
# NOTE(review): JOB_ID is interpolated straight into the SQL text. That is
# fine for a constant defined in this notebook, but switch to bound
# parameters if the value ever comes from external input.
sql = f"""\
select *
from elaspic_core_mutation_local
where protein_id = '{JOB_ID}' and ddg is not NULL;
"""

finished_df = pd.read_sql_query(sql, engine)
# Number of rows returned by the query.
len(finished_df)

In [None]:
finished_df.head()

In [None]:
def _residue_number(mutation):
    """Return the integer between the first and last character of a mutation string."""
    return int(mutation[1:-1])


# Drop every mutation that already appears in finished_df, then order the
# remainder by residue number.
remaining = set(mutations) - set(finished_df["mutation"])
unfinished_mutations = sorted(remaining, key=_residue_number)
# random.shuffle(unfinished_mutations)

len(unfinished_mutations)

### Submit jobs

In [None]:
# From here on, only the mutations that are not finished yet are submitted.
mutations = unfinished_mutations
# Prefix every mutation with "1_".
# NOTE(review): the "1" is presumably the index of the chain/partner in the
# uploaded structure — confirm against the ELASPIC API.
pdb_mutations = [f"1_{mutation}" for mutation in mutations]
# The request carries all mutations as one comma-separated string.
pdb_mutation = ",".join(pdb_mutations)

# Preview at most the first 1000 characters.
pdb_mutation[:1000]

In [None]:
# Single request entry: all mutations for this job, run against the
# "input.pdb" structure file.
mutation_request = {
    "mutations": pdb_mutation,
    "protein_id": JOB_ID,
    "structure_file": "input.pdb",
}

# Full payload for the ELASPIC REST API; the API token comes from the
# environment.
job_payload = {
    "api_token": os.environ["API_TOKEN"],
    "job_id": JOB_ID,
    "job_type": "local",
    "job_email": "alexey.strokach@kimlab.org",
    "mutations": [mutation_request],
}

In [None]:
# Submit the job and report either the "status" field of the JSON reply or
# the failed response object.
r = requests.post(ELASPIC_REST_API_URL, json=job_payload)
if r.ok:
    status = r.json().get("status")
    print(f"status: {status}")
else:
    print(f"Bad response from ELASPIC REST server: {r}")

In [None]:
ELASPIC_REST_API_URL = "http://localhost:8055/"

ELASPIC_REST_API_URL

In [None]:
# Deliberately abort execution here with a "done" exception.
# NOTE(review): presumably this stops a "Run All" before the cells below are
# reached — confirm.
raise Exception("done")

In [None]:
!ls ../../elaspic2/notebooks/07_benchmarks

In [None]:
df = pd.read_csv("../../elaspic2/notebooks/07_benchmarks/el2_rbd_affinity.csv")

In [None]:
df.head()

In [None]:
# Attach the el2_pred column from `df`, matched on mutation. The guard skips
# the merge when finished_df already has that column, so re-running the cell
# does not merge twice.
if "el2_pred" not in finished_df.columns:
    el2_predictions = df[["mutation", "el2_pred"]]
    finished_df = finished_df.merge(el2_predictions, on="mutation", how="left")

In [None]:
# assert finished_df["el2_pred"].notnull().all()

In [None]:
finished_df[finished_df["el2_pred"].isnull()]

In [None]:
# with engine.connect() as conn:
#     for tup in finished_df.itertuples():
#         if pd.isnull(tup.el2_pred):
#             continue
#         sql = f"""\
# update elaspic_interface_mutation_local
# set el2_score = {tup.el2_pred}
# where protein_id = '{tup.protein_id}' and mutation = '{tup.mutation}'
# """
#         conn.execute(sql)

In [None]:
# A bare URL is not valid Python (SyntaxError), so it is kept as a comment.
# Example result URL:
# http://elaspic.ccbr.proteinsolver.org/result/spike-sars2-in/6zoy-rbd.pdb.F165T/?p=h11709

In [None]:
import re


def getPnM(p):
    """Split a ``PROT.MUT`` identifier into its protein and mutation parts.

    The mutation part must end the string and consist of one letter, one or
    more digits and one letter, optionally followed by ``_`` and/or digits.

    Returns ``(protein, mutation)``, both upper-cased, or ``(None, None)``
    when ``p`` does not have that shape.
    """
    matched = re.match(r"(.+)\.([A-Za-z]{1}[0-9]+[A-Za-z]{1}_?[0-9]*)$", p)
    if matched is None:
        return None, None
    protein, mutation = matched.groups()
    return protein.upper(), mutation.upper()

In [None]:
path = "/result/spike-sars2-in/6zoy-rbd.pdb.F165T/"

currentIDs = path.split("/")

currentIDs

In [None]:
getPnM(currentIDs[3])

In [None]:
xxx ="http://elaspic.ccbr.proteinsolver.org/result/spike-sars2-in/6zoy-rbd.pdb.F165T/?p=h11709"

In [None]:
!ls -al

In [None]:
# NOTE(review): os.makedirs() requires a path argument; as written this call
# raises TypeError. Supply the intended directory or remove the cell.
os.makedirs()

In [None]:
import random

"%06x" % random.randint(1, 16777215)

In [None]:
import uuid

uuid.uuid4().hex[:12]

In [None]:
0o002