# Imports

In [1]:
import requests
import pandas as pd
import sqlalchemy as sa

from IPython.display import display

from pprint import pprint

import kmtools
import kmtools.df_tools

print2 = kmtools.df_tools.print2

In [2]:
pd.set_option("display.max_columns", 300)
pd.options.mode.chained_assignment = None  # default='warn'

# Load data

In [3]:
JOBSUBMITTER_URL = 'http://192.168.6.122'
# JOBSUBMITTER_URL = 'http://elaspic.kimlab.org'

In [4]:
engine = sa.create_engine('mysql://ela***REMOVED***:3306/elaspic_webserver')

## local_jobs

In [5]:
sql_query = """
select distinct jobId, localId, protein, m.mut, m.chain
from jobs j
join job_to_mut j2m on (j2m.job_id = j.jobID)
join muts m on (m.id = j2m.mut_id)
left join elaspic_core_model_local e ON (e.protein_id = localId)
WHERE localID is not NULL AND (status != 'done' or path_to_data = '' or path_to_data is null)
"""
local_df = pd.read_sql_query(sql_query, engine)

In [6]:
display(local_df)
print(local_df.shape[0])

Unnamed: 0,jobId,localId,protein,mut,chain
0,0ae2c1,0ae2c1,0ae2c1,Y97S,0.0
1,1c11fb,1c11fb,1c11fb,Q102Y,0.0
2,1f15a6,1f15a6,1f15a6,R36K,0.0
3,2d9cb8,2d9cb8,2d9cb8,A33T,
4,37a314,37a314,37a314,N35S,0.0
5,3f4621,3f4621,3f4621,L112I,0.0
6,5aa21d,5aa21d,5aa21d,N35S,0.0
7,65978f,65978f,65978f,H100T,0.0
8,866261,866261,866261,N35S,0.0
9,86b705,86b705,86b705,N35S,0.0


14


In [7]:
local_jobs = []
for job_id, df_gp in local_df.groupby('jobId'):
    df_gp['structure_file'] = "input.pdb"
    df_gp['mutations'] = (
        df_gp[['chain', 'mut']]
        .apply(lambda x: '{}_{}'.format(int(x[0] if pd.notnull(x[0]) else 0) + 1, x[1]), axis=1)
    )
    local_jobs.append({
        "secret_key": "J6;u.950z5750Q#344vy7*idT1FBs0",
        "job_id": job_id,
        "job_email": "kimlab.webserver@gmail.com",
        "job_type": "local",
        "mutations": (
            df_gp
            .rename(columns={'protein': 'protein_id'})
            [['protein_id', 'mutations', 'structure_file']]
            .to_dict(orient='records')
        )
    })

# ---
pprint(local_jobs[:3])
print()
print2("Number of errored local mutations:", len(local_jobs))

[{'job_email': 'kimlab.webserver@gmail.com',
  'job_id': '0ae2c1',
  'job_type': 'local',
  'mutations': [{'mutations': '1_Y97S',
                 'protein_id': '0ae2c1',
                 'structure_file': 'input.pdb'}],
  'secret_key': 'J6;u.950z5750Q#344vy7*idT1FBs0'},
 {'job_email': 'kimlab.webserver@gmail.com',
  'job_id': '1c11fb',
  'job_type': 'local',
  'mutations': [{'mutations': '1_Q102Y',
                 'protein_id': '1c11fb',
                 'structure_file': 'input.pdb'}],
  'secret_key': 'J6;u.950z5750Q#344vy7*idT1FBs0'},
 {'job_email': 'kimlab.webserver@gmail.com',
  'job_id': '1f15a6',
  'job_type': 'local',
  'mutations': [{'mutations': '1_R36K',
                 'protein_id': '1f15a6',
                 'structure_file': 'input.pdb'}],
  'secret_key': 'J6;u.950z5750Q#344vy7*idT1FBs0'}]

Number of errored local mutations:                          14


In [8]:
for data_in in local_jobs:
    r = requests.post('{}:8000/elaspic/api/1.0/'.format(JOBSUBMITTER_URL), json=data_in)
    if not r.ok:
        print("Bad response from jobsubmitter server: {}".format(r))
        continue
    status = r.json().get('status', None)
    print('status: {}'.format(status))
    print()

status: submitted

status: submitted

status: submitted

status: submitted

status: submitted

status: submitted

status: submitted

status: submitted

status: submitted

status: submitted

status: submitted

status: submitted

status: submitted

status: submitted



## database_jobs

In [9]:
sql_query = """
select distinct jobId, protein, m.mut
from jobs j
join job_to_mut j2m on (j2m.job_id = j.jobID)
join muts m on (m.id = j2m.mut_id)
join elaspic.uniprot_domain ud ON (ud.uniprot_id = protein)
join elaspic.uniprot_domain_model udm USING (uniprot_domain_id)
WHERE localID is NULL AND status != 'done' AND elaspic.mutation_in_domain(m.mut, udm.model_domain_def)
"""
database_df = pd.read_sql_query(sql_query, engine)

In [10]:
display(database_df.head())
print(database_df.shape[0])

Unnamed: 0,jobId,protein,mut
0,04b33c,Q8WZ42,I12293F
1,0b2cfb,Q8WZ42,I12293F
2,0d63d7,P0CH28,K637N
3,0d63d7,P0CH28,K637Q
4,0d63d7,P0CH28,R650E


12914


In [11]:
database_jobs = []
for job_id, df_gp in database_df.groupby('jobId'):
    database_jobs.append({
        "secret_key": "J6;u.950z5750Q#344vy7*idT1FBs0",
        "job_id": job_id,
        "job_email": "strokach@cs.toronto.edu",
        "job_type": "database",
        "mutations": (
            df_gp
            .rename(columns={'protein': 'protein_id', 'mut': 'mutations'})
            [['protein_id', 'mutations']]
            .to_dict(orient='records')
        )
    })

# ---
pprint(database_jobs[:3])
print()
print2("Number of errored database mutations:", len(database_jobs))

[{'job_email': 'strokach@cs.toronto.edu',
  'job_id': '04b33c',
  'job_type': 'database',
  'mutations': [{'mutations': 'I12293F', 'protein_id': 'Q8WZ42'}],
  'secret_key': 'J6;u.950z5750Q#344vy7*idT1FBs0'},
 {'job_email': 'strokach@cs.toronto.edu',
  'job_id': '0b2cfb',
  'job_type': 'database',
  'mutations': [{'mutations': 'I12293F', 'protein_id': 'Q8WZ42'}],
  'secret_key': 'J6;u.950z5750Q#344vy7*idT1FBs0'},
 {'job_email': 'strokach@cs.toronto.edu',
  'job_id': '0d63d7',
  'job_type': 'database',
  'mutations': [{'mutations': 'K637N', 'protein_id': 'P0CH28'},
                {'mutations': 'K637Q', 'protein_id': 'P0CH28'},
                {'mutations': 'R650E', 'protein_id': 'P0CH28'},
                {'mutations': 'F653W', 'protein_id': 'P0CH28'},
                {'mutations': 'H676E', 'protein_id': 'P0CH28'},
                {'mutations': 'H676Q', 'protein_id': 'P0CH28'},
                {'mutations': 'K614E', 'protein_id': 'P0CH28'},
                {'mutations': 'K614Q', 'protei

In [12]:
for data_in in database_jobs:
    r = requests.post('{}:8000/elaspic/api/1.0/'.format(JOBSUBMITTER_URL), json=data_in)
    if not r.ok:
        print("Bad response from jobsubmitter server: {}".format(r))
        continue
    status = r.json().get('status', None)
    print('status: {}'.format(status))
    print()

status: submitted

status: submitted

status: submitted

status: submitted

status: submitted

status: submitted

status: submitted

status: submitted

status: submitted

status: submitted

status: submitted

status: submitted

status: submitted

status: submitted

status: submitted

status: submitted

status: submitted

status: submitted

status: submitted

status: submitted

status: submitted

status: submitted

