# Load required modules

[Link to SOP](https://docs.google.com/document/d/1Oi2pUYFsxWAtLrsqoprCOqMYejmTCKDhwDJGekK00Og/edit)

In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
import sys, os
import pickle
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import pickle

# Make the project packages importable from this notebook.
# repo_dir is the parent of the current working directory and base_dir is the
# parent of repo_dir (presumably where sibling packages such as analytics_utils
# are checked out — confirm against the local layout).
# os.path.dirname is used instead of slicing on '/' positions so the lookup works
# with any path separator and does not fail on shallow working directories.
cwd = os.getcwd()
repo_dir = os.path.dirname(cwd)
base_dir = os.path.dirname(repo_dir)
for extra_path in (repo_dir, base_dir):
    # Guard the append so re-running this cell does not keep growing sys.path.
    if extra_path not in sys.path:
        sys.path.append(extra_path)

from analysis_pipeline.sequence_update import DetectionsSequenceUpdate
from analytics_utils.analytics_utils.lims_utils import get_plate_runs, plates_from_exp, plates_from_workflow
from analytics_utils.analytics_utils.s3_interface import download_from_s3, upload_to_s3, s3_imgupload, s3_df2csv, s3_csv2df
from analytics_utils.analytics_utils.table_properties import maldi_detections_columns, lcms_detections_columns, lcms_detections_chiral_columns

# Folder locations, given relative to the notebook's working directory.
dataset_folder = '../DATASETS/'
results_folder = '../detectionspipeline_results/'
models_folder = '../MODELS/'
# S3 bucket name and sub-directory (presumably where preprocessing models are
# stored, judging by the names — confirm). None of these five settings is
# referenced by the cells shown in this part of the notebook.
s3_model_bucket = 'ml-models-registry'
s3_model_bucket_subdirectory = 'exerevnetes-preprocessing-models/'

# Update sequences in Postgres

## 1. Set experiment / workflow / plate list, plate type, reference enzyme

In [12]:
# User settings for this run. Set EITHER exp_workflow_barcode_list (plates are
# looked up from the experiment / workflow barcodes) OR plate_list (explicit plates);
# the trailing comments show the alternative form of each setting.
exp_workflow_barcode_list = ['WF10153'] # None # 
plate_list = None # ['PLT18088'] # 
reference_enzyme = 'ENZ10045'
plate_types_to_fetch = ['LCMS_FINAL'] # ['HTS', 'LCMS_FINAL'] # 

if exp_workflow_barcode_list is not None:
    # Build two parallel lists: the plates found, and the plate type each was found under.
    plate_type_list = []
    plate_list = []
    for str_find in plate_types_to_fetch:
        for exp_workflow_barcode in exp_workflow_barcode_list:
            if 'EXP' in exp_workflow_barcode:
                plate_list_subset = plates_from_exp(exp_workflow_barcode, str_find)
            elif 'WF' in exp_workflow_barcode:
                plate_list_subset = plates_from_workflow(exp_workflow_barcode, str_find)
            else:
                # Without this branch an unrecognised barcode would either raise a
                # NameError or silently reuse the plates of the previous barcode.
                raise ValueError(
                    f"Unrecognised barcode {exp_workflow_barcode!r}: expected it to contain 'EXP' or 'WF'."
                )
            plate_list += plate_list_subset
            plate_type_list += [str_find]*len(plate_list_subset)
elif plate_list is not None:
    # Explicit plate list: repeat the requested plate type(s) so that zip() below
    # has a type for every plate.
    # NOTE(review): with more than one entry in plate_types_to_fetch the types are
    # assigned to plates cyclically, not as a cross product — confirm this is intended.
    plate_type_list = plate_types_to_fetch * len(plate_list)
else:
    raise ValueError("Set either exp_workflow_barcode_list or plate_list; both are None.")

print(f"Plate list ({len(plate_list)}): {[(p, pt) for p, pt in zip(plate_list,plate_type_list)]}")

Plate list (0): []


## 2. Run the sequence update to write each plate's sequences to the Postgres tables and S3 datasets

In [11]:
# Largest number of wells for which a plate is still treated as an LCMS plate.
max_lcms_wells = 384

# For every (plate, plate type) pair: fetch the sequences on the plate, then write
# them to the detections tables that belong to that plate type.
for plate, plate_type in zip(plate_list, plate_type_list):

    print(plate, plate_type)

    # get sequences on plate
    sequence_update = DetectionsSequenceUpdate()
    sequence_update_single, sequence_update_multi, enzyme_dict = sequence_update.get_sequence_update(
        plate, reference_enzyme=reference_enzyme, enz_idx_to_use=-1)
    print('Number of wells in sequence update:', len(sequence_update_single))

    ## update sequences in postgres
    if plate_type == 'HTS':
        # HTS plate: 1-sample and 2-sample MALDI detections tables.
        # NOTE(review): both tables receive sequence_update_single; sequence_update_multi
        # is never used in this cell — confirm that is intended.
        for table, suffix in (('maldi_detections_1', '-1'), ('maldi_detections_2', '-2')):
            sequence_update.update_table_with_sequences_in_postgres(
                plate, sequence_update_single, maldi_detections_columns, table=table,
                detections_type='maldi', save_csv_to_s3=True, suffix=suffix)

    elif plate_type == 'LCMS_FINAL':
        # Explicit check instead of assert/except AssertionError: an assert is removed
        # when Python runs with -O, and the except clause also hid assertion failures
        # raised inside the update calls themselves.
        if len(sequence_update_single) > max_lcms_wells:
            print("Too many wells to be an LCMS plate")
            continue

        # LCMS plate: C18 detections table first, then the chiral detections table.
        for columns, table in ((lcms_detections_columns, 'lcms_detections'),
                               (lcms_detections_chiral_columns, 'lcms_detections_chiral')):
            sequence_update.update_table_with_sequences_in_postgres(
                plate, sequence_update_single, columns, table=table,
                detections_type='lcms', save_csv_to_s3=True, suffix='')

    else:
        # Previously such plates were skipped without any message.
        print(f"Skipping {plate}: unsupported plate type {plate_type!r}")

PLT18086 LCMS_FINAL
# of unique enzymes: 114
Number of wells in sequence update: 360
548 matching rows found.
# of deduplicated entries: 368
***INFO*** Cleared 548 records from lcms_detections.
Saved 368 rows to Postgres: lcms_detections.
988 matching rows found.
Updated WF10179_C18_LCMSdetections.csv in S3 lcms-file-store.
537 matching rows found.
# of deduplicated entries: 357
***INFO*** Cleared 537 records from lcms_detections_chiral.
Saved 357 rows to Postgres: lcms_detections_chiral.
941 matching rows found.
Updated WF10179_chiral_LCMSdetections.csv in S3 lcms-file-store.
PLT18085 LCMS_FINAL
# of unique enzymes: 114
Number of wells in sequence update: 360
548 matching rows found.
# of deduplicated entries: 368
***INFO*** Cleared 548 records from lcms_detections.
Saved 368 rows to Postgres: lcms_detections.
808 matching rows found.
Updated WF10179_C18_LCMSdetections.csv in S3 lcms-file-store.
548 matching rows found.
# of deduplicated entries: 368
***INFO*** Cleared 548 records fro