# HEAL Monday Studies Board Update pipeline

Jupyter Notebook that follows the SOP for updating the HEAL Monday Board.
The notebook can be used either for step-by-step exploration or run from a service like Google Colab.

In [None]:
## If running on Google Colab, this cell mounts Google Drive so that files stored
## on Google Drive can be accessed.
## The `google.colab` package only exists inside Colab, so the import is guarded:
## outside Colab the cell reports that it skipped the mount instead of raising
## ImportError and aborting a "Restart & Run All".
try:
    from google.colab import drive
except ImportError:
    print("google.colab is not available; skipping Google Drive mount.")
else:
    ## Same mount call as before; only reached when the import succeeded.
    drive.mount('/content/drive', force_remount=True)

In [None]:
## If running from Google Colab, might need to install this library
!pip install xlsxwriter

In [6]:
## Standard-library modules used by the cells below.
import sys
import logging
from pathlib import Path
## Add ../scripts/ to the module search path before importing the project module
## below (presumably that is where monday_board_update.py lives — confirm).
## The entry is relative, so it is resolved against the kernel's working directory.
sys.path.append('../scripts/')
## Project module that provides every pipeline step called in this notebook.
import monday_board_update


In [None]:
## Set this to the directory where:
## 1- The Monday Studies board has been exported.
## 2- All relevant tables from the HEAL MySQL database have been exported as CSV files.
## NOTE(review): the value below looks like a placeholder — edit it before running.
## The same directory also receives the log file (report-log.txt) and is passed to
## the import and export steps further down.
input_dir = Path("/pat/to/data/dir")

In [23]:
## Setup logger
## Every record at DEBUG level and above is sent to two places:
##   - input_dir / "report-log.txt", formatted with timestamp and level
##   - the notebook output (stdout), message text only
## Handlers already attached to the root logger are removed first. This keeps the
## cell idempotent (re-running it no longer prints every message twice) and makes
## sure the file handler is installed even when the environment pre-configured the
## root logger, a situation in which logging.basicConfig silently does nothing.
root_logger = logging.getLogger()
for stale_handler in list(root_logger.handlers):
    root_logger.removeHandler(stale_handler)
    stale_handler.close()

## File handler: appends to the report log inside the data directory.
report_handler = logging.FileHandler(input_dir / "report-log.txt")
report_handler.setFormatter(
    logging.Formatter("%(asctime)s [%(levelname)s] %(message)s")
)

## Console handler: no formatter is set on purpose, so only the bare message is shown.
console_handler = logging.StreamHandler(sys.stdout)

root_logger.setLevel(logging.DEBUG)
root_logger.addHandler(report_handler)
root_logger.addHandler(console_handler)


In [24]:
## Step 1: load the study lookup table.
## input_dir is handed to the helper, which presumably reads the exported lookup
## table from there (confirm in monday_board_update.py).
## The result, gt_file, is an input to steps 3 and 4.
logging.info("---- STEP 1: Looking at Study Lookup Table")
gt_file = monday_board_update.import_study_lookup_table(input_dir)
    

In [25]:
## Step 2: load the exported Monday Studies board.
## input_dir is handed to the helper, which presumably reads the board export from
## there (confirm in monday_board_update.py).
## The result, monday_board, is an input to steps 3, 4 and 9.
logging.info("---- STEP 2: Importing Monday Studies Board")
monday_board = monday_board_update.import_monday_board(input_dir)


In [26]:
## Step 3: compare the lookup table (gt_file) with the Monday board.
## The helper returns two values:
##   - mondayboard_missingin_lookup: used by the export in step 9 and by the last cell
##   - lookup_fields: passed on to step 4
## NOTE(review): `loookup` (three o's) is the name this cell calls; renaming it would
## need a matching change in monday_board_update.
logging.info("---- STEP 3: Compare lookup table and Monday Board")
mondayboard_missingin_lookup, lookup_fields = monday_board_update.compare_study_loookup_monday(gt_file, monday_board)

In [27]:
## Step 4: bring in the MySQL table exports and combine them with the results of
## steps 1-3 (gt_file, monday_board, lookup_fields).
## The helper presumably reads the exported tables from input_dir (confirm in
## monday_board_update.py). The result, combined_data_ph1, is an input to step 5.
logging.info("---- STEP 4: Importing tables from MySQL and combining relevant information")
combined_data_ph1 = monday_board_update.import_mysql_data(input_dir, gt_file, monday_board, lookup_fields)


In [28]:
## Step 5: pass the step 4 result through the MDS hole-filling helper.
## NOTE(review): combined_data_ph1 is rebound to the helper's return value, so
## re-running this cell feeds the already-processed data back in — confirm that the
## helper is idempotent, or re-run step 4 first.
## The rebound combined_data_ph1 is an input to step 7.
logging.info("---- STEP 5: Filling holes with MDS data")
combined_data_ph1 = monday_board_update.fill_in_holes_from_mds(input_dir, combined_data_ph1)


In [29]:
## Add CTN data
## Step 6: collect CTN data via the MDS helper; only input_dir is passed in.
## The result, ctn_fields_platform, is an input to step 7.
## NOTE(review): the saved output of this cell shows a pandas SettingWithCopyWarning
## for `ctn_data['project_title'] = ...`; that assignment is not in this notebook,
## so any fix (e.g. using .loc or an explicit copy) belongs in the helper module.
logging.info("---- STEP 6: Adding any CTN data from MDS")
ctn_fields_platform = monday_board_update.get_ctndata_from_mds(input_dir)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ctn_data['project_title'] = ctn_data['project_title'].replace('0', '')


In [30]:
## Step 7: combine the step 5 result (combined_data_ph1) with the CTN data from
## step 6 (ctn_fields_platform). The result, all_data, is the input to step 8.
logging.info("---- STEP 7: Combining everything together")
all_data = monday_board_update.combine_mysql_ctn(combined_data_ph1, ctn_fields_platform)


In [31]:
## Step 8: final reshaping of all_data so it is ready for the Monday board.
## The result, combined_data, is what step 9 exports.
logging.info("---- STEP 8: Final Manipulation of all the data to make it Monday Board ready")
combined_data = monday_board_update.prepare_for_monday(all_data)

In [33]:
## Step 9: report final numbers and export.
## The helper receives input_dir, the prepared data (combined_data), the entries
## found missing in step 3 and the original board; its return value is not kept.
## Output files are presumably written under input_dir (confirm in
## monday_board_update.py).
## NOTE(review): the saved output of this cell lists Monday entries to
## investigate/delete and shows the heading twice — possibly because the logger
## cell was run more than once; confirm.
logging.info("---- STEP 9: Final numbers and Export")
monday_board_update.export_finaldata(input_dir, combined_data, mondayboard_missingin_lookup, monday_board)  



****** Investigate/Delete the following entries on Monday that are not in there in the new Monday Excel upload
****** Investigate/Delete the following entries on Monday that are not in there in the new Monday Excel upload


1378  10428343_HDP00882                 NaN         NaN   
1379  10488140_HDP00883                 NaN         NaN   
1380       9673173_none                 NaN         NaN   
1381       9769689_none                 NaN         NaN   
1389           10460178            10460178           -   
1390           10250564            10250564           -   
1391           10343721            10343721           -   
1394           10217075            10217075           -   
1395           10294238            10294238           -   
1402            9950853             9950853           -   
1403            9829976             9829976           -   
1404           10133699            10133699           -   
1409           10331849            10331849           -   
1410           10197811            10197811           -   
1411           10197809            10197809           -   
1412            9850643             9850643           -   
1413            9823898             9823898           - 

                   Name Most Recent Appl_ID HDP appl_ID  \
1378  10428343_HDP00882                 NaN         NaN   
1379  10488140_HDP00883                 NaN         NaN   
1380       9673173_none                 NaN         NaN   
1381       9769689_none                 NaN         NaN   
1389           10460178            10460178           -   
1390           10250564            10250564           -   
1391           10343721            10343721           -   
1394           10217075            10217075           -   
1395           10294238            10294238           -   
1402            9950853             9950853           -   
1403            9829976             9829976           -   
1404           10133699            10133699           -   
1409           10331849            10331849           -   
1410           10197811            10197811           -   
1411           10197809            10197809           -   
1412            9850643             9850643           - 

In [34]:
## Names of the Monday-board entries missing from the lookup table, leaving out
## every row whose study_type is 'CTN'. The last line is the cell's displayed value.
non_ctn_rows = mondayboard_missingin_lookup["study_type"] != 'CTN'
mondayboard_missingin_lookup.loc[non_ctn_rows, "Name"].values

array(['10428343_HDP00882', '10488140_HDP00883', '9673173_none',
       '9769689_none', '10460178', '10250564', '10343721', '10217075',
       '10294238', '9950853', '9829976', '10133699', '10331849',
       '10197811', '10197809', '9850643', '9823898', '10619029',
       '10493291', '9555046', '9775470', '10589995'], dtype=object)