In [None]:
# Spreadsheet - select_genes 

### Return one spreadsheet with only those genes selected from an input list: 
* **Select the spreadsheet file and the gene list file with the two dropdown list boxes.**
* **Press "Select Genes"; the spreadsheet of selected genes will be written to a new file named after the input spreadsheet, with "_Slt_Gn" appended to its base name and a ".tsv" extension.**

In [1]:
# %%html
# <style>
# div.input {
#     display:none;
# }
# div.output_stderr{
#     display:none
# }
# </style>

In [2]:
#                                         single cell: list the input files, build the controls and select genes.
#                                         target directory set for docker run -v `pwd`:...   ==  mount user data
#                                         (scanned for input files; selected file names are joined onto it)
target_dir = '../../'

import warnings
warnings.filterwarnings('ignore')

import os
import sys
import pandas as pd
import knpackage.toolbox as kn

from IPython.display import display
import ipywidgets as widgets

# utility
def read_a_list_file(input_file_name):
    """Load the whitespace-separated tokens stored in a text file.

    Args:
        input_file_name:     full path name of a file containing a list

    Returns:
        list of str: every whitespace-delimited token, in file order
        (an empty list when the file holds no tokens).
    """
    with open(input_file_name, 'r') as list_file:
        # Newlines are whitespace too, so splitting line by line yields the
        # same tokens as splitting the whole file content at once.
        tokens = [token for line in list_file for token in line.split()]
    return tokens

#                                         local function to read the files, select the listed genes and write:
def select_genes(obie_jobie):
    """Button callback: write the spreadsheet rows whose gene names are in the gene list.

    Reads the spreadsheet selected in ``flistbx_1`` (tab separated, first
    column used as the row index, first row as the header) and the
    whitespace-separated gene list selected in ``flistbx_2``; both file names
    are resolved against ``target_dir``.  The rows whose index label is common
    to the spreadsheet and the gene list are written, tab separated, to
    ``<spreadsheet path without extension>_Slt_Gn.tsv`` and the output path is
    printed.

    The selected rows keep the order they have in the input spreadsheet, so
    the output does not depend on the order in which
    ``kn.find_common_node_names`` returns the common names.

    Args:
        obie_jobie: argument supplied by the button's ``on_click`` dispatch;
            not used here.

    Returns:
        None.  Returns immediately, without reading or writing anything, when
        the file scan produced no usable input files.
    """
    # Nothing to select from: the list is empty or holds only the placeholder.
    if len(my_file_list) == 0 or my_file_list[0] == 'No Data':
        return

    file_name_1 = os.path.join(target_dir, flistbx_1.value)
    file_name_2 = os.path.join(target_dir, flistbx_2.value)

    gene_select_list = read_a_list_file(file_name_2)
    spreadsheet_df = pd.read_csv(file_name_1, sep='\t', index_col=0, header=0)
    gene_names = kn.extract_spreadsheet_gene_names(spreadsheet_df)
    intersection_names = kn.find_common_node_names(gene_names, gene_select_list)

    # A boolean mask (rather than .loc[list_of_names]) keeps the rows in the
    # spreadsheet's own order whatever order the common names come back in.
    keep_rows = spreadsheet_df.index.isin(list(intersection_names))
    spreadsheet_intersected_df = spreadsheet_df.loc[keep_rows]

    # The output is always written as .tsv next to the input spreadsheet.
    name_base_1 = os.path.splitext(file_name_1)[0]
    outfile_name = name_base_1 + '_Slt_Gn.tsv'
    spreadsheet_intersected_df.to_csv(outfile_name, sep='\t', index=True, header=True)
    print('Output written to\n', outfile_name)

#                                         Get list of (docker run -v) mounted files:
# os.listdir returns entries in arbitrary order; sorting keeps the dropdown
# options (and the default selection, which is the first entry) stable from
# run to run.
flist = sorted(os.listdir(target_dir))
FEXT = ['.tsv', '.txt', '.df']

# Keep only regular files whose extension is one of the accepted ones.
my_file_list = [
    f for f in flist
    if os.path.isfile(os.path.join(target_dir, f))
    and os.path.splitext(f)[1] in FEXT
]

#                                         (docker run -v) mounted files was empty:
# The placeholder gives the dropdowns one option to show; select_genes
# checks for it and does nothing.
if not my_file_list:
    my_file_list.append('No Data')

#                                         Create and display the widget controls:
# Both dropdowns offer the same file choices and start on the first entry.
flistbx_1 = widgets.Dropdown(options=my_file_list,
                             value=my_file_list[0],
                             description='Select Spreadsheet File:')
flistbx_2 = widgets.Dropdown(options=my_file_list,
                             value=my_file_list[0],
                             description='Select Gene List File:')

# Button whose click runs the gene selection on the two chosen files.
output_file_button = widgets.Button(description='Select Genes',
                                    disabled=False,
                                    button_style='',
                                    tooltip='select genes button',
                                    data_file_key='output_file_name')
output_file_button.on_click(select_genes)

# Show the controls top to bottom: spreadsheet, gene list, action button.
display(flistbx_1)
display(flistbx_2)
display(output_file_button)


Output written to
 ./transform_data/gene_samples_1_Slt_Gn.tsv
