In [18]:
# Leftover interactive help lookup (IPython `?` syntax). Note that `pd` is only
# imported in the following cell, so this fails on a fresh kernel.
pd.read_csv?

In [58]:
import os

from scipy.stats import linregress
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Number of trailing rows of an Excel export that the Excel reader is told to skip
N_EXTRA_LINES = 409
# Excel column range that is read (C through Z, i.e. 24 columns)
COLUMNS_TO_PARSE = 'C:Z'
# Number of leading rows the readers skip before parsing
ROWS_TO_SKIP = 2

# Known value of each well of the standards column, in row order; used as the
# grouping key when averaging the standards (presumably ng/ul - TODO confirm).
# NOTE(review): 0.025 breaks the otherwise regular 8/6/4/2/1/0.5 series -
# confirm it is not meant to be 0.25.
STANDARDS = pd.Series([8, 8, 6, 6, 4, 4, 2, 2, 1, 1, 0.5, 0.5, 0.025, 0.025, 0, 0])
# Column labels of the standards and blank wells (same values as the
# standards_col / blanks_col defaults of make_echo_picks)
STANDARDS_COL = 24
BLANKS_COL = 23

def _parse_fluorescence(filename, filetype):
    filetype = filetype.lower()
    
    kwargs = dict(skiprows=ROWS_TO_SKIP)
    if filetype == 'excel':
        parser = pd.read_excel
        kwargs.update(dict(skip_footer=N_EXTRA_LINES, parse_cols=COLUMNS_TO_PARSE))
    elif filetype == 'csv':
        parser = pd.read_csv
        
        # Use columns 1-24 (25 is not included)
        kwargs['usecols'] = range(1, 25)
        kwargs.update
    elif filetype == 'txt' or filetype == 'table':
        parser = pd.read_table
        kwargs.update(dict(nrows=16, encoding='utf-16', skiprows=2, 
                   usecols=range(2, 26)))
    else:
        raise ValueError(f"'{filetype}' is not a supported file type. "
                         "Only 'csv' and 'excel' are supported")
    return parser(filename, **kwargs)


def _maybe_make_directory(filename):
    directory = os.path.dirname(filename)
    try:
        os.mkdir(directory)
    except FileExistsError:
        pass


def _plot_regression(means, regressed, plate_name, output_folder='.'):
    """Plot the averaged standards next to the fitted line and save it as a PDF.

    Parameters
    ----------
    means : pandas.Series
        Mean reading per standard, indexed by the standard's known value.
    regressed : object
        Fit result exposing ``slope``, ``intercept`` and ``rvalue``.
    plate_name : str
        Used as the prefix of the PDF file name.
    output_folder : str
        The PDF is written to ``<output_folder>/regression/``.

    Returns
    -------
    str
        Path of the written PDF.
    """
    fitted = pd.Series(regressed.slope * means.index + regressed.intercept,
                       index=means.index, name='Regression')

    # Draw on a dedicated figure: relying on pyplot's implicit "current" axes
    # lets every later plot pile onto the same axes.
    fig, ax = plt.subplots()
    means.plot(ax=ax, legend=True)
    fitted.plot(ax=ax, legend=True)

    # rvalue is the correlation coefficient r, so square it to report R^2.
    # `:.5` formats to 5 significant digits.
    ax.set_title(f'$R^2$ = {regressed.rvalue ** 2:.5}')

    pdf = os.path.join(output_folder, 'regression', f'{plate_name}_regression_lines.pdf')
    _maybe_make_directory(pdf)
    fig.savefig(pdf)
    # The PDF is the product; release the figure so repeated calls do not
    # accumulate open figures.
    plt.close(fig)
    return pdf


def _heatmap(data, plate_name, datatype, output_folder):
    """Draw ``data`` as a heatmap and save it as a PDF.

    Parameters
    ----------
    data : pandas.DataFrame
        Matrix of values to draw.
    plate_name : str
        Used in the title and as the prefix of the PDF file name.
    datatype : str
        Label of what is plotted; used in the title, the file name and as the
        sub-folder name.
    output_folder : str
        The PDF is written to ``<output_folder>/<datatype>/``.

    Returns
    -------
    str
        Path of the written PDF.
    """
    # Use a dedicated figure: drawing on pyplot's implicit "current" axes would
    # stack this heatmap (and its colorbar) on top of whatever was plotted last.
    fig, ax = plt.subplots()
    sns.heatmap(data, ax=ax)
    ax.set_title(f'{plate_name} {datatype}')
    pdf = os.path.join(output_folder, datatype,
                       f'{plate_name}_{datatype}_heatmap.pdf')
    _maybe_make_directory(pdf)
    fig.savefig(pdf)
    # Release the figure so repeated calls do not accumulate open figures
    plt.close(fig)
    print(f'{plate_name}: Wrote {datatype} heatmap to {pdf}')
    return pdf


def _fluorescence_to_concentration(fluorescence, standards_col, standards,
                                   plate_name, plot=True, 
                                   output_folder='.', r_minimum=0.98,):
    """Use standards column to regress and convert to concentrations"""
    means = fluorescence[standards_col].groupby(standards).mean()
    stds = fluorescence[standards_col].groupby(standards).std()
    regressed = linregress(means.index, means)
    
    if (regressed.rvalue < r_minimum):
        raise ValueError(f'Regression failed test: {regressed.rvalue} < {r_minimum}')

    # Convert fluorescence to concentration
    concentrations = (fluorescence - regressed.intercept)/regressed.slope

    if plot:
        pdf = _plot_regression(means, regressed, plate_name)
        print(f'{plate_name}: Wrote regression plot to {pdf}')
        
        _heatmap(fluorescence, plate_name, 'fluorescence', output_folder)
        _heatmap(concentrations, plate_name, 'concentrations', output_folder)
   
    return concentrations


def _get_good_cells(concentrations, blanks_col, plate_name, mouse_id, 
                    plot=True,
                    output_folder='.'):
    """Use the blanks column to determine whether a well has enough fluorescence"""
    
    average_blanks = concentrations[blanks_col].mean()
    std_blanks = concentrations[blanks_col].std()

    # Minimum threshold: One standard deviation away from the mean
    avg_std = average_blanks + std_blanks

    is_cell_good = concentrations > avg_std
    n_good_cells = is_cell_good.sum().sum()
    print(f'{plate_name} ({mouse_id}) has {n_good_cells} cells passing Concentration QC')
    good_cells = concentrations[is_cell_good]
    
    without_standards_or_blanks = good_cells.loc[:, :(blanks_col-1)]
    
    if plot:  
        _heatmap(without_standards_or_blanks, plate_name, 
                 'without_standards_or_blanks', output_folder)

    return without_standards_or_blanks


def _transform_to_pick_list(good_cells, plate_name, mouse_id, datatype, output_folder='.'):
    """Flatten a plate-shaped matrix into a per-well pick list and write it as CSV.

    Parameters
    ----------
    good_cells : pandas.DataFrame
        Plate matrix (rows x columns); NaN wells are left out of the list.
    plate_name, mouse_id : str
        Stored in the ``plate`` / ``mouse_id`` columns and used in ``name``.
    datatype : str
        Sub-folder of ``output_folder`` the CSV is written to.
    output_folder : str
        Root folder for the output.

    Returns
    -------
    str
        Path of the written CSV.
    """
    # Convert 2d matrix into tall, tidy dataframe
    echo_picks = good_cells.unstack().reset_index().dropna()
    # Name the three columns by position; the 'level_0'/'level_1' names only
    # exist when the frame's axes are unnamed.
    echo_picks.columns = ['column_number', 'row_letter', 'concentration']

    rows = echo_picks['row_letter']
    if pd.api.types.is_integer_dtype(rows):
        # The readers in this file leave a default 0-based integer row index;
        # translate positions to plate row letters (0 -> A, 1 -> B, ...).
        # NOTE(review): confirm the ECHO expects letter-based well IDs.
        rows = rows.map(lambda i: chr(ord('A') + i) if 0 <= i < 26 else str(i))
    echo_picks['row_letter'] = rows.astype(str)

    # Build the strings column-wise: a row-wise apply on an all-numeric frame
    # upcasts the integer labels to float and yields wells such as '0.01.0'.
    echo_picks['well'] = echo_picks['row_letter'] + echo_picks['column_number'].astype(str)
    echo_picks['plate'] = plate_name
    echo_picks['mouse_id'] = mouse_id
    echo_picks['name'] = echo_picks['well'] + f'-{plate_name}-{mouse_id}-1'

    csv = os.path.join(output_folder, datatype, f'{plate_name}_echo.csv')
    _maybe_make_directory(csv)
    echo_picks.to_csv(csv, index=False)
    print(f'Wrote {datatype} ECHO pick list to {csv}')
    return csv


def make_echo_picks(filename, plate_name, mouse_id, filetype='excel',
                    standards_col=24, blanks_col=23, standards=STANDARDS, plot=True,
                    output_folder='.'):
    """Turn one plate-reader export into ECHO pick lists.

    Parses the file, converts readings to concentrations using the standards
    column, masks wells that do not exceed the blanks-based threshold, and
    writes two CSV pick lists (and, optionally, diagnostic plots).

    Parameters
    ----------
    filename : str
        Path of the plate-reader export.
    plate_name, mouse_id : str
        Identifiers stored in the pick lists and used in file names.
    filetype : str
        File format passed to the parser ('excel', 'csv', 'txt' or 'table').
    standards_col, blanks_col : int
        Labels of the standards and blanks columns.
    standards : pandas.Series
        Known value of every well in the standards column.
    plot : bool
        Whether to write the diagnostic PDFs.
    output_folder : str
        Root folder for every file that is written.

    Returns
    -------
    tuple of str
        ``(cherrypicked_csv, non_cherrypicked_csv)`` paths.
    """
    # Make sure the root exists so the helpers can create their sub-folders
    os.makedirs(output_folder, exist_ok=True)

    fluorescence = _parse_fluorescence(filename, filetype)

    # output_folder is forwarded by keyword to every step; without this all
    # output lands in the helpers' default folder '.'
    concentrations = _fluorescence_to_concentration(
        fluorescence, standards_col, standards, plate_name,
        plot=plot, output_folder=output_folder)
    good_cells = _get_good_cells(concentrations, blanks_col, plate_name, mouse_id,
                                 plot=plot, output_folder=output_folder)

    cherrypicked_csv = _transform_to_pick_list(
        good_cells, plate_name, mouse_id, 'cherrypicked',
        output_folder=output_folder)
    # NOTE(review): this list is built from the full matrix, so it also
    # contains the blanks and standards columns - confirm that is intended.
    non_cherrypicked_csv = _transform_to_pick_list(
        concentrations, plate_name, mouse_id, 'non_cherrypicked',
        output_folder=output_folder)
    return cherrypicked_csv, non_cherrypicked_csv

In [59]:
# Run the whole pipeline on a single Excel export
filename = '/Users/olgabot/Downloads/17_Plate_Concentrations.xlsx'
make_echo_picks(filename, plate_name='MAA000445', mouse_id='killer_mouse')

MAA000445: Wrote regression plot to ./regression/MAA000445_regression_lines.pdf
MAA000445: Wrote fluorescence heatmap to ./fluorescence/MAA000445_fluorescence_heatmap.pdf
MAA000445: Wrote concentrations heatmap to ./concentrations/MAA000445_concentrations_heatmap.pdf
MAA000445 (killer_mouse) has 322 cells passing Concentration QC
MAA000445: Wrote without_standards_or_blanks heatmap to ./without_standards_or_blanks/MAA000445_without_standards_or_blanks_heatmap.pdf
Wrote cherrypicked ECHO pick list to ./cherrypicked/MAA000445_echo.csv
Wrote non_cherrypicked ECHO pick list to ./non_cherrypicked/MAA000445_echo.csv


In [None]:
# python echo_picks.py /Users/olgabot/Downloads/17_Plate_Concentrations.xlsx MAA000459

In [73]:
# Exploratory read of a UTF-16 text export: skip 2 lines, keep 16 rows x 24 columns
filename = '/Users/olgabot/Google Drive/MACA (1)/384W QC/Plate Reader/raw_plate_reader_output/MACA/MAA000093.txt'
df = pd.read_table(
    filename,
    encoding='utf-16',
    skiprows=2,
    nrows=16,
    usecols=range(2, 26),
)
print(df.shape)
df

(16, 24)


Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,15,16,17,18,19,20,21,22,23,24
0,731006,873007,1113357,1492424,589621,564632,748694,613586,740537,1602566,...,1243373,255252,248510,806924,808498,262660,266744,799838,268673,3242734
1,1168882,258026,547759,659229,1366918,919721,2323259,515616,301643,850455,...,1716009,1836831,273390,1056237,257527,1244178,290637,282237,276163,3246706
2,924219,982525,1243206,968123,889851,1408803,546248,691164,1016926,1041757,...,1287537,968378,289336,738179,836008,271833,276794,1173594,295876,3096369
3,592324,622233,1148998,1448705,896437,1070115,797366,691908,324985,961770,...,849998,1017949,272177,309176,2599400,1228320,268349,288964,285330,3192044
4,1212479,903301,2757483,998196,1504269,1345381,521450,986564,538029,1020023,...,490397,842310,2060680,844137,1371612,1101372,457590,1543708,530835,2474236
5,733348,325412,1833515,956621,1478691,701911,613518,802765,1138496,722833,...,639240,786258,1054583,1180371,1293930,316037,912348,1021011,325776,2503978
6,1126796,1123947,763589,312139,730624,336818,584930,1220858,1599240,315631,...,16029,1508882,17584,287739,18249,1005968,22384,811413,23346,1124369
7,1087765,990876,1136476,1105265,916402,974045,589731,330873,293926,1057031,...,1447193,1123104,958727,333440,1565334,807848,1900682,1417178,768344,1149374
8,809745,723474,876241,975116,778906,785226,300179,1056736,314986,756136,...,605825,1400660,837709,324552,848309,1063602,274628,840692,305866,442701
9,1134499,1157014,836333,333926,861860,685306,1344508,1334488,723852,1088262,...,883104,880504,736720,780675,1327396,1433775,946284,1257864,322696,445879


In [36]:
# Toggle automatic post-mortem debugging (leftover from a debugging session)
%pdb

Automatic pdb calling has been turned ON


In [17]:
# Peek at the first lines of a raw CSV export with the shell's `head`
filename = '/Users/olgabot/Downloads/170727_MAA000149_QC.csv'
! head $filename

##BLOCKS= 1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
Plate:,Plate1,1.3,PlateFormat,Endpoint,Fluorescence,TRUE,Raw,FALSE,1,,,,,,1,540,1,24,384,505,Manual,,,,9,,,,1,16,505
,Temperature(�C),1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,,,,,,
,23,1421938,750111,139738,273512,349591,721672,537246,834050,285664,535438,710782,198567,272425,282872,1032059,611553,1004124,508093,332353,588573,234966,947166,115300,3281056,,Lowest than the lowest standard,0.5,ng/ul,,
,,774930,592244,505029,366702,294824,1014951,435331,1588580,274698,311315,1038142,553255,989942,720179,369025,379399,337754,804324,520492,213582,1276988,998643,116032,2966452,,Highest than the highest standard,6,ng/ul,,
,,1243180,309011,243414,376270,358943,1193840,1061270,884319,331055,663492,711320,203960,909744,523911,1085230,229072,322782,669374,290037,296617,507510,318860,94646,2525510,,,,,,
,,1524238,292198,179480,336010,452498,467045,610111,601739,566217,599044,110757,661294,194358,468071,520768,474482,831232

In [40]:
# make_echo_picks(filename, 'template', 'killer_mouse', filetype='csv')

In [5]:
# Leftover scratch call: raises TypeError because the required file path is missing
pd.read_csv()

TypeError: parser_f() missing 1 required positional argument: 'filepath_or_buffer'

> [0;32m<ipython-input-5-16550b21db65>[0m(1)[0;36m<module>[0;34m()[0m
[0;32m----> 1 [0;31m[0mpd[0m[0;34m.[0m[0mread_csv[0m[0;34m([0m[0;34m)[0m[0;34m[0m[0m
[0m
ipdb> a
ipdb> q


In [24]:
from glob import iglob

In [None]:
# NOTE(review): metadata_filename, plate_name and output_folder are not defined
# in any cell visible here - they must be set before this cell can run.
# NOTE(review): the .loc lookup below presumably needs the metadata indexed by
# plate name (read_csv yields a default integer index) - confirm.
metadata = pd.read_csv(metadata_filename)

for filename in iglob('PlateQC*.xlsx'):
    # NOTE(review): plate_name never changes inside the loop; presumably it
    # should be derived from each filename - confirm.
    mouse_id = metadata.loc[plate_name, 'Mouse_ID']
    # Pass output_folder by keyword: the fourth positional parameter of
    # make_echo_picks is filetype, not output_folder.
    csv = make_echo_picks(filename, plate_name, mouse_id, output_folder=output_folder)
    