#### We want to access all the cells in LIMS that failed because of bad access (series) resistance — the `failed_bad_rs` flag — and map them back onto Nathan's tSNE plot.

1. Which cells are we searching for in LIMS?
2. What is needed to fit these cells into the tSNE?
3. Do I want features from LIMS, or file paths?



In [1]:
import pg8000
import pandas as pd

#code from Agata

def _connect(user="limsreader", host="limsdb2", database="lims2", password="limsro", port=5432):
    """Open a pg8000 connection and return it together with a fresh cursor.

    Returns a ``(connection, cursor)`` tuple; the caller is responsible for
    closing both.
    """
    # NOTE(review): the connection credentials are hard-coded as default
    # arguments -- consider sourcing them from the environment or a config file.
    connection = pg8000.connect(
        user=user,
        host=host,
        database=database,
        password=password,
        port=port,
    )
    cursor = connection.cursor()
    return connection, cursor

def _select(cursor, query):
    """Execute ``query`` on ``cursor`` and return all rows as dicts.

    Each row becomes a dict mapping column name -> value. If the result set
    contains the same column name more than once, the later column's value
    overwrites the earlier one in the row dict.
    """
    cursor.execute(query)

    # The first field of every description entry is the column name.
    column_names = []
    for column_info in cursor.description:
        column_names.append(column_info[0])

    records = []
    for row in cursor.fetchall():
        records.append(dict(zip(column_names, row)))
    return records

def limsquery(query, user="limsreader", host="limsdb2", database="lims2", password="limsro", port=5432):
    """Run a SQL query against the LIMS database and return the result rows.

    Parameters
    ----------
    query : str
        The SQL statement to execute.
    user, host, database, password, port
        Connection settings forwarded unchanged to ``_connect``.

    Returns
    -------
    list of dict
        One dict per row, as produced by ``_select``.

    The cursor and the connection are always closed before returning, even
    when the query raises.
    """
    conn, cursor = _connect(user, host, database, password, port)
    try:
        try:
            return _select(cursor, query)
        finally:
            cursor.close()
    finally:
        # Nested so the connection is released even if closing the cursor
        # itself raises.
        conn.close()

def get_lims_dataframe(query):
    '''Run ``query`` through ``limsquery`` and return the rows as a DataFrame.

    Parameters
    ----------
    query : str
        SQL statement passed unchanged to ``limsquery``.

    Returns
    -------
    pandas.DataFrame
        One row per result record, with columns taken from the keys of the
        first record. If the query returns no rows, a message is printed and
        an empty DataFrame is returned.
    '''
    result = limsquery(query)
    if not result:
        # The parenthesised call prints the same text on Python 2 and Python 3;
        # the bare ``print "..."`` statement is a SyntaxError on Python 3.
        print("Could not find results for your query.")
        return pd.DataFrame()
    # Checking for emptiness explicitly (instead of catching IndexError around
    # the constructor) avoids masking an unrelated IndexError raised by pandas.
    return pd.DataFrame(data=result, columns=list(result[0].keys()))

In [4]:
# Select ephys ROI results flagged failed_bad_rs that have none of the other
# failure flags set and were not manually passed, restricted to projects
# T301 / T301x.
# NOTE(review): both err.id and proj.id are selected without aliases, so they
# appear to come back under the same column name "id" and collapse to a single
# key in the row dicts (the displayed "id" equals project_id) -- confirm and
# alias one of them if the ephys result id is needed under "id".
lims_query = (
    "SELECT err.id, err.recording_date, err.failed_bad_rs, err.initial_access_resistance_mohm, "
    "err.workflow_state, s.name, s.ephys_roi_result_id, s.project_id, proj.id, proj.code "
    "FROM ephys_roi_results err JOIN specimens s ON s.ephys_roi_result_id = err.id "
    "JOIN projects proj ON s.project_id = proj.id "
    "WHERE (failed_bad_rs = 'TRUE' "
    "AND err.failed_electrode_0 = 'FALSE' "
    "AND err.failed_clogged_pipette = 'FALSE' "
    "AND err.failed_no_seal = 'FALSE' "
    "AND err.failed_other = 'FALSE' "
    "AND err.workflow_state != 'manual_passed') "
    "AND (proj.code = 'T301' OR proj.code = 'T301x')"
)

lims_df = get_lims_dataframe(lims_query)
lims_df.tail()

Unnamed: 0,ephys_roi_result_id,code,name,failed_bad_rs,workflow_state,recording_date,initial_access_resistance_mohm,project_id,id
67,601831626,T301,Nos1-CreERT2;Ai14-333287.03.01.01,True,manual_failed,2017-07-10 21:03:11,20.095886,305094322,305094322
68,602660919,T301,Scnn1a-Tg2-Cre;Ai14-333165.04.02.01,True,manual_failed,2017-07-13 22:16:27,20.866342,305094322,305094322
69,604693290,T301x,Rbp4-Cre_KL100;Ai14-337193.04.02.01,True,manual_failed,2017-07-26 22:50:46,27.793394,300080300,300080300
70,605537141,T301x,Gad2-IRES-Cre;Ai14-336420.04.02.01,True,manual_failed,2017-07-28 20:37:50,20.118904,300080300,300080300
71,605538385,T301x,Gad2-IRES-Cre;Ai14-336420.02.02.01,True,manual_failed,2017-07-28 21:32:11,25.898184,300080300,300080300


In [6]:
# Break the results down by genotype.
# The genotype is the specimen name with its last 16 characters removed
# (in the rows shown above that suffix looks like "-333287.03.01.01").
lims_df['genotype'] = lims_df['name'].map(lambda specimen_name: specimen_name[:-16])
genotype_counts = lims_df.groupby(['genotype']).size()
genotype_counts

genotype
Chat-IRES-Cre-neo;Ai14                13
Chrna2-Cre_OE25;Ai14(IVSCC)            2
Chrna2-Cre_OE25;Pvalb-T2A-Dre;Ai66     2
Esr2-IRES2-Cre;Ai14                    1
Gad2-IRES-Cre;Ai14                     3
Htr3a-Cre_NO152;Ai14                   2
Htr3a-Cre_NO152;Pvalb-T2A-Dre;Ai66     1
Ndnf-IRES2-dgCre;Ai14                  3
Nkx2-1-CreERT2;Ai14 (IVSCC)            5
Nos1-CreERT2;Ai14                      2
Nos1-CreERT2;Sst-IRES-FlpO;Ai65        2
Ntsr1-Cre_GN220;Ai14                   1
Oxtr-T2A-Cre;Ai14                      1
Pvalb-IRES-Cre;Ai14                   13
Rbp4-Cre_KL100;Ai14                    1
Scnn1a-Tg2-Cre;Ai14                    1
Scnn1a-Tg3-Cre;Ai14                    1
Sim1-Cre_KJ18;Ai14                     1
Sst-IRES-Cre;Ai14                      1
Tlx3-Cre_PL56;Ai14                     2
Vip-IRES-Cre;Ai14                      8
Vip-IRES-Cre;Ai14(IVSCC)               1
Vipr2-IRES2-Cre;Ai14                   5
dtype: int64

In [39]:
# Write the annotated query results to disk so they can be reused elsewhere.
output_csv = 'tSNE_cells.csv'
lims_df.to_csv(output_csv)