In [55]:
import numpy as np
import pandas as pd
import sqlite3
from PIL import Image
from pathlib import Path
import argparse
from tqdm import tqdm
import glob
import os
import skimage.io
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from functools import partial, reduce
import time


In [73]:
# Project layout: <top_dir>/<proj_dir>/{input,workspace/backend/<batch>}
top_dir = '/Users/habbasi/Desktop/'
proj_dir = 'TA'
batch = 'SIGMA2_Pilot_2013_10_11'

# Compound annotation table, stored in the project's input folder.
metadata_dfpath = os.path.join(top_dir, proj_dir, 'input', 'metadata_TA.csv')

In [57]:
class load_data:
    """Locate the input files of one batch inside a project folder.

    The files are looked up under
    ``<top_dir>/<proj_dir>/workspace/backend/<batch>`` (searched recursively)
    and ``<top_dir>/<proj_dir>/input``.
    """

    def __init__(self, top_dir, proj_dir, batch):
        """Remember the three path components used to build every lookup."""
        self.top_dir, self.proj_dir, self.batch = top_dir, proj_dir, batch

    def _backend_files(self, marker):
        """Return absolute paths of backend files whose name contains ``marker``."""
        backend_root = os.path.join(
            self.top_dir, self.proj_dir, 'workspace', 'backend', self.batch
        )
        return [
            os.path.abspath(os.path.join(dirpath, filename))
            for dirpath, _subdirs, filenames in os.walk(backend_root)
            for filename in filenames
            if marker in filename
        ]

    def sqlpath(self):
        """Return the paths of all files with 'sqlite' in their name."""
        return self._backend_files('sqlite')

    def filepath(self):
        """Return the paths of all files with 'normalized' in their name."""
        return self._backend_files('normalized')

    def feat_list(self):
        """Read ``input/feature_list.txt`` as an array of strings.

        The first line of the file is skipped.
        """
        listing = os.path.join(self.top_dir, self.proj_dir, 'input', 'feature_list.txt')
        return np.loadtxt(str(listing), dtype=str, skiprows=1)
    
    
    
# Discover the batch inputs. Both file lists are sorted so that the i-th
# database and the i-th normalized CSV line up when they are zipped later.
path = load_data(top_dir, proj_dir, batch)

sql = path.sqlpath()
sql.sort()

csv = path.filepath()
csv.sort()

featlist = path.feat_list()
csv

['/Users/habbasi/Desktop/TA/workspace/backend/SIGMA2_Pilot_2013_10_11/41744_normalized.csv',
 '/Users/habbasi/Desktop/TA/workspace/backend/SIGMA2_Pilot_2013_10_11/41754_normalized.csv',
 '/Users/habbasi/Desktop/TA/workspace/backend/SIGMA2_Pilot_2013_10_11/41755_normalized.csv',
 '/Users/habbasi/Desktop/TA/workspace/backend/SIGMA2_Pilot_2013_10_11/41756_normalized.csv',
 '/Users/habbasi/Desktop/TA/workspace/backend/SIGMA2_Pilot_2013_10_11/41757_normalized.csv']

In [58]:
def sqlite_connect(path):
    """Load one SQLite backend into a single per-object DataFrame.

    Reads the ``Image``, ``Cells``, ``Cytoplasm`` and ``Nuclei`` tables, joins
    the three object tables on ``TableNumber``/``ImageNumber``/``ObjectNumber``
    (left joins starting from ``Cells``) and then attaches the image-level
    columns on ``TableNumber``/``ImageNumber``.

    Parameters
    ----------
    path : str
        Location of the SQLite file.

    Returns
    -------
    pandas.DataFrame
        One row per row of ``Cells`` (more if a join key is duplicated in a
        joined table), carrying the columns of all four tables.
    """
    conn = sqlite3.connect(path)
    try:
        image = pd.read_sql_query("select *from Image", conn)
        cells = pd.read_sql_query("select  * from Cells", conn)
        cyto = pd.read_sql_query("select * from Cytoplasm", conn)
        nuclei = pd.read_sql_query("select * from Nuclei", conn)
    finally:
        # sqlite3 connections are not closed by garbage collection at a
        # predictable time; release the file handle before the merges.
        conn.close()

    object_keys = ["TableNumber", "ImageNumber", "ObjectNumber"]
    dt = reduce(
        lambda x, y: pd.merge(x, y, on=object_keys, how='left'),
        [cells, nuclei, cyto],
    )
    df = pd.merge(dt, image, on=["TableNumber", "ImageNumber"], how='left')

    return df


def aggregate_func(population, strata, variables, operation):
    """Aggregate feature columns within groups.

    Parameters
    ----------
    population : pandas.DataFrame
        Table holding both the grouping and the feature columns.
    strata : list of str
        Columns to group by.
    variables : sequence of str
        Feature columns to aggregate.
    operation : {'mean', 'median'}
        Per-column statistic computed inside every group.

    Returns
    -------
    pandas.DataFrame
        One row per group: the ``strata`` columns followed by the aggregated
        ``variables`` columns.

    Raises
    ------
    ValueError
        If ``operation`` is neither ``'mean'`` nor ``'median'``.
    """
    if operation not in ('mean', 'median'):
        # Fail with a clear message up front; the result would otherwise be
        # undefined when the function tries to return it.
        raise ValueError(
            "No operation defined: expected 'mean' or 'median', got {!r}".format(operation)
        )

    strata = list(strata)
    variables = list(variables)
    dt = pd.concat([population[strata], population[variables]], axis=1)
    grouped = dt.groupby(strata)[variables]

    # Use the built-in reducers: they always work column by column, whereas
    # np.mean on a group frame reduces over both axes on recent pandas.
    if operation == 'mean':
        tmp = grouped.mean().reset_index()
    else:
        tmp = grouped.median().reset_index()

    return tmp
    
# Plate identifiers, used to name the per-plate output files. They are zipped
# with the sorted `csv` and `sql` lists, so the order here must match theirs.
plates = [
    '41744',
    '41754',
    '41755',
    '41756',
    '41757',
]


def combined_profiles():
    """Build a median well-level profile for every plate of the batch.

    For each (plate, normalized CSV, SQLite file) triple taken in order from
    the module-level ``plates``, ``csv`` and ``sql`` lists:

    1. read the ``*Meta*`` columns of the normalized CSV (per-well metadata);
    2. load the per-object measurements with ``sqlite_connect`` and attach the
       well metadata by plate and well;
    3. standardize the ``featlist`` columns with a ``StandardScaler`` fitted on
       the rows whose ``Metadata_ASSAY_WELL_ROLE`` is ``"Untreated"``;
    4. take the per-well median of the scaled features;
    5. join the medians back onto the well metadata and write the result to
       ``<plate>_median.csv`` in the batch backend folder.

    Returns
    -------
    list of pandas.DataFrame
        One well-level profile table per plate, in the order of ``plates``.

    Raises
    ------
    ValueError
        If a plate has no ``"Untreated"`` rows to fit the scaler on.
    """
    # Same folder the inputs are discovered in; built from the notebook
    # configuration instead of repeating the absolute path.
    out_dir = os.path.join(top_dir, proj_dir, 'workspace', 'backend', batch)
    well_keys = ["Metadata_Plate", "Metadata_Well"]

    combined = []
    for p, cpath, spath in zip(plates, csv, sql):
        d = pd.read_csv(str(cpath))
        meta = [col for col in d.columns if "Meta" in col]
        pmeta = d.loc[:, meta]

        sql_data = sqlite_connect(str(spath))
        data = pd.merge(
            sql_data, pmeta,
            left_on=["Image_Metadata_Plate", "Image_Metadata_Well"],
            right_on=well_keys,
            how='left',
        )

        controls = data.query('Metadata_ASSAY_WELL_ROLE == "Untreated"')
        if controls.empty:
            raise ValueError(
                'Plate {}: no rows with Metadata_ASSAY_WELL_ROLE == "Untreated" '
                'to fit the scaler on'.format(p)
            )
        scaler = StandardScaler().fit(controls[featlist])

        # Keep the row index of `data` so the scaled features line up with the
        # metadata columns when the two are put side by side.
        df_scaled = pd.DataFrame(
            scaler.transform(data[featlist]), columns=featlist, index=data.index
        )
        metadata = [col for col in data.columns if col.startswith('Metadata')]
        dmeta = data[metadata]
        df_scaled = pd.concat([dmeta, df_scaled], axis=1, sort=False)

        pf = aggregate_func(population=df_scaled,
                            strata=well_keys,
                            variables=featlist,
                            operation='median')

        prf = pd.merge(pmeta, pf, on=well_keys, how='left')
        prf.to_csv(os.path.join(out_dir, str(p) + '_median.csv'))

        combined.append(prf)

    return combined
    
    
# Build all per-plate profiles, stack them into one table and report the
# wall-clock time the whole step needed.
starttime = time.time()

per_plate_profiles = combined_profiles()
combined_prf = pd.concat(per_plate_profiles)

elapsed = time.time() - starttime
print('That took {} seconds'.format(elapsed))

That took 19292.73126888275 seconds


In [201]:
# If the concentration column is absent, add it with a constant value of 10 so
# that it can be used as a grouping key below.
if 'Metadata_mmoles_per_liter' not in combined_prf:
    combined_prf['Metadata_mmoles_per_liter'] = 10

# Collapse replicate wells: one mean profile per sample / concentration /
# plate map. GroupBy.mean() averages every feature column separately;
# np.mean inside apply() reduces a whole group to one number on recent pandas.
strata = ['Metadata_broad_sample', 'Metadata_mmoles_per_liter', 'Metadata_Plate_Map_Name']
profiles = (combined_prf.groupby(strata)[list(featlist)]
                .mean()
                .reset_index()
               )

# Attach the compound annotations to every profile.
metadata_df = pd.read_csv(str(metadata_dfpath))
prf = pd.merge(profiles, metadata_df, on='Metadata_broad_sample', how='left')

### Calculating correlation profiles

In [292]:
# Pearson correlation between every pair of profiles (rows of `prf`).
corr_matrix = np.corrcoef(prf[featlist])
upper = np.triu(corr_matrix, k=1)

sample_names = list(prf.Metadata_broad_sample)
tmp = pd.DataFrame(upper, columns=sample_names, index=sample_names)

# Long format with one row per unordered pair. The pairs are picked by their
# position above the diagonal rather than by testing the value against zero,
# so a pair whose correlation happens to be exactly 0 is not lost.
row_pos, col_pos = np.triu_indices(len(sample_names), k=1)
sample_arr = np.asarray(sample_names, dtype=object)
tmp1 = pd.DataFrame({
    'Var1': sample_arr[row_pos],
    'Var2': sample_arr[col_pos],
    'value': corr_matrix[row_pos, col_pos],
})
# Undefined correlations (NaN) are left out of the table.
tmp1 = tmp1.dropna(subset=['value'])
tmp1.shape

(52003, 3)

In [62]:
#combined_prf.to_csv('/Users/habbasi/Desktop/TA/workspace/backend/SIGMA2_Pilot_2013_10_11/combined_median.csv')