In [1]:
import psycopg2
import os
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import sqlalchemy as sa
import numpy as np
import seaborn as sn
import math
import Orange
import cv2

Initialize a database connection from the username, password, server, port and database name. Returns a SQLAlchemy engine.

### 2.1 Mitosis detection with the dataset

In [2]:
def initializeDb(username, password, server, port, database):
    """Create a SQLAlchemy engine for a PostgreSQL database.

    Parameters
    ----------
    username, password : credentials placed verbatim in the connection URL.
    server, port : host and port of the PostgreSQL server.
    database : name of the database to connect to.

    Returns
    -------
    The engine returned by ``sa.create_engine`` for the URL
    ``postgresql://username:password@server:port/database``.
    """
    url_template = 'postgresql://{username}:{password}@{server}:{port}/{database}'
    connection = url_template.format(
        username=username,
        password=password,
        server=server,
        port=port,
        database=database,
    )
    return sa.create_engine(connection, encoding="utf-8")

- Adds new features to the given dataframe:
 - pctarea - percentage change in area compared with the previous frame
 - pctperimeter - percentage change in perimeter compared with the previous frame
 - pctcircularity - percentage change in circularity compared with the previous frame

In [None]:
def calculateNewFeatures(features):
    """Add frame-to-frame percentage-change columns to ``features`` in place.

    Three columns are written: ``pctarea``, ``pctperimeter`` and
    ``pctcircularity``. Each is the percentage change of the matching source
    column relative to the previous row. Rows whose ``areadiff`` is missing
    get NaN in all three columns, because the previous row is then not a valid
    reference for the comparison.

    Parameters
    ----------
    features : pandas.DataFrame with at least the columns ``area``,
        ``perimeter``, ``circularity`` and ``areadiff``.

    Returns
    -------
    None; ``features`` is modified in place.
    """
    # Rows with no areadiff have no usable previous row.
    no_previous = features['areadiff'].isnull()
    # pct_change works column by column, so one call covers all three features.
    changes = features[['area', 'perimeter', 'circularity']].pct_change() * 100
    for source, target in (('area', 'pctarea'),
                           ('perimeter', 'pctperimeter'),
                           ('circularity', 'pctcircularity')):
        # mask() blanks the rows flagged above instead of writing them one by one.
        features[target] = changes[source].mask(no_previous)


- Procedure that calculates the lowest or highest percentage change of a feature relative to the previous frames
- 1st for loop creates the column names needed for the auxiliary dataframe
- 2nd for loop calculates the percentage change against each of the required previous frames
- 3rd for loop sets to NaN the values that cannot be calculated
 - frames : defines the required number of previous frames to compare with ( 5 by default )
 - minimun : true to get the lowest change, false for the highest ( true by default )
 - feature : feature selected to study
 
 

In [None]:
def calculatePctChange(features,feature, frames=5, minimun=True):
    """Store the smallest or largest multi-frame percentage change of ``feature``.

    An auxiliary frame is built with one column per look-back distance
    ("a2" .. "a<frames>"), plus the one-frame change already stored in
    ``features['pct' + feature]``. Its row-wise minimum or maximum is written
    back to ``features`` in place.

    Parameters
    ----------
    features : DataFrame holding the columns 'id', 'area', 'areadiff',
        'perimeter', 'circularity' and 'pct' + feature.
    feature : name of the column to study; it must be one of the five columns
        selected before ``pct_change`` is applied.
    frames : the look-back distances used are 2 .. frames (5 by default).
    minimun : True writes ``'minPct' + feature``; False writes
        ``'maxPct' + feature``.

    Returns
    -------
    None.
    """
    # Column names of the auxiliary frame: "a2", "a3", ..., "a<frames>".
    columns = []
    for i in range(1,frames):
        columns.append("a"+str(i+1));
    d = pd.DataFrame(columns = columns)
    # "a<k>" holds the percentage change of `feature` against the row k positions earlier.
    for i in range(1,frames):
        d["a"+str(i+1)]= features[['id','area','areadiff','perimeter','circularity']].pct_change(periods=i+1)[feature]*100
    d['pct'+feature] = features['pct'+feature];
    # Invalidate look-backs wherever the one-frame change is NaN.
    # NOTE(review): `row` is the value yielded by iterrows(); whether it reflects
    # writes made to `d` earlier in this loop depends on pandas internals - confirm.
    # NOTE(review): `index+1` assumes integer row labels; on the last row the write
    # targets a label that is not in `d` - confirm the intended behaviour.
    for index, row in d.iterrows():
        if math.isnan(row['pct'+feature]):
            # No valid previous row here: no look-back is meaningful for this row,
            # and the 2-frame look-back of the next row is not meaningful either.
            for i in range(1,frames):
                d.loc[index,"a"+str(i+1)] = float('nan')
            d.loc[index+1,"a2"] = float('nan')
        # A NaN k-frame look-back on this row blanks the (k+1)-frame look-back of the next row.
        for i in range(2,frames):
            if math.isnan(row["a"+str(i)]):
                d.loc[index+1,"a"+str(i+1)] = float('nan')
    # The reduction runs over every column of `d`, including 'pct' + feature.
    if minimun:
        features['minPct' + feature] = d.min(axis=1)
    else:
        features['maxPct' + feature] = d.max(axis=1)

Adds a column to the selected dataframe that shows whether the cell has values in the previous frames ( 5 by default )

In [None]:
def checkHistory(features, frames = 5):
    """Flag rows whose cell was observed in the ``frames - 1`` preceding frames.

    Adds the boolean column ``historicalRecord`` to ``features`` in place. A
    row is True only when, for every lag in 1 .. frames-1, the row ``lag``
    positions earlier has the same ``id`` and a ``frame`` exactly ``lag``
    smaller. Rows too close to the top of the table are False.

    Parameters
    ----------
    features : DataFrame with ``id`` and ``frame`` columns, ordered so that
        consecutive frames of one cell are adjacent rows.
    frames : size of the required window, current frame included (default 5).

    Returns
    -------
    None; ``features`` is modified in place.
    """
    cell_ids = features['id']
    frame_numbers = features['frame']
    has_history = pd.Series(True, index=features.index)
    for lag in range(1, frames):
        # shift() leaves NaN in the first `lag` rows, so they compare False.
        same_cell = cell_ids == cell_ids.shift(lag)
        consecutive = (frame_numbers - frame_numbers.shift(lag)) == lag
        has_history &= same_cell & consecutive
    features['historicalRecord'] = has_history


Adds two new columns that flag whether a row is the cell's last frame (`Mitosis`) or the frame just before it (`Pre-Mitosis`), using the `cells` table of the database to get the ground-truth values.

In [None]:
def labelMitosis(engine,features):
    """Flag the mitosis and pre-mitosis rows of ``features`` from ground truth.

    Reads the ``cells`` table. For every non-zero ``parentid`` found there,
    the ``dead`` frame of the cell with that id is looked up. Rows of
    ``features`` whose ``gt_id`` is that cell are flagged in place:
    ``Mitosis`` at the ``dead`` frame, ``Pre-Mitosis`` one frame earlier.

    Note: this name is defined again further down the notebook with a
    different signature, so this version is only active until those cells run.

    Parameters
    ----------
    engine : connection accepted by ``pd.read_sql_query``.
    features : DataFrame with ``gt_id`` and ``frame`` columns.

    Returns
    -------
    None; ``features`` is modified in place.
    """
    cells_table = pd.read_sql_query('select * from cells',con=engine)
    for flag in ('Mitosis', 'Pre-Mitosis'):
        features[flag] = False
    children_per_parent = cells_table.groupby(['parentid']).count()['id']
    # parentid 0 marks cells without a parent, so it is not a dividing cell.
    dividing_ids = children_per_parent.index[children_per_parent.index != 0]
    for parent_id in dividing_ids:
        last_frame = cells_table.loc[cells_table['id'] == parent_id].iloc[0].dead
        is_parent = features['gt_id'] == parent_id
        features.loc[is_parent & (features['frame'] == last_frame-1),'Pre-Mitosis'] = True
        features.loc[is_parent & (features['frame'] == last_frame),'Mitosis'] = True

In [None]:
def removeIncompleteCells(features):
    """Return the rows of ``features`` whose ``incomplete`` flag is not True.

    The ``incomplete`` column is dropped from the result. The index is reset,
    and the previous index is kept as a new leading ``index`` column.
    """
    return (
        features[features.incomplete != True]
        .drop(columns='incomplete')
        .reset_index()
    )

In [None]:
def main (username,password,server,port,database,frames=5):
    """Load the ``features`` table and derive the mitosis-detection features.

    Steps: connect, read ``features``, drop incomplete cells, flag rows with
    enough history, add the one-frame percentage changes, then add the minimum
    area change and the maximum circularity change over the look-back window.

    Parameters
    ----------
    username, password, server, port, database : connection settings passed
        to ``initializeDb``.
    frames : size of the look-back window. It is forwarded to ``checkHistory``
        and ``calculatePctChange``; the default of 5 matches their defaults.

    Returns
    -------
    The enriched features DataFrame.
    """
    engine = initializeDb(username,password,server,port,database)

    features = pd.read_sql_query('select * from features',con=engine)
    features = removeIncompleteCells(features)

    # The window size was previously accepted here but never passed on.
    checkHistory(features, frames)
    calculateNewFeatures(features)
    calculatePctChange(features,'area',frames)
    calculatePctChange(features,'circularity',frames,minimun = False)
    #print features
    # Ground-truth labelling is disabled, so no 'Mitosis'/'Pre-Mitosis' columns are added here.
    #labelMitosis(engine,features);

    return features

In [None]:
# Build the feature table from the ground-truth database.
features = main(
    username="postgres",
    password="postgres",
    server="localhost",
    port=5434,
    database="Fluo-N2DH-SIM+_GT",
)
#print features.loc(features[pre-mitosis] = True)

In [None]:
# Inspect every row recorded for cell id 4.
print(features[features['id'] == 4])

In [None]:
# Candidate mitosis rows: a strong multi-frame area increase, or a moderate one
# with stable circularity, combined with a perimeter increase and enough history.
xpto = features.loc[
    (
        (features['minPctarea'] > 20)
        | (
            (features['pctarea'] > 15)
            & (features['minPctarea'] > 5)
            & (features['pctcircularity'] < 1)
            & (features['maxPctcircularity'] < 3)
        )
    )
    & (features['pctperimeter'] > 5)
    & (features['historicalRecord'] == True)
]
# NOTE(review): 'Pre-Mitosis' is only created by labelMitosis, which is commented
# out in main - confirm the column exists before running this cell.
print(xpto[['id','frame','pctarea','pctperimeter', 'pctcircularity','minPctarea','maxPctcircularity','Pre-Mitosis']])

## 2.2 Associate cells with previous frames

In [3]:
def main2 (username,password,server,port,database):
    """Return a database engine for the given connection settings.

    Thin wrapper that forwards every argument to ``initializeDb``.
    """
    engine = initializeDb(username,password,server,port,database)
    return engine
    
    

In [34]:
# Engine used by every cell of section 2.2.
engine = main2(
    username="postgres",
    password="postgres",
    server="localhost",
    port=5434,
    database="Fluo-N2DH-SIM+_GT",
)



In [7]:
def createTmpTable(engine,originalTable,tmpTable,col):
    """(Re)create ``tmpTable`` as a working copy of ``originalTable``.

    The copy receives three extra columns: ``col`` (bigint), ``id`` (serial
    primary key) and ``parentId`` (bigint).

    The table is dropped with ``IF EXISTS``, so the first run on a fresh
    database no longer fails because the temporary table is missing.

    Parameters
    ----------
    engine : object exposing ``execute(sql)``.
    originalTable : name of the table to copy.
    tmpTable : name of the working table to (re)create.
    col : name of the extra bigint column.

    The names are interpolated into the SQL text as-is, so they must come from
    trusted code and not from user input.

    Returns
    -------
    None.
    """
    statements = [
        "DROP TABLE IF EXISTS %s" % tmpTable,
        "CREATE TABLE %s AS (SELECT * FROM %s)" % (tmpTable, originalTable),
        "ALTER TABLE %s ADD COLUMN %s bigint;" % (tmpTable, col),
        "ALTER TABLE %s ADD COLUMN id SERIAL PRIMARY KEY;" % tmpTable,
        "ALTER TABLE %s ADD COLUMN parentId bigint" % tmpTable,
    ]
    for statement in statements:
        engine.execute(statement)
    
# Working copy of the lifetime table, with an extra tracking-id column.
createTmpTable(engine, originalTable="lifetime", tmpTable="lifetmp", col="myId")

In [8]:
def compareCell(engine,table,frame,id):
    """Compare one cell against every cell of a given frame.

    Parameters
    ----------
    engine : connection accepted by ``pd.read_sql_query``.
    table : table holding the cell geometries.
    frame : frame whose cells are the comparison candidates.
    id : row id of the reference cell.

    Returns
    -------
    DataFrame with one row per candidate. As named in the query it holds
    cell1/area1 (reference cell), cell2/idgenerated/area2 (candidate), the
    reference cell's minimum bounding radius, the centroid distance and an
    intersection flag ``l``.
    """
    query_template = "Select b.myid as cell1,St_area(b.geom) as area1, c.myid as cell2,c.id as idgenerated,St_area(c.geom) as area2,\
            (Select radius from ST_MinimumBoundingRadius(b.geom)),\
            ST_distance(ST_Centroid(b.geom),ST_Centroid(c.geom)) as centDistance,\
            ST_Intersects(ST_MakeValid(c.geom),ST_MakeValid(b.geom)) as l from\
            (select * from %s where id = %s ) as b,\
            (select * from %s where frame = %s) as c"
    query = query_template % (table, id, table, frame)
    # Filtering by intersection and distance is left to the caller.
    return pd.read_sql_query(query, con=engine)

In [9]:
def compareCellInFrame(engine,table,frame,id):
    """Compare every cell of a frame against one given cell.

    This mirrors ``compareCell`` with the roles swapped: the ``b`` side of
    the query is every cell of ``frame``, and the ``c`` side is the single
    row whose id is ``id``.

    Parameters
    ----------
    engine : connection accepted by ``pd.read_sql_query``.
    table : table holding the cell geometries.
    frame : frame whose cells form the ``b`` side.
    id : row id of the cell on the ``c`` side.

    Returns
    -------
    DataFrame with one row per cell of ``frame``. As named in the query it
    holds cell1/area1, cell2/idgenerated/area2, the minimum bounding radius
    of the frame cell, the centroid distance and an intersection flag ``l``.
    """
    query_template = "Select b.myid as cell1,St_area(b.geom) as area1, c.id as cell2,c.id as idgenerated,St_area(c.geom) as area2,\
            (Select radius from ST_MinimumBoundingRadius(b.geom)),\
            ST_distance(ST_Centroid(b.geom),ST_Centroid(c.geom)) as centDistance,\
            ST_Intersects(ST_MakeValid(c.geom),ST_MakeValid(b.geom)) as l from\
            (select * from %s where frame = %s ) as b,\
            (select * from %s where id = %s) as c"
    query = query_template % (table, frame, table, id)
    # Filtering by intersection and distance is left to the caller.
    return pd.read_sql_query(query, con=engine)

In [70]:
def setInitialIDs(engine,cells_table,GT_table,table):
    """Seed ``myId`` for root cells by matching geometries with ground truth.

    For every cell of ``cells_table`` whose ``parentid`` is 0, the first frame
    in which it appears in ``GT_table`` is looked up. The row of ``table``
    with the same geometry in that frame gets ``myId = cellid + 1``.

    Cells that cannot be matched are reported and skipped. Only ordinary
    exceptions are treated as "no match", so KeyboardInterrupt and SystemExit
    are no longer swallowed.

    Note: this name is defined again further down the notebook with a
    two-argument signature, which replaces this version once that cell runs.

    Parameters
    ----------
    engine : object usable by ``pd.read_sql_query`` and exposing ``execute``.
    cells_table : name of the lineage table (``id``, ``parentid``).
    GT_table : name of the ground-truth geometry table.
    table : name of the working table to update.

    Returns
    -------
    None.
    """
    cells = pd.read_sql_query('select * from %s'%(cells_table),con=engine)
    ground_truth = pd.read_sql_query('select * from %s'%(GT_table),con=engine)
    for id in cells.loc[cells['parentid'] == 0]['id']:
        try:
            firstAppear = ground_truth[['id','frame']].loc[ground_truth['id']==id].sort_values(by=['frame']).iloc[0]
            sql3 = "select * from %s \
            join %s \
            on %s.geom = %s.geom \
            where %s.frame = %s and %s.frame = %s and id = %s"%(table,GT_table,table,GT_table,GT_table,firstAppear['frame'],table,firstAppear['frame'],firstAppear['id'])
            same = pd.read_sql_query(sql3 ,con=engine)
            print("%s %s" % (same.cellid[0], same.id[0]))
            engine.execute("UPDATE %s SET myId = %s WHERE id = %s;"%(table,same.cellid[0]+1,same.id[0]))
        except Exception:
            # Best effort: a cell with no first frame or no matching geometry is skipped.
            print("cell " + str(id) + " has no representation")

In [71]:
def restartIds(engine,table):
    """Reset the ``myId`` column of ``table``.

    The column is dropped and added again as ``bigint not null default 0``,
    so every row goes back to ``myId = 0``. Both ALTER statements are sent in
    a single ``execute`` call.
    """
    reset_statement = """ALTER TABLE %s
    drop COLUMN myId ;

    ALTER TABLE %s
    ADD COLUMN myId bigint not null default 0;""" % (table, table)
    engine.execute(reset_statement)
# Start from a clean slate: every row of the working table gets myId = 0.
restartIds(engine, table="lifetmp")

In [12]:
def calculateNumberOfMitosis(engine):
    """Count the ground-truth parents that have exactly two children.

    Reads the ``cells`` table and groups it by ``parentid``. The result is
    the number of groups with exactly two member ids.

    The count is taken directly from the grouped ``id`` column. It therefore
    no longer depends on ``cells`` having exactly three other columns, nor on
    integer division.

    NOTE(review): the ``parentid == 0`` group (cells without a parent) is
    still included, as before; confirm whether it should be excluded.

    Parameters
    ----------
    engine : connection accepted by ``pd.read_sql_query``.

    Returns
    -------
    int
    """
    cellsGT = pd.read_sql_query('Select * from cells',con=engine)
    children_per_parent = cellsGT.groupby('parentid')['id'].count()
    return int((children_per_parent == 2).sum())

In [73]:
def setInitialIDs(engine,table):
    """Seed ``myId`` for the first frame of ``table``.

    Every row with ``frame = 0`` gets ``myId = cellid + 1``.
    """
    seed_statement = "UPDATE %s SET myId = cellid+1 WHERE frame = 0;" % table
    engine.execute(seed_statement)

In [27]:
def labelMitosis(engine,table,parent,child1,child2):
    """Give two daughter cells fresh ids and record their parent.

    The daughters receive ``max(myid) + 1`` and ``max(myid) + 2``, where the
    maximum is taken over ``table``. Both rows get ``parentId = parent``.

    Note: the definition directly below reuses this name and replaces this
    version as soon as the cell runs.

    Parameters
    ----------
    engine : object usable by ``pd.read_sql_query`` and exposing ``execute``.
    table : working table that is read and updated. The updates previously
        targeted the literal ``lifetmp`` whatever this argument was.
    parent : tracking id of the dividing cell.
    child1, child2 : row ids of the two daughter rows.

    Returns
    -------
    None.
    """
    rows = pd.read_sql_query('select * from %s'%(table),con=engine)
    # max() yields a scalar; calling int() on a one-row Series is deprecated in pandas.
    highest = 0 if rows.empty else int(rows['myid'].max())
    for offset, child in enumerate((child1, child2), 1):
        engine.execute("UPDATE %s SET myId = %s WHERE id = %s;"%(table,highest+offset,child))
    engine.execute("UPDATE %s SET parentId = %s WHERE id = %s or id = %s;"%(table,parent,child1,child2))

def labelMitosis(engine,table,parent,child1,child2):
    """Assign lineage-encoding ids to the two daughters of ``parent``.

    The daughter ids are the parent id with a leading 1 or 2 prepended:
    parent 30 gives 130 and 230, parent 130 gives 1130 and 2130. The root
    cell therefore stays readable as ``myid % 100``.

    ``parent`` is converted to ``int`` and floor division is used. The
    prefix position is then the same for numpy/float inputs and under true
    division, where ``parent/100`` would otherwise stringify as e.g. '1.3'.

    NOTE(review): unlike the definition directly above, this version does not
    write ``parentId``; confirm that is intended.

    Parameters
    ----------
    engine : object exposing ``execute(sql)``.
    table : working table to update. The updates previously targeted the
        literal ``lifetmp`` whatever this argument was.
    parent : tracking id of the dividing cell.
    child1, child2 : row ids of the two daughter rows.

    Returns
    -------
    None.
    """
    parent = int(parent)
    if parent < 100:
        step = 100
    else:
        # One power of ten above the parent's current number of digits.
        step = 10 ** (len(str(parent // 100)) + 2)
    for multiplier, child in ((1, child1), (2, child2)):
        engine.execute("UPDATE %s SET myId = %s WHERE id = %s;" % (table, parent + multiplier * step, child))

In [80]:
def CorrespondCells(engine,table):
    """Propagate tracking ids (``myId``) from each frame to the next one.

    For every frame ``i`` from 1 upwards, each cell of frame ``i-1`` is
    compared with the cells of frame ``i`` through ``compareCell``. The rows
    of frame ``i`` are then updated in the database:

    * exactly one match: the matched row inherits the previous cell's id;
    * exactly two matches: ``labelMitosis`` assigns daughter ids;
    * anything else: the cell row id, the frame and the match count are printed.

    A second pass handles rows of frame ``i`` that still have ``myid = 0``.
    Each is attached to the nearest cell of frame ``i-1`` when close enough.

    Parameters
    ----------
    engine : object usable by ``pd.read_sql_query`` and exposing ``execute``.
    table : table to read from.
        NOTE(review): the UPDATE statements below target the literal
        ``lifetmp`` regardless of this argument - confirm.

    Returns
    -------
    None; all results are written to the database.
    """
    lifeTmp = pd.read_sql_query('select * from %s'%(table),con=engine);
    # Number of distinct frame values; the loop below assumes frames are
    # numbered 0 .. frames-1 - TODO confirm.
    frames = lifeTmp['frame'].nunique();
    #lastId = lifeTmp['gt_id'].nunique()
    
    # These two lists are never read or filled in this function.
    missingVal = [];
    mitosisFrame = [];
    for i in range(1,frames):
        # Row ids of every cell in the previous frame.
        j = [];
        l = lifeTmp.loc[(lifeTmp['frame'] ==  i-1)];
        for index,row in l.iterrows():
            j.append(int(row.id))
        for a in j:
            compare = compareCell(engine,table,i,a)
            # A candidate matches when the geometries intersect and the centroid
            # distance is below the previous cell's bounding radius plus 5.
            match = compare.loc[(compare['l']==True) & (compare['radius'] + 5 > compare['centdistance'])]
            if len(match.index) == 1:
                # Same cell one frame later: carry the tracking id over.
                engine.execute("UPDATE lifetmp SET myId = %s WHERE id = %s;"%(match.cell1.iloc[0],match.idgenerated.iloc[0]));
            elif len(match.index) == 2:
                # Two successors: treated as a division.
                labelMitosis(engine,table,match.cell1.iloc[0],match.idgenerated.iloc[0],match.idgenerated.iloc[1])
                
            else:
                # Zero or more than two matches: only reported.
                print(a,i)
                print(len(match.index))
        
        # Second pass: rows of frame i that received no id above.
        cellsWithoutMatch = pd.read_sql_query('select * from %s where frame = %d and myid = 0'%(table,i),con=engine);
        if len(cellsWithoutMatch) > 0:
            for index1,row1 in cellsWithoutMatch.iterrows():
                # Nearest cell of the previous frame, by centroid distance.
                closeValue = compareCellInFrame(engine,table,i-1,row1.id).sort_values("centdistance").iloc[0]
                # Accept it only within three bounding radii of that previous cell.
                if closeValue.centdistance < closeValue.radius*3:
                    # Rows of frame i that already carry the previous cell's id.
                    child1 = pd.read_sql_query('select * from %s where frame = %d and myid = %d'%(table,i,closeValue.cell1),con=engine)
                    if len(child1.index) == 1:
                        # The id is already taken by one row: treat both rows as daughters.
                        labelMitosis(engine,table,closeValue.cell1,child1.iloc[0].id,row1.id);
                    elif len(child1.index) == 0:
                        # Id not used yet in this frame: inherit it directly.
                        engine.execute("UPDATE lifetmp SET myId = %s WHERE id = %s;"%(closeValue.cell1,row1.id));
                    else:
                        print child1
                    lifeTmp = pd.read_sql_query('select * from %s'%(table),con=engine);
            #quit();
        # Reload so the next iteration sees the ids written during this one.
        lifeTmp = pd.read_sql_query('select * from %s'%(table),con=engine);

In [81]:
def programCycle(engine,table):
    """Return the first and last frame of every tracked id.

    The query yields one row per ``myid`` of ``lifetmp``. ``inicio`` and
    ``fim`` are the minimum and maximum frame. ``iniciopai`` is the id in
    ``table`` equal to ``MOD(myid, 100)``. Rows are ordered by ``iniciopai``
    ascending, then ``inicio`` descending.

    Parameters
    ----------
    engine : connection accepted by ``pd.read_sql_query``.
    table : table used for the ``iniciopai`` lookup. The aggregation itself
        always reads ``lifetmp``.

    Returns
    -------
    DataFrame with the columns myid, inicio, fim, iniciopai.
    """
    query = '''select myid, inicio, fim,
    (select DISTINCT b.myid iniciopai
    from %s b
    where b.myid = MOD(a.myid,100)) iniciopai
    from
    (select myid, min(frame) inicio, max(frame) fim
    from lifetmp
    group by myid) a
    order by iniciopai asc, inicio desc''' % table
    return pd.read_sql_query(query, con=engine)


In [82]:
# Full tracking run: clear the ids, seed frame 0, then propagate frame by frame.
restartIds(engine, table="lifetmp")
setInitialIDs(engine, table="lifetmp")
CorrespondCells(engine, table="lifetmp")

   cell1   area1  cell2  idgenerated   area2     radius  centdistance     l
9     30  2170.0      0         1962  2267.5  31.196955      1.469306  True


AttributeError: 'DataFrame' object has no attribute 'myid'

In [None]:
def getProgramLifeCycle(engine,table):
    """Summarise each tracked lineage as a sorted list of frame numbers.

    The rows returned by ``programCycle`` are grouped by ``iniciopai``. For
    each group the list holds every member's last frame (``fim``) plus the
    first frame (``inicio``) of the root id itself, sorted ascending.
    Lineages whose root id is not greater than 0 are left out.

    The result is built once from a list of records rather than with
    ``DataFrame.append``, which was removed in pandas 2.0.

    Parameters
    ----------
    engine, table : forwarded to ``programCycle``.

    Returns
    -------
    DataFrame with the columns ``id2`` (root id) and ``deaths`` (list of int).
    """
    cycle = programCycle(engine,table)
    records = []
    for root_id, members in cycle.groupby('iniciopai'):
        frames = members['fim'].tolist()
        # The lineage starts where the root id first appears.
        frames.append(cycle.loc[cycle.myid == root_id].inicio.iloc[0])
        frames.sort()
        if root_id > 0:
            records.append({'id2': root_id, 'deaths': [int(v) for v in frames]})
    return pd.DataFrame(records, columns=['id2', 'deaths'])
# Lineage summary computed from the tracked ids.
lifeCycle = getProgramLifeCycle(engine, table="lifetmp")
print(lifeCycle)

In [None]:
def getAllDeathFrames(id,table,arr):
    """Collect ``id`` and the ids of all its descendants, depth first.

    Parameters
    ----------
    id : id of the cell to start from.
    table : DataFrame with ``id`` and ``parentid`` columns.
    arr : list that receives the ids; it is extended in place.

    Returns
    -------
    The same list ``arr``: the cell itself, followed by each child's subtree
    in order.
    """
    arr.append(id)
    children = table.loc[table['parentid']==id].id.unique()
    for child in children:
        getAllDeathFrames(child,table,arr)
    return arr

In [None]:
def getGTLifeCycle(engine,cellsTable):
    """Summarise each ground-truth lineage as a sorted list of frame numbers.

    For every root cell (``parentid == 0``) the list holds the ``dead`` frame
    of the cell and of all its descendants, plus the ``born`` frame of the
    root itself, sorted ascending.

    The result is built once from a list of records rather than with
    ``DataFrame.append``, which was removed in pandas 2.0.

    Parameters
    ----------
    engine : connection accepted by ``pd.read_sql_query``.
    cellsTable : name of the lineage table (id, born, dead, parentid).

    Returns
    -------
    DataFrame with the columns ``id`` (root cell id) and ``deaths``
    (list of int).
    """
    cellsGT = pd.read_sql_query('select * from %s'%(cellsTable),con=engine)
    records = []
    for cell in cellsGT.loc[cellsGT['parentid']==0].id.unique():
        family = getAllDeathFrames(cell,cellsGT,[])
        frames = cellsGT.loc[cellsGT['id'].isin(family)]['dead'].values.tolist()
        frames.append(cellsGT.loc[cellsGT['id'] == cell]['born'].iloc[0])
        frames.sort()
        records.append({'id': cell, 'deaths': [int(v) for v in frames]})
    return pd.DataFrame(records, columns=['id', 'deaths'])
# Lineage summary computed from the ground-truth cells table.
GTCycle = getGTLifeCycle(engine, cellsTable="cells")
#print GTCycle.iloc[7].deaths

In [None]:
def compareValues(dataframe1,dataframe2):
    """Pair lineages whose first two frame numbers agree.

    Every row of ``dataframe1`` is compared with every row of ``dataframe2``.
    A pair is kept when the first two entries of their ``deaths`` lists are
    identical.

    The comparison uses ``np.array_equal``, so lists of different length
    never match through broadcasting. Rows whose data cannot be read are
    skipped explicitly, instead of through a bare ``except``.

    Parameters
    ----------
    dataframe1 : DataFrame with ``id`` and ``deaths`` columns.
    dataframe2 : DataFrame with ``id2`` and ``deaths`` columns.

    Returns
    -------
    DataFrame with the columns ``id``, ``id2`` and ``deaths`` (the shared
    two-element prefix, as a numpy array).
    """
    unreadable = (TypeError, ValueError, IndexError, KeyError)
    records = []
    for _, left in dataframe1.iterrows():
        try:
            left_id = left['id']
            left_head = np.array(left['deaths'])[:2]
        except unreadable:
            continue
        for _, right in dataframe2.iterrows():
            try:
                right_id = right['id2']
                right_head = np.array(right['deaths'])[:2]
            except unreadable:
                continue
            if np.array_equal(left_head, right_head):
                records.append({'id': left_id, 'id2': right_id, 'deaths': left_head})
    return pd.DataFrame(records, columns=['id', 'id2', 'deaths'])
# Show both lineage summaries, then the lineages they have in common.
print(lifeCycle)
print(GTCycle)
df = compareValues(dataframe1=GTCycle, dataframe2=lifeCycle)
print(df)

In [None]:
def evaluateGTMitosis(dataframe):
    """Count the mitoses in lineages that start at frame 0.

    Only rows whose ``deaths`` list starts with 0 are counted. Each such row
    contributes ``len(deaths) // 2 - 1``. Floor division is written
    explicitly, so the count stays an integer under true division as well.

    Parameters
    ----------
    dataframe : DataFrame with a ``deaths`` column of non-empty lists.

    Returns
    -------
    int; the total is also printed.
    """
    mitosis = 0
    for _, row in dataframe.iterrows():
        deaths = row.deaths
        if deaths[0] == 0:
            mitosis += len(deaths) // 2 - 1
    print("There are %s mitosis from initial cells"%(mitosis))
    return mitosis
#evaluateGTMitosis(GTCycle)

In [None]:
def evaluateDetectedMitosis(dataframe):
    """Count the detected mitoses in lineages that start at frame 0.

    Only rows whose ``deaths`` list starts with 0 are counted. Each such row
    contributes ``len(deaths) // 2 - 1``. Floor division is written
    explicitly, so the count stays an integer under true division as well.

    Parameters
    ----------
    dataframe : DataFrame with a ``deaths`` column of non-empty sequences.

    Returns
    -------
    int; the total is also printed.
    """
    mitosis = 0
    for _, row in dataframe.iterrows():
        deaths = row.deaths
        if deaths[0] == 0:
            mitosis += len(deaths) // 2 - 1
    print("There are %s mitosis well detected"%(mitosis))
    return mitosis
#evaluateDetectedMitosis(df)

In [None]:
def getFirstGenId(id,cellsTable):
    """Return the id of the root ancestor of cell ``id``.

    Parent links are followed through ``cellsTable`` (columns ``id`` and
    ``parentid``) until a cell whose ``parentid`` is 0 is reached.
    """
    parent_id = cellsTable.loc[cellsTable.id==id].iloc[0].parentid
    if parent_id != 0:
        return getFirstGenId(parent_id,cellsTable)
    return id

In [None]:
def createGTMitosisDf(engine,cellsTable):
    """List the ground-truth cells that divide.

    A cell is listed when at least one other cell names it as ``parentid``.

    The root ancestor is only resolved for the listed cells. The frame is
    built once from records rather than with ``DataFrame.append``, which was
    removed in pandas 2.0.

    Parameters
    ----------
    engine : connection accepted by ``pd.read_sql_query``.
    cellsTable : name of the lineage table (id, born, dead, parentid).

    Returns
    -------
    DataFrame with the columns ``firstGen`` (root ancestor id), ``born`` and
    ``dead``, in table order.
    """
    cellsGT = pd.read_sql_query('select * from %s'%(cellsTable),con=engine)
    has_children = cellsGT['id'].isin(cellsGT['parentid'])
    records = [
        {'firstGen': getFirstGenId(row.id,cellsGT), 'born': row.born, 'dead': row.dead}
        for _, row in cellsGT.loc[has_children].iterrows()
    ]
    return pd.DataFrame(records, columns=['firstGen', 'born', 'dead'])

In [None]:
def createGTCellEndDf(engine,cellsTable):
    """List the ground-truth cells that end without dividing.

    A cell is listed when no other cell names it as ``parentid``.

    The root ancestor is only resolved for the listed cells. The frame is
    built once from records rather than with ``DataFrame.append``, which was
    removed in pandas 2.0.

    Parameters
    ----------
    engine : connection accepted by ``pd.read_sql_query``.
    cellsTable : name of the lineage table (id, born, dead, parentid).

    Returns
    -------
    DataFrame with the columns ``firstGen`` (root ancestor id), ``born`` and
    ``dead``, in table order.
    """
    cellsGT = pd.read_sql_query('select * from %s'%(cellsTable),con=engine)
    is_leaf = ~cellsGT['id'].isin(cellsGT['parentid'])
    records = [
        {'firstGen': getFirstGenId(row.id,cellsGT), 'born': row.born, 'dead': row.dead}
        for _, row in cellsGT.loc[is_leaf].iterrows()
    ]
    return pd.DataFrame(records, columns=['firstGen', 'born', 'dead'])

In [None]:
def createProgramMitosisDf(engine,table):
    """List the tracked ids that the program saw dividing.

    Daughter ids are the parent id with one extra leading digit (30 gives
    130 and 230; 130 gives 1130 and 2130). An id is therefore listed when
    some other id is larger than the next power of ten and equals it modulo
    that power.

    Fixes compared with the earlier version:
    * the digit count uses ``int(myid) // 100``, so float ids or true
      division no longer inflate it ('1.3' has three characters);
    * ids from 100 upwards take the multi-digit branch, matching the
      ``parent < 100`` rule that ``labelMitosis`` uses to create the ids;
    * the frame is built once from records (``DataFrame.append`` was removed
      in pandas 2.0).

    Parameters
    ----------
    engine, table : forwarded to ``programCycle``.

    Returns
    -------
    DataFrame with the columns ``firstGen`` (``myid % 100``), ``born``
    (``inicio``) and ``dead`` (``fim``).
    """
    cycle = programCycle(engine,table)
    records = []
    for _, row in cycle.iterrows():
        track_id = int(row.myid)
        if track_id >= 100:
            modulus = 10 ** (len(str(track_id // 100)) + 2)
        else:
            modulus = 100
        is_descendant = ((cycle['myid'] / modulus) > 1) & (cycle['myid'] % modulus == track_id)
        if is_descendant.any():
            records.append({'firstGen': track_id % 100, 'born': row.inicio, 'dead': row.fim})
    return pd.DataFrame(records, columns=['firstGen', 'born', 'dead'])

In [None]:
def createProgramCellEndDf(engine,table):
    """List the tracked ids that end without a detected division.

    Daughter ids are the parent id with one extra leading digit (30 gives
    130 and 230; 130 gives 1130 and 2130). An id is listed when no other id
    is larger than the next power of ten and equal to it modulo that power.

    Fixes compared with the earlier version:
    * the digit count uses ``int(myid) // 100``, so float ids or true
      division no longer inflate it ('1.3' has three characters);
    * ids from 100 upwards take the multi-digit branch, matching the
      ``parent < 100`` rule that ``labelMitosis`` uses to create the ids;
    * the frame is built once from records (``DataFrame.append`` was removed
      in pandas 2.0).

    Parameters
    ----------
    engine, table : forwarded to ``programCycle``.

    Returns
    -------
    DataFrame with the columns ``firstGen`` (``myid % 100``), ``born``
    (``inicio``) and ``dead`` (``fim``).
    """
    cycle = programCycle(engine,table)
    records = []
    for _, row in cycle.iterrows():
        track_id = int(row.myid)
        if track_id >= 100:
            modulus = 10 ** (len(str(track_id // 100)) + 2)
        else:
            modulus = 100
        is_descendant = ((cycle['myid'] / modulus) > 1) & (cycle['myid'] % modulus == track_id)
        if not is_descendant.any():
            records.append({'firstGen': track_id % 100, 'born': row.inicio, 'dead': row.fim})
    return pd.DataFrame(records, columns=['firstGen', 'born', 'dead'])

In [68]:
def correspondIds(engine,df,GtDb,port,user,pw):
    """Attach ground-truth ids to program lineages and keep the matched rows.

    A ``gtId`` column is added to ``df`` in place, starting at 0. For every
    root cell of the local ``cells`` table (``parentid == 0``), the cell's
    first frame is looked up in the ``lifetime`` table of the remote database
    ``GtDb``, which is reached through ``dblink``. The ``lifetmp`` geometry
    of that frame containing the ground-truth centroid provides a tracking
    id. Rows of ``df`` whose ``firstGen`` equals that id receive the
    ground-truth id.

    Parameters
    ----------
    engine : connection accepted by ``pd.read_sql_query``.
    df : DataFrame with a ``firstGen`` column; modified in place.
    GtDb, port, user, pw : dblink connection settings. They are interpolated
        directly into the SQL text.

    Returns
    -------
    The rows of ``df`` whose ``gtId`` is not 0.
    """
    df['gtId'] = 0;
    cells_table = pd.read_sql_query('select * from %s'%("cells"),con=engine)
    # Every (gtId, frame) pair of the remote lifetime table.
    sqltst = "select gtId,frame from (SELECT geoms.* \
    FROM dblink('dbname=%s port=%s user=%s password=%s', \
    'SELECT * FROM lifetime') \
    AS geoms(gtId smallint ,frame smallint , geom geometry)) as a"%(GtDb,port,user,pw)
    tmp = pd.read_sql_query(sqltst,con=engine)
    for id in cells_table.loc[cells_table['parentid'] == 0]['id']:
        try:
            # Earliest remote row for this cell.
            # NOTE(review): the lookup uses id+1 here, while the cells_table lookup
            # further down uses firstAppear.gtid unchanged - confirm the offset.
            firstAppear = tmp.loc[tmp['gtid']==id+1].sort_values(by=['frame']).iloc[0];
            sql = "Select myid, a.geom, a.gtId from lifetmp, \
            (SELECT geoms.* \
            FROM dblink('dbname=%s port=%s user=%s password=%s', \
            'SELECT * FROM lifetime where cellid = %s and frame = %s') \
            AS geoms(gtId smallint ,frame smallint , geom geometry)) as a \
            where ST_Contains(lifetmp.geom,ST_centroid(a.geom)) and lifetmp.frame = %s order by cellid"%(GtDb,port,user,pw,firstAppear.gtid,firstAppear.frame,firstAppear.frame)        
            tmp2 = pd.read_sql_query(sql,con=engine);
            
            # Only root tracking ids (non-zero, below 100) are accepted, and only
            # when the cells table says the ground-truth cell was born at frame 0.
            if tmp2.iloc[0].myid<100 and tmp2.iloc[0].myid != 0 and cells_table.loc[cells_table['id']==firstAppear.gtid]['born'].iloc[0]==0:
            
            #if firstAppear.frame == 0:
            
                df.loc[df['firstGen'] == tmp2.iloc[0].myid, 'gtId'] = firstAppear.gtid

        except:
            # Any failure above (no remote row, no containing geometry, query error)
            # is reported as a missing cell.
            print "cell " + str(id) + " doesn't appear"
    
    return df.loc[df['gtId'] != 0];

In [None]:
def totalTrueM(gtDf,programDf):
    """Count the ground-truth rows whose lineage was matched by the program.

    A row of ``gtDf`` counts when its ``firstGen`` appears in the ``gtId``
    column of ``programDf``.
    """
    return sum(
        1
        for _, cell in gtDf.iterrows()
        if (programDf['gtId'] == cell.firstGen).any()
    )

In [None]:
def totalTrueA(gtDf,programDf):
    """Count the matched ground-truth rows that end before the last frame.

    A row of ``gtDf`` counts when its ``firstGen`` appears in the ``gtId``
    column of ``programDf`` and its ``dead`` frame is strictly smaller than
    the largest ``dead`` value in ``gtDf``.
    """
    final_frame = gtDf.loc[gtDf['dead']==gtDf['dead'].max()]['dead'].iloc[0]
    return sum(
        1
        for _, cell in gtDf.iterrows()
        if (programDf['gtId'] == cell.firstGen).any() and cell.dead < final_frame
    )

In [None]:
def totalTruePositivesM(gtDf,programDf):
    """Count the program rows that agree with a ground-truth row.

    For every distinct (born, dead) pair of ``gtDf``, each row of
    ``programDf`` with the same pair counts when its ``gtId`` equals the
    ``firstGen`` of a ground-truth row with that pair.
    """
    spans = gtDf.groupby(['born','dead']).sum().reset_index()
    hits = 0
    for _, span in spans.iterrows():
        truth = gtDf.loc[(gtDf['born']==span.born) & (gtDf['dead']==span.dead)]
        detected = programDf.loc[(programDf['born']==span.born) & (programDf['dead']==span.dead)]
        for _, candidate in detected.iterrows():
            if (truth['firstGen']==candidate.gtId).any():
                hits += 1
    return hits

In [None]:
def totalTruePositivesA(gtDf,programDf):
    """Count the agreeing program rows, ignoring cells alive at the last frame.

    For every distinct (born, dead) pair of ``gtDf``, each row of
    ``programDf`` with the same pair counts when its ``gtId`` equals the
    ``firstGen`` of a ground-truth row with that pair, and that row's ``dead``
    frame is strictly smaller than the largest ``dead`` in ``gtDf``.
    """
    spans = gtDf.groupby(['born','dead']).sum().reset_index()
    hits = 0
    final_frame = gtDf.loc[gtDf['dead']==gtDf['dead'].max()]['dead'].iloc[0]
    for _, span in spans.iterrows():
        truth = gtDf.loc[(gtDf['born']==span.born) & (gtDf['dead']==span.dead)]
        detected = programDf.loc[(programDf['born']==span.born) & (programDf['dead']==span.dead)]
        for _, candidate in detected.iterrows():
            if ((truth['firstGen']==candidate.gtId) & (truth['dead']<final_frame)).any():
                hits += 1
    return hits

In [None]:
def totalFalseNegativesM(gtDf,programDf):
    """Return ``totalTrueM`` minus ``totalTruePositivesM`` for the same inputs."""
    expected = totalTrueM(gtDf,programDf)
    confirmed = totalTruePositivesM(gtDf,programDf)
    return expected - confirmed

In [None]:
def totalFalseNegativesA(gtDf,programDf):
    """Return ``totalTrueA`` minus ``totalTruePositivesA`` for the same inputs."""
    expected = totalTrueA(gtDf,programDf)
    confirmed = totalTruePositivesA(gtDf,programDf)
    return expected - confirmed

In [None]:
def totalFalsePositivesM(gtDf,programDf):
    """Return the rows of ``programDf`` with a non-zero ``gtId`` that are not true positives."""
    matched_rows = programDf.loc[programDf['gtId'] != 0]
    return len(matched_rows) - totalTruePositivesM(gtDf,programDf)

In [None]:
def totalFalsePositivesA(gtDf,programDf):
    """Return the matched program rows ending before the last frame that are not true positives.

    The last frame is the largest ``dead`` value in ``gtDf``.
    """
    final_frame = gtDf.loc[gtDf['dead']==gtDf['dead'].max()]['dead'].iloc[0]
    ended_early = programDf.loc[(programDf['gtId'] != 0) & (programDf['dead']<final_frame)]
    return len(ended_early) - totalTruePositivesA(gtDf,programDf)

In [None]:
def totalTrueNegativesM(GTCellEndDf,programDf):
    """Count the program rows that agree with a ground-truth cell-end row.

    For every distinct (born, dead) pair of ``GTCellEndDf``, each row of
    ``programDf`` with the same pair counts when its ``gtId`` equals the
    ``firstGen`` of a ground-truth row with that pair.
    """
    spans = GTCellEndDf.groupby(['born','dead']).sum().reset_index()
    hits = 0
    for _, span in spans.iterrows():
        truth = GTCellEndDf.loc[(GTCellEndDf['born']==span.born) & (GTCellEndDf['dead']==span.dead)]
        detected = programDf.loc[(programDf['born']==span.born) & (programDf['dead']==span.dead)]
        for _, candidate in detected.iterrows():
            if (truth['firstGen']==candidate.gtId).any():
                hits += 1
        #possibleWellDetected += min(len(b.loc[b['gtId']==0]),len(a))
    return hits

In [None]:
def showConfusionMatrix(arr,name):
    """Draw ``arr`` as an annotated heat map and save it as ``name + '.png'``.

    Parameters
    ----------
    arr : 2x2 matrix of integer counts (cells are formatted with 'd').
    name : output path without the ``.png`` extension.

    Returns
    -------
    None.
    """
    heatmap_axes = sn.heatmap(
        arr,
        annot=True,
        yticklabels=["P","N"],
        xticklabels=['P','N'],
        vmax=-1,
        linewidths=5,
        fmt='d',
    )
    heatmap_axes.set(ylabel='Predicted', xlabel='Ground Truth')
    # Put the ground-truth axis label and ticks above the matrix.
    x_axis = heatmap_axes.xaxis
    x_axis.set_label_position('top')
    x_axis.tick_top()
    heatmap_axes.get_figure().savefig(name+'.png', dpi=400)

In [69]:
# Re-create the engine before the evaluation cells.
engine = main2(
    username="postgres",
    password="postgres",
    server="localhost",
    port=5434,
    database="Fluo-N2DH-SIM+_GT",
)

In [None]:
def evaluateMitosisStats(engine):
    """Build and save the mitosis confusion matrix.

    The ground-truth and program mitosis tables are compared. The matrix
    ``[[TP, FP], [FN, 0]]`` is saved through ``showConfusionMatrix`` under
    the database name followed by ``(Mitosis2)``.
    """
    db = "Fluo-N2DH-SIM+"
    gtDf = createGTMitosisDf(engine,"cells")
    programMitosisDf = correspondIds(
        engine,
        createProgramMitosisDf(engine,"lifetmp"),
        db,
        5434,
        "postgres",
        "postgres",
    )
    #gtCellEndDf = createGTCellEndDf(engine,"cells");
    true_positives = totalTruePositivesM(gtDf,programMitosisDf)
    false_negatives = totalFalseNegativesM(gtDf,programMitosisDf)
    false_positives = totalFalsePositivesM(gtDf,programMitosisDf)
    matrix = [[true_positives,false_positives],[false_negatives,0]]
    showConfusionMatrix(matrix,db+'(Mitosis2)')
evaluateMitosisStats(engine)

In [None]:
def evaluateApoptosisStats(engine):
    """Build and save the cell-end (apoptosis) confusion matrix.

    The ground-truth and program cell-end tables are compared. The matrix
    ``[[TP, FP], [FN, 0]]`` is saved through ``showConfusionMatrix`` under
    the database name followed by ``(Apoptosis)``.
    """
    db = "Fluo-N2DL-HeLa"
    gtDf = createGTCellEndDf(engine,"cells")
    programDf = correspondIds(
        engine,
        createProgramCellEndDf(engine,"lifetmp"),
        db,
        5434,
        "postgres",
        "postgres",
    )
    #gtCellEndDf = createGTCellEndDf(engine,"cells");
    true_positives = totalTruePositivesA(gtDf,programDf)
    false_negatives = totalFalseNegativesA(gtDf,programDf)
    false_positives = totalFalsePositivesA(gtDf,programDf)
    matrix = [[true_positives,false_positives],[false_negatives,0]]
    showConfusionMatrix(matrix,db+'(Apoptosis)')
evaluateApoptosisStats(engine)

In [None]:
# Scratch check: false positives against the ground-truth cell-end table.
# NOTE(review): gtCellEndDf and programDf are not assigned at notebook level in the
# visible cells (only inside functions) - define them before running this cell.
totalFalsePositivesM(gtCellEndDf,programDf)

In [None]:
# Scratch check: the first ground-truth row holding the largest 'dead' frame.
# NOTE(review): gtCellEndDf is not assigned at notebook level in the visible cells.
gtCellEndDf.loc[gtCellEndDf['dead'].idxmax()]

In [None]:
# Scratch check: the last-frame value, computed the same way as in totalTrueA.
# NOTE(review): gtCellEndDf is not assigned at notebook level in the visible cells.
gtCellEndDf.loc[gtCellEndDf['dead']==gtCellEndDf['dead'].max()]['dead'].iloc[0]

In [None]:
# Scratch check: matched ground-truth cell ends that finish before the last frame.
# NOTE(review): gtCellEndDf and programDf are not assigned at notebook level in the visible cells.
totalTrueA(gtCellEndDf,programDf)

In [None]:
# Scratch check: program rows agreeing with the ground-truth cell-end table.
# NOTE(review): gtCellEndDf and programDf are not assigned at notebook level in the visible cells.
totalTruePositivesM(gtCellEndDf,programDf)

In [None]:

# Scratch version of correspondIds, run inline with hard-coded dblink settings.
# It overwrites the notebook-level names df, cells_table, tmp, sql and tmp2.
df = createProgramMitosisDf(engine,"lifetmp")
print df
df['gtId'] = 0;
cells_table = pd.read_sql_query('select * from %s'%("cells"),con=engine)
#tmp = pd.read_sql_query('select id,frame from %s'%(GT_table),con=engine)
# Every (gtId, frame) pair of the remote lifetime table, fetched through dblink.
sqltst = "select gtId,frame from (SELECT geoms.* \
FROM dblink('dbname=%s port=%s user=%s password=%s', \
'SELECT * FROM lifetime') \
AS geoms(gtId smallint ,frame smallint , geom geometry)) as a"%("Fluo-N2DH-SIM+2",5434,"postgres","postgres")
tmp = pd.read_sql_query(sqltst,con=engine)
for id in cells_table.loc[cells_table['parentid'] == 0]['id']:
    try:
        # Earliest remote row for this root cell (no +1 offset here, unlike correspondIds).
        firstAppear = tmp.loc[tmp['gtid']==id].sort_values(by=['frame']).iloc[0];
        sql = "Select myid, a.geom, a.gtId from lifetmp, \
        (SELECT geoms.* \
        FROM dblink('dbname=%s port=%s user=%s password=%s', \
        'SELECT * FROM lifetime where cellid = %s and frame = %s') \
        AS geoms(gtId smallint ,frame smallint , geom geometry)) as a \
        where ST_Contains(lifetmp.geom,ST_centroid(a.geom)) and lifetmp.frame = %s order by cellid"%("Fluo-N2DH-SIM+2",5434,"postgres","postgres",firstAppear.gtid,firstAppear.frame,firstAppear.frame)
        #print sql
        tmp2 = pd.read_sql_query(sql,con=engine);
        # Only root tracking ids (non-zero, below 100) are linked to a ground-truth id.
        if tmp2.iloc[0].myid<100 and tmp2.iloc[0].myid != 0:
            df.loc[df['firstGen'] == tmp2.iloc[0].myid, 'gtId'] = firstAppear.gtid
        print tmp2.iloc[0].myid,firstAppear.gtid,firstAppear.frame

    except:
        # Any failure for this cell (no row, query error, ...) only prints a marker.
        print "what";
    
print df

In [None]:

# Scratch copy of the body of the four-argument setInitialIDs, run inline.
# NOTE(review): `table` and `GT_table` are not assigned at notebook level in the
# visible cells. If they are undefined, the NameError is raised inside the try and
# swallowed by the bare except, so every cell is reported as "has no representation".
cells_table = pd.read_sql_query('select * from %s'%("cells"),con=engine)
tmp = pd.read_sql_query('select * from %s'%("lifetime"),con=engine)
for id in cells_table.loc[cells_table['parentid'] == 0]['id']:
    try:
        # First frame in which this root cell appears in the lifetime table.
        firstAppear = tmp[['id','frame']].loc[tmp['id']==id].sort_values(by=['frame']).iloc[0];
        sql3 = "select * from %s \
        join %s \
        on %s.geom = %s.geom \
        where %s.frame = %s and %s.frame = %s and id = %s"%(table,GT_table,table,GT_table,GT_table,firstAppear['frame'],table,firstAppear['frame'],firstAppear['id']);
        same = pd.read_sql_query(sql3 ,con=engine)
        print same.cellid[0],same.id[0]
        # Unlike setInitialIDs, the id is written without the +1 offset.
        engine.execute("UPDATE %s SET myId = %s WHERE id = %s;"%(table,same.cellid[0],same.id[0]));
    except:
        print "cell " + str(id) + " has no representation"

In [None]:
def correspondFirstFrameIds(engine, programDf, GtDb, port, user, pw):
    """Attach ground-truth ids to ``programDf`` using frame 0 only.

    This cell was a loose function body: it ended in a top-level ``return``
    (a SyntaxError) and read the undefined names GtDb, port, user and pw.
    It is wrapped in a function here, with those names as parameters, so the
    notebook parses and the logic can be called explicitly.

    The query matches each local ``lifetime`` geometry of frame 0 with the
    remote (dblink) ``lifetime`` geometry of frame 0 whose centroid it
    contains. For every match, the rows of ``programDf`` whose ``firstGen``
    equals the local ``cellid`` receive the remote id in ``gtId``.

    Parameters
    ----------
    engine : connection accepted by ``pd.read_sql_query``.
    programDf : DataFrame with a ``firstGen`` column; modified in place.
    GtDb, port, user, pw : dblink connection settings. They are interpolated
        directly into the SQL text.

    Returns
    -------
    ``programDf`` with the ``gtId`` column added (0 where nothing matched).
    """
    sql = "Select cellid, a.geom, a.gtId from lifetime, \
    (SELECT geoms.* \
    FROM dblink('dbname=%s port=%s user=%s password=%s', \
    'SELECT * FROM lifetime where frame = 0') \
    AS geoms(gtId smallint ,frame smallint , geom geometry)) as a \
    where ST_Contains(lifetime.geom,ST_centroid(a.geom)) and lifetime.frame = 0 order by cellid"%(GtDb,port,user,pw)

    matches = pd.read_sql_query(sql,con=engine)
    programDf['gtId'] = 0
    for _, match in matches.iterrows():
        programDf.loc[programDf['firstGen'] == match.cellid, 'gtId'] = match.gtid
    return programDf

In [77]:
def createRes_track(engine,table,path,maxFrame=5):
    """Export the tracked lineages to ``path + 'res_track.txt'``.

    The output has one line per tracked id, in the form
    ``id first_frame last_frame parent_id`` separated by spaces, with every
    id shifted by 100.

    Before exporting, ``parentid`` is set to 0 for every row of frame 0 in
    ``table``.

    Parameters
    ----------
    engine : object usable by ``pd.read_sql_query`` and exposing ``execute``.
    table : working table.
        NOTE(review): the query also names ``lifetmp`` literally, so it only
        works when ``table`` is ``"lifetmp"`` - confirm.
    path : directory prefix, including the trailing separator.
    maxFrame : last frame to export. Longer tracks are cut at this frame, and
        tracks starting after it are dropped. The default of 5 keeps the
        earlier test-only behaviour; pass ``None`` to export the full
        sequence.

    Returns
    -------
    None.
    """
    engine.execute("UPDATE %s set parentid = 0 where frame = 0"%(table))

    sql = '''select myid, inicio, fim, parentid
    from
    (select lifetmp.myid, min(frame) inicio, max(frame) fim, b.parentid parentid
    from %s 
    JOIN
    (select myid, parentid from lifetmp where parentid is not null) as b
    on lifetmp.myid = b.myid
    where lifetmp.myid != 0 
    group by lifetmp.myid,b.parentid) a
    order by myid'''%(table)
    pgCycle = pd.read_sql_query(sql,con=engine)
    pgCycle["myid"] = pgCycle["myid"] + 100

    # TESTS - only the first frames are exported (5 by default).
    if maxFrame is not None:
        pgCycle.loc[pgCycle['fim']>maxFrame, 'fim'] = maxFrame
        pgCycle = pgCycle.drop(pgCycle[pgCycle.fim<pgCycle.inicio].index)
    pgCycle.to_csv(path + "res_track.txt",sep=' ', index=False, header=False)
# Write RES\res_track.txt from the tracked ids.
createRes_track(engine, table="lifetmp", path="RES\\")

In [67]:
# Cut the ground-truth tracks to the first 5 frames, to match the truncated export.
# NOTE: the result overwrites GT\man_track.txt in place.
data = pd.read_csv('GT\\man_track.txt', sep=" ", header=None)
data.columns = ["myid", "inicio", "fim", "parentid"]
# Tracks that run past frame 5 end at frame 5.
data['fim'] = data['fim'].clip(upper=5)
# Tracks that start after the cut-off are removed.
data = data[~(data.fim < data.inicio)]
data.to_csv('GT\\man_track.txt',sep=' ', index=False, header=False)