In [3]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

In [4]:
path = '../input/glioblastoma-radiosensitivity-dataset/glioblastoma_data/extracted_data/cell_behaviors/videos_transcriptions_csv'
print(path)

../input/glioblastoma-radiosensitivity-dataset/glioblastoma_data/extracted_data/cell_behaviors/videos_transcriptions_csv


In [5]:
def import_files(path):
    struc = pd.DataFrame(columns =["radiation_level", "path","code_file","group"])
    read = os.walk(path)
    for root, dirs, files in read:
        for name in files:
            if name.endswith(".csv"):
                #print(name)
                radiation = name.split("gy")
                [_,group,_] = name.split("_") 
                filepath = path+ '/' + name
                code_file = name[:-4] #removing suffix
                
                struc = struc.append({"radiation_level" : int(radiation[0]), "path" : filepath, "code_file" : code_file, "group" : group}, ignore_index=True)
                
    return struc

In [7]:
def str_to_numpy(input_str):
    """
    str_to_numpy is used to parse the columns "input" and "output" of the dataset csv files.
    It converts a string representing an array of cells to a numpy array containing strings
    which are the names of these cells.
    """
    process_str = input_str.replace("[", "")
    process_str = process_str.replace("'", "")
    process_str = process_str.replace(" ", "")
    process_str = process_str.replace("]", "")
    if(process_str != ""):
        process_str = process_str.split(",")
        output_array = np.asarray(process_str, dtype=str)
    else:
        output_array = np.asarray([], dtype=str)
    return output_array

In [8]:
file = path + "/0gy_n1_201104.nd2u2510Gy1A.csv"
data = pd.read_csv(file, converters = {"input" : str_to_numpy, "output" : str_to_numpy}) #imports a dataframe from a csv 
                  # and converts its string components to numpy
data.head()



Unnamed: 0,time,event,input,output
0,0,begin,[],[C1]
1,66,div,[C1],"[C1.1, C1.2]"
2,238,div,[C1.1],"[C1.1.1, C1.1.2]"
3,386,div,[C1.1.1],"[C1.1.1.1, C1.1.1.2]"
4,432,end,[C1.1.1.1],[]


## Algorithm :

We suppose that trees do not merge.

1. List begin events -> give us the root cells for each tree
2. Group together cells which contains the name of previous cells ("C1" and "C1.1" e.g.)
   1. Create a DataFrame with [root_cell, cell_name, time, event, input, ouput]

In [9]:
roots = data.loc[data.event == 'begin'].output
roots.head()

0     [C1]
16    [C2]
23    [C3]
39    [C4]
49    [C5]
Name: output, dtype: object

In [10]:
class tree:
    cells = pd.DataFrame(columns = ["root_cell", "cell_name", "time","event","input","output"])
    def __init__(self):
        self.cells = pd.DataFrame(columns = ["root_cell", "cell_name", "time","event","input","output"])
    


In [20]:
def root_cell_test(row, root): #Tests if the row corresponds to the root cell
    if row.input[0].startswith(root):
        return True
    return False



def cells_to_trees(experience): #experience is a DataFrame
    #Select root cells in experience
    root_cells = experience.loc[experience.event == 'begin'].output 

    #Creation of a tree corresponding to the experience
    trees = tree()
    for root in root_cells:
        exp_bis = experience["input" != "begin"]
        f = lambda x:root_cell_test(row, root)
        root_cell = exp_bis[]
        trees.cells.append({"root_cell" : root, "cell_name" : root_cell.input[0], 
        "time" : root_cell.time,"event" : root_cell.event,"input" : root_cell.input,"output" : root_cell.output}, ignore_index=True)
    return trees

In [21]:
#TEST

experience = pd.read_csv(file, converters = {"input" : str_to_numpy, "output" : str_to_numpy})


trees = cells_to_trees(experience)

trees.cells.head()


AttributeError: 'list' object has no attribute 'startswith'

In [1]:
def list_of_trees(data_frame):
    trees_list = pd.DataFrame(columns =["tree_id", "tree"])
    for idx in data_frame.index:

        #Select experience correspondingto index idx
        path = data_frame.loc[idx,'path']
        file_name = data_frame.loc[idx,'code_file']
        file_path = path + "/"+ file_name + ".csv"
        experience = pd.read_csv(file_path, converters = {"input" : str_to_numpy, "output" : str_to_numpy})

        trees = cells_to_trees(experience)
        
        #Add experience trees to the list of all trees
        trees_list.append({"tree_id" : })