In [28]:
import os
import re
import cv2
import pandas as pd
import numpy as np
import json
import csv
import re

class Database:
    """ Index of the recordings (videos, motion files, EEG files) that belong to each dyad.

        The index is a nested dictionary stored in self.dictionary, e.g.
        {1: {'video': {1: {'path': ..., 'n_frames': ...}}, 'motion': {...}, 'eeg': {...}}}.
        The top-level keys are the dyad (pair) numbers. After load_json() all keys are
        strings, because JSON object keys are always strings.
    """

    def __init__(self, videos_path = None, motion_path = None, eegs_path = None):
        """ Store the three data directories; each one falls back to a default when None.

            videos_path -- folder that contains the Pilot_XX sub folders with the videos
            motion_path -- folder that contains the .mot files
            eegs_path   -- folder that contains the .eeg / .vmrk files
        """
        self.videos_path = videos_path
        if videos_path is None:
            self.videos_path = "/data/p_01888/Databook_cleaning/Video/"

        self.motion_path = motion_path
        if motion_path is None:
            # NOTE(review): 'pt_01888' differs from the 'p_01888' used by the other defaults - confirm
            self.motion_path = '/data/pt_01888/motionData/'

        # The EEG path depends only on eegs_path (it used to be derived from motion_path)
        self.eegs_path = eegs_path
        if eegs_path is None:
            self.eegs_path = '/data/p_01888/Databook_cleaning/EEG/'

        self.dictionary = {}# Empty database until init_via_videos() or load_json() is called

        self.json_filename = "database.json"
        self.csv_filename = "database.csv"


    def init_via_videos(self):
        """ Search for videos and append them to a dictionary structure.
            Use this structure as basis for the database.
        """
        if self.videos_path is None:
            print("Set videospath first")
            return# Nothing to scan, keep the current dictionary
        self.dictionary = Database.compute_dict(self.videos_path)

    def save_as_json(self):
        """ Saves the database dictionary as a .json file (self.json_filename) """
        with open(self.json_filename, 'w') as outfile:
            json.dump(self.dictionary, outfile)

    def load_json(self, filepath = None):
        """ Loads the database from a .json file (self.json_filename if filepath is None).
            Note that all dictionary keys are strings after loading.
        """
        if filepath is None:
            filepath = self.json_filename
        with open(filepath, 'r') as file:
            self.dictionary = json.load(file)

    def save_as_csv(self):
        """ Saves the dataframe representation as a tab separated file (self.csv_filename) """
        dataframe = self.get_dataframe()
        dataframe.to_csv(self.csv_filename, sep='\t', encoding='utf-8')

    @staticmethod
    def n_frames(path):
        """ Obtain number of frames from videos using CV2 """
        cap = cv2.VideoCapture(path)
        try:
            return int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        finally:
            cap.release()# Free the file handle / decoder even if reading the property fails

    def get_dict(self):
        """ Getter function for the dictionary that represents the database"""
        return self.dictionary

    def get_keys_of_level(self, lvl = 0, dictionary = None):
        """ Returns all keys for a given level in the dictionary.
            Level 0 is the top level; self.dictionary is used when no dictionary is given.
        """
        #We always call with subtrees thus we need subfunction rec_keys_of_level
        if dictionary is None:
            dictionary = self.dictionary
        return self.rec_keys_of_level(dictionary, lvl)

    def rec_keys_of_level(self, subtree, lvl):
        """ Recursive helper method of get_keys_of_level(...).
            Returns an empty list if subtree is not a dictionary.
        """
        if not isinstance(subtree, dict):
            return []
        #if level is 0 (or below) return the keys of this subtree
        if lvl <= 0:
            return list(subtree.keys())
        #otherwise collect the keys of all nested dictionaries one level deeper
        ret_lst = []
        for value in subtree.values():
            if isinstance(value, dict):
                ret_lst.extend(self.rec_keys_of_level(value, lvl - 1))
        return ret_lst

    def set_motion_frame(self, dyad, y1, x1, y2, x2, video_number, child = None, mother = None):
        """ Not implemented yet: only reports when neither child nor mother is given. """
        if child is None and mother is None:
            print("Either child or mother must be specified as True")

    @staticmethod
    def compute_dict(path):
        """ Computes the dictionary by checking for valid video files in path and subdirectories.
            Creates a nested dictionary for all dyads and stores file paths within.
            Path specifies a folder that contains subfolders called Pilot_00 ... Pilot_99
            Only files are processed that match the syntax exemplified by coSMIC_all_P01_C1.wmv
            The dict may be accessed e.g. by returned_dict[1]["video"][2]["path"]
            Returns an empty dict if path does not exist.
        """
        directories = {}

        # Only the listing of path itself is needed; sub folders are handled by the recursion below
        root, dirs, files = next(os.walk(path), (path, [], []))

        for file in sorted(files):# Go through files by name and check if they are a valid video
            match = re.match(".*coSMIC_all_P[0-9][0-9]_C([0-9])\\.wmv", file)
            if match:
                attributes = {}
                attributes["path"] = os.path.join(root, file)
                attributes["n_frames"] = Database.n_frames(attributes["path"])

                camera = int(match.group(1))
                directories[camera] = attributes

        for d in sorted(dirs):#Append a number for each dyad
            if re.match("Pilot_[0-9][0-9]", d):
                pair = int(re.search("[0-9][0-9]", d).group(0))
                directories[pair] = {}
                # os.path.join keeps this working when path has no trailing separator
                directories[pair]['video'] = Database.compute_dict(os.path.join(root, d))
        return directories

    @staticmethod
    def _lookup(subtree, keys):
        """ Follow the list of keys into the nested subtree; None if any step is missing. """
        node = subtree
        for key in keys:
            if not isinstance(node, dict) or key not in node:
                return None
            node = node[key]
        return node

    def get_dataframe(self):
        """ Get a pandas dataframe representation of the database.
            Convert self.dictionary to a dataframe.

            Row i holds dyad number i (rows of missing dyads, e.g. row 0, stay empty),
            so the frame has max(dyad number) + 1 rows. There is one column per path of
            keys that leads to a final value, named e.g. 'video.1.path'.
            Dyad keys may be ints or, after load_json(), numeric strings.
        """
        d = self.dictionary
        rows = {int(key): value for key, value in d.items()}# dyad number -> subtree
        self.un_id = [] # Will store e.g. [['video', 1, 'path'], ['video', 1, 'n_frames']...]

        for value in d.values():#Toplevel will make the rows
            if isinstance(value, dict):
                self.acc_rec(value, [])# Retrieve keys to final values in the nested structure

        #Make em unique i.e. avoid having same list twice in outer list
        self.un_id = [list(x) for x in set(tuple(i) for i in self.un_id)]
        # Same order as a plain sort, but does not fail if int and str keys meet at one position
        self.un_id.sort(key=lambda keys: [(isinstance(k, str), k) for k in keys])

        headers = ['.'.join(str(c) for c in x) for x in self.un_id]#String representation of each sublist

        n_rows = max(rows) + 1 if rows else 0# + 1 so that the highest dyad number gets a row too
        table = np.full((n_rows, len(self.un_id)), None, dtype=object)#Rows x columns

        for y, subtree in rows.items():
            for x, attr in enumerate(self.un_id):
                table[y, x] = Database._lookup(subtree, attr)# None if this dyad lacks the value

        dataframe = pd.DataFrame(table, columns=headers)

        return dataframe


    def acc_rec(self, node, prefix):
        """
            Appends lists of keys e.g. ['video', 1, 'path'] or ['eeg', 'metainfo', 'channel', 3]
                    to self.un_id recursively
        """
        for key, value in node.items():
            new = list(prefix) + [key]
            if isinstance(value, dict):
                self.acc_rec(value, new)
            else:
                self.un_id.append(new)

    def integrate_motion(self):
        """
            Check if a .mot file exists for each video, following the naming convention
            coSMIC_all_P<pair>_C<video>.mot inside self.motion_path.
            Add filepath of .mot data under dictionary[pair]["motion"][video]["path"].
        """
        for pair, entry in list(self.dictionary.items()):
            if not isinstance(entry, dict):
                continue
            videos = entry.get("video")
            if not isinstance(videos, dict):
                continue
            for n_video in list(videos):
                filename = "coSMIC_all_P"+str(pair)+"_C"+str(n_video)+".mot"
                path = os.path.join(self.motion_path, filename)
                if os.path.isfile(path):
                    # Create the "motion" sub dictionary on first use
                    entry.setdefault("motion", {})[n_video] = {"path" : path}



    def integrate_raw_eegs(self):
        """ Check if the EEG files (.eeg and .vmrk) exist for each dyad, following the naming
            convention coSMIC_all_P<pair>.eeg / .vmrk inside self.eegs_path.
            If both are found, add the filepath of the .eeg file and the parsed marker file.
            The files are looked up (and parsed) once per dyad.
        """
        for pair, entry in list(self.dictionary.items()):
            if not isinstance(entry, dict):
                continue
            path = os.path.join(self.eegs_path, "coSMIC_all_P"+str(pair)+".eeg")
            path1 = os.path.join(self.eegs_path, "coSMIC_all_P"+str(pair)+".vmrk")

            if os.path.isfile(path) and os.path.isfile(path1):
                entry["eeg"] = {}
                entry["eeg"]["path"] = path
                entry["eeg"]["metainfo"] = Database.parse_vmrk(path1)


    @staticmethod
    def parse_vmrk(path):
        """ Parses vmrk file and returns a dictionary containing the information.
            The keys denote the kind of data whereas the values are a dictionary that maps
            the running number of the marker line (starting at 1) to the parsed value.
            Only lines of the form Mk<number>=<type>,<description>,<position>,<size>,<channel>
            are parsed; missing trailing fields are treated as empty.
        """
        data = {'marker number':{}, 'type':{}, 'description':{}, 'position':{}, 'size':{}, 'channel':{}}

        entry = 0
        with open(path) as f:
            for line in f:
                header = re.match("Mk([0-9]+)=", line)# [0-9]+ so numbers >= 100 are read completely
                if header is None:
                    continue
                entry += 1

                fields = line[header.end():].rstrip("\r\n").split(",", 4)
                fields += [""] * (5 - len(fields))# Pad short lines instead of failing

                data["marker number"][entry] = int(header.group(1))
                data["type"][entry] = fields[0]
                data["description"][entry] = fields[1]
                data["position"][entry] = int('0' + fields[2])# '0' + is necessary as some fields are empty
                data["size"][entry] = int('0' + fields[3])

                try:#In the first line there is an additional value we dont want to parse
                    data["channel"][entry] = int('0' + fields[4])
                except ValueError:
                    data["channel"][entry] = 0
        return data

In [29]:
#Database.compute_dict('/data/p_01888/Databook_cleaning/Video/')

In [30]:
# Create the database object; with no arguments the video, motion and EEG
# directories fall back to the defaults hard-coded in Database.__init__.
base = Database()

In [31]:
# Scan the video directory to build the nested dyad dictionary, then display it.
base.init_via_videos()
base.get_dict()

{1: {'video': {1: {'n_frames': 21414,
    'path': '/data/p_01888/Databook_cleaning/Video/Pilot_01/coSMIC_all_P01_C1.wmv'},
   2: {'n_frames': 21417,
    'path': '/data/p_01888/Databook_cleaning/Video/Pilot_01/coSMIC_all_P01_C2.wmv'},
   3: {'n_frames': 21416,
    'path': '/data/p_01888/Databook_cleaning/Video/Pilot_01/coSMIC_all_P01_C3.wmv'}}},
 2: {'video': {1: {'n_frames': 12421,
    'path': '/data/p_01888/Databook_cleaning/Video/Pilot_02/coSMIC_all_P02_C1.wmv'},
   2: {'n_frames': 12421,
    'path': '/data/p_01888/Databook_cleaning/Video/Pilot_02/coSMIC_all_P02_C2.wmv'},
   3: {'n_frames': 12421,
    'path': '/data/p_01888/Databook_cleaning/Video/Pilot_02/coSMIC_all_P02_C3.wmv'}}},
 3: {'video': {1: {'n_frames': 47309,
    'path': '/data/p_01888/Databook_cleaning/Video/Pilot_03/coSMIC_all_P03_C1.wmv'},
   2: {'n_frames': 47307,
    'path': '/data/p_01888/Databook_cleaning/Video/Pilot_03/coSMIC_all_P03_C2.wmv'},
   3: {'n_frames': 47307,
    'path': '/data/p_01888/Databook_cleaning/V

In [32]:
# Add the path of each .mot file found in the motion directory under the "motion" key of its dyad.
base.integrate_motion()

In [33]:
# Parse a single marker file directly. parse_vmrk is a staticmethod, so calling it
# through the instance `base` is equivalent to Database.parse_vmrk(...).
# `vmrk` is not used by any of the later cells shown here.
vmrk = base.parse_vmrk('/data/p_01888/Databook_cleaning/EEG/coSMIC_all_P1.vmrk')

In [34]:
# For every dyad that has both an .eeg and a .vmrk file, store the .eeg path and the
# parsed marker information under the "eeg" key.
base.integrate_raw_eegs()

In [35]:
# Show the marker descriptions parsed from the .vmrk file of dyad 28.
# This raises a KeyError if dyad 28 is missing or has no "eeg" entry.
print(base.get_dict()[28]["eeg"]["metainfo"]["description"])
#print(base.get_dict()[28]["eeg"]["metainfo"]["position"])
#print(base.get_dict()[28]["video"][2]["n_frames"])

{1: '', 2: 'ControlBox is not connected via USB', 3: 'R128', 4: 'S 13', 5: 'S 10', 6: 'S 11', 7: 'S 40', 8: 'S 41', 9: 'S 40', 10: 'S 43', 11: 'S 40', 12: 'S 41', 13: 'S 40', 14: 'S 42', 15: 'S 40', 16: 'S 41', 17: 'S 40', 18: 'S 46', 19: 'S 40', 20: 'S 41', 21: 'S 40', 22: 'S 45', 23: 'S 40', 24: 'S 41', 25: 'S 40', 26: 'S 49', 27: 'S 40', 28: 'S 41', 29: 'S 40', 30: 'S 48', 31: 'S 10', 32: 'R128'}


In [36]:
# Flatten the nested dictionary into a table: rows are indexed by dyad number and
# each column is one dotted key path such as "video.1.path".
base.get_dataframe()

Unnamed: 0,eeg.metainfo.channel.1,eeg.metainfo.channel.2,eeg.metainfo.channel.3,eeg.metainfo.channel.4,eeg.metainfo.channel.5,eeg.metainfo.channel.6,eeg.metainfo.channel.7,eeg.metainfo.channel.8,eeg.metainfo.channel.9,eeg.metainfo.channel.10,...,motion.3.path,motion.4.path,video.1.n_frames,video.1.path,video.2.n_frames,video.2.path,video.3.n_frames,video.3.path,video.4.n_frames,video.4.path
0,,,,,,,,,,,...,,,,,,,,,,
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,21414.0,/data/p_01888/Databook_cleaning/Video/Pilot_01...,21417.0,/data/p_01888/Databook_cleaning/Video/Pilot_01...,21416.0,/data/p_01888/Databook_cleaning/Video/Pilot_01...,,
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,12421.0,/data/p_01888/Databook_cleaning/Video/Pilot_02...,12421.0,/data/p_01888/Databook_cleaning/Video/Pilot_02...,12421.0,/data/p_01888/Databook_cleaning/Video/Pilot_02...,,
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,47309.0,/data/p_01888/Databook_cleaning/Video/Pilot_03...,47307.0,/data/p_01888/Databook_cleaning/Video/Pilot_03...,47307.0,/data/p_01888/Databook_cleaning/Video/Pilot_03...,,
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,34718.0,/data/p_01888/Databook_cleaning/Video/Pilot_04...,34716.0,/data/p_01888/Databook_cleaning/Video/Pilot_04...,34717.0,/data/p_01888/Databook_cleaning/Video/Pilot_04...,,
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,26955.0,/data/p_01888/Databook_cleaning/Video/Pilot_05...,26956.0,/data/p_01888/Databook_cleaning/Video/Pilot_05...,26956.0,/data/p_01888/Databook_cleaning/Video/Pilot_05...,,
6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,22905.0,/data/p_01888/Databook_cleaning/Video/Pilot_06...,22907.0,/data/p_01888/Databook_cleaning/Video/Pilot_06...,22908.0,/data/p_01888/Databook_cleaning/Video/Pilot_06...,,
7,,,,,,,,,,,...,,,49968.0,/data/p_01888/Databook_cleaning/Video/Pilot_07...,49965.0,/data/p_01888/Databook_cleaning/Video/Pilot_07...,49965.0,/data/p_01888/Databook_cleaning/Video/Pilot_07...,,
8,,,,,,,,,,,...,,,19819.0,/data/p_01888/Databook_cleaning/Video/Pilot_08...,19816.0,/data/p_01888/Databook_cleaning/Video/Pilot_08...,19818.0,/data/p_01888/Databook_cleaning/Video/Pilot_08...,,
9,,,,,,,,,,,...,,,11085.0,/data/p_01888/Databook_cleaning/Video/Pilot_09...,11083.0,/data/p_01888/Databook_cleaning/Video/Pilot_09...,11083.0,/data/p_01888/Databook_cleaning/Video/Pilot_09...,11083.0,/data/p_01888/Databook_cleaning/Video/Pilot_09...


In [None]:
# Write the dictionary to "database.json" (the json_filename set in __init__),
# relative to the current working directory.
base.save_as_json()

In [21]:
# Write the dataframe representation to "database.csv"; despite the extension the
# file is tab separated (sep='\t' in save_as_csv).
base.save_as_csv()

In [22]:
# Replace the in-memory dictionary with the content of "database.json".
# JSON object keys are always strings, so dyad and camera numbers come back as '1', '2', ...
base.load_json()#After loading keys are strings

In [23]:
# Find the smallest position of an 'R128' marker in the EEG marker data of one dyad.
dyad = 33
pos = 10000000000000000  # sentinel: stays unchanged when no 'R128' marker is found

# The dictionary was reloaded from JSON, so the dyad number and the marker ids are
# strings. Missing dyads / missing EEG data simply yield empty dictionaries here,
# which replaces the former bare `except: pass` (it also hid the NameError caused by
# the undefined name `pair`).
metainfo = base.get_dict().get(str(dyad), {}).get('eeg', {}).get('metainfo', {})
positions = metainfo.get('position', {})

for key, value in metainfo.get('description', {}).items():  # search for R128
    if value == 'R128' and key in positions:
        pos = min(pos, int(positions[key]))  # keep the smallest R128 position

In [25]:
# Display the dictionary as it looks after load_json(): all keys are strings now.
base.get_dict()

{'1': {'eeg': {'metainfo': {'channel': {'1': 0,
     '10': 0,
     '11': 0,
     '12': 0,
     '13': 0,
     '2': 0,
     '3': 0,
     '4': 0,
     '5': 0,
     '6': 0,
     '7': 0,
     '8': 0,
     '9': 0},
    'description': {'1': '',
     '10': 'S 10',
     '11': 'S 14',
     '12': 'S 10',
     '13': 'S128',
     '2': 'ControlBox is not connected via USB',
     '3': 'S 10',
     '4': 'S 13',
     '5': 'S128',
     '6': 'S 10',
     '7': 'S 12',
     '8': 'S 10',
     '9': 'S 13'},
    'marker number': {'1': 1,
     '10': 10,
     '11': 11,
     '12': 12,
     '13': 13,
     '2': 2,
     '3': 3,
     '4': 4,
     '5': 5,
     '6': 6,
     '7': 7,
     '8': 8,
     '9': 9},
    'position': {'1': 1,
     '10': 226228,
     '11': 335396,
     '12': 425398,
     '13': 460746,
     '2': 11,
     '3': 19487,
     '4': 29007,
     '5': 32412,
     '6': 59016,
     '7': 82254,
     '8': 185691,
     '9': 196219},
    'size': {'1': 1,
     '10': 1,
     '11': 1,
     '12': 1,
     '13': 1,
 

In [39]:
# Render the sub tree of dyad 1 as an SVG graph.
# NOTE(review): neither `pydot` nor `Dotstring` is imported or defined in the cells
# visible here, so on a fresh kernel this cell stops with a NameError (see the
# recorded output). Both must be imported / defined before this cell is run.
# NOTE(review): after base.load_json() the dyad keys are strings, so the lookup
# below would need '1' instead of 1 - confirm which state this cell expects.
from IPython.display import SVG
from IPython.display import HTML

# Wrap the sub tree in a one-entry dict so the graph gets a labelled root node.
print_direc = {'Pair 1': base.get_dict()[1]}
string = Dotstring.assemble_dotstring(print_direc)

# graph_from_dot_data returns a list of graphs; the first one is rendered.
graphs = pydot.graph_from_dot_data(string)
svg_string = graphs[0].create_svg() 
display(SVG(svg_string))


# Shrink all SVG outputs of the notebook to half size via injected CSS.
HTML("<style>svg{width:50% !important;height:50% !important;}</style>")

NameError: name 'Dotstring' is not defined