In [1]:
# Read the whole "dirty.db" log into memory, one string per line.
# The encoding is passed explicitly so the result does not depend on the
# platform default (cp1252 on Windows would garble non-ASCII pad text).
# NOTE(review): assumes the file is UTF-8 encoded -- confirm against the
# Etherpad instance that produced it.
with open("C:\\Users\\pace\\Google Drive\\Documents\\EPFL\\MA3\\Sem Proj\\FROG-analytics\\etherpad\\var\\dirty.db", encoding="utf-8") as f:
    lines = f.readlines()

In [2]:
# Scratch cell: bind `salut` to an empty string, then reference (without
# calling) its bound `isalnum` method so the notebook displays it.
salut=""
salut.isalnum

<function str.isalnum>

In [3]:
class ElementaryOperation:
    """
    Elementary operation (finest granularity), such as the addition or the
    removal of one letter or of a very short sequence.

    operation_type: "add" or "del"
    abs_position: position in document
    length_to_delete: how many characters to remove from the position of the
        op when the op is "del". Ignored (stored as None) otherwise.
    text_to_add: text to add when the op is "add". Ignored (stored as None)
        otherwise.
    line_number: line number. Not strictly necessary since we have the position
    position_inline: position in the current line. Not strictly necessary
        since we have the position
    author
    timestamp
    pad_name
    revs: version number
    changeset: original information encoded in Etherpad format
        (http://policypad.readthedocs.io/en/latest/changesets.html)
    belong_to_operation: the higher-level operation this elementary operation
        has been attached to, if any

    Raises AttributeError if operation_type is neither "add" nor "del".
    """
    def __init__(self, operation_type, abs_position,
                 length_to_delete=None,
                 text_to_add=None,
                 line_number=None,
                 position_inline=None,
                 author=None,
                 timestamp=None,
                 pad_name=None,
                 revs=None,
                 changeset=None,
                 belong_to_operation=None):
        # Validate first so that a bad type never yields a half-built object.
        if operation_type not in ("add", "del"):
            raise AttributeError("Undefined elementary operation")

        self.operation_type = operation_type
        # Both payload attributes always exist; the one that does not apply
        # to this operation type is None, so readers never hit a missing
        # attribute.
        self.text_to_add = text_to_add if operation_type == "add" else None
        self.length_to_delete = (length_to_delete
                                 if operation_type == "del" else None)

        self.abs_position = abs_position
        self.line_number = line_number
        self.position_inline = position_inline
        self.author = author
        self.timestamp = timestamp
        self.pad_name = pad_name
        self.revs = revs
        self.changeset = changeset
        self.belong_to_operation = belong_to_operation

    def __str__(self):
        """Return a multi-line, human-readable dump of every field."""
        if self.operation_type == 'add':
            payload = "Text to add:" + str(self.text_to_add)
        else:
            payload = "Length to delete:" + str(self.length_to_delete)
        return "\n".join([
            "Operation:" + str(self.operation_type),
            "Position:" + str(self.abs_position),
            payload,
            "Line number:" + str(self.line_number),
            "Position inline:" + str(self.position_inline),
            "Author:" + str(self.author),
            "Timestamp:" + str(self.timestamp),
            "Pad Name:" + str(self.pad_name),
            "Revs:" + str(self.revs),
            "Original changeset:" + str(self.changeset),
            "Belong to Operation:" + str(self.belong_to_operation),
        ])
    
        
        
def parse_changeset(changeset):
    """
    Parse a changeset into a list of elementary operations.

    The returned operations only carry what the changeset itself encodes
    (type, position, text or length, line information and the raw
    changeset); author, timestamp, pad name and revision are left unset.
    http://policypad.readthedocs.io/en/latest/changesets.html

    A changeset looks like ``Z:<old_len>[><]<delta><ops>$<char bank>`` where
    every number is written in base 36 and each op is one of:

    * ``=N``   keep N characters, none of them newlines
    * ``+N``   insert the next N characters of the char bank
    * ``-N``   delete N characters
    * ``|L``   prefix: the following ``=``/``+``/``-`` spans L newlines and
               ends on a newline
    * ``*A``   attribute reference (ignored here)

    Positions are expressed in the document as it stands once the previous
    operations of the same changeset have been applied: keeping or inserting
    characters moves the cursor forward, deleting does not.

    changeset: the changeset string
    Returns: the list of ElementaryOperation, in changeset order (possibly
        empty)
    Raises ValueError if an operand is not a base-36 number.
    """
    SYMBOLS = '|$+-=*'

    def findNextSymbolIdx(string, start):
        """
        Return the index of the first symbol at or after start, or
        len(string) if there is none.
        """
        for i in range(start, len(string)):
            if string[i] in SYMBOLS:
                return i
        # We reached the end of the text
        return len(string)

    # The first '$' separates the operations from the char bank (the
    # operations themselves never contain a '$').
    bank_separator = changeset.find('$')
    if bank_separator == -1:
        ops_end = len(changeset)
        char_bank = ""
    else:
        ops_end = bank_separator
        char_bank = changeset[bank_separator + 1:]

    line_number = 0
    position = 0
    position_inline = 0
    bank_offset = 0        # how much of the char bank has been consumed
    pending_newlines = 0   # L of a '|L' prefix waiting for its =, + or -
    elementary_operations = []

    # The header ("Z:5>1") contains no symbol, so the first symbol found is
    # the start of the first operation, whatever its kind.
    idx = findNextSymbolIdx(changeset, 0)
    while idx < ops_end:
        symbol = changeset[idx]
        next_idx = findNextSymbolIdx(changeset, idx + 1)
        operand = changeset[idx + 1:next_idx]

        if symbol == '*':
            # TODO: Format with '*' (attributes are skipped for now)
            pass
        elif symbol == '|':
            pending_newlines = int(operand, 36)
        else:
            count = int(operand, 36)
            if symbol == '=':
                # Keep `count` characters: only the cursor moves.
                position += count
                if pending_newlines:
                    # The kept span ends on a newline, so the cursor is at
                    # the beginning of a line.
                    line_number += pending_newlines
                    position_inline = 0
                else:
                    position_inline += count
            elif symbol == '+':
                # Each insertion consumes its own slice of the char bank.
                text = char_bank[bank_offset:bank_offset + count]
                bank_offset += count
                elementary_operations.append(ElementaryOperation(
                    "add",
                    position,
                    text_to_add=text,
                    line_number=line_number,
                    position_inline=position_inline,
                    changeset=changeset))
                # The inserted text now sits before the cursor.
                position += count
                if pending_newlines:
                    line_number += pending_newlines
                    position_inline = 0
                else:
                    position_inline += count
            else:
                # symbol == '-': the cursor stays put, the text that
                # followed the deleted span simply moves up to it.
                elementary_operations.append(ElementaryOperation(
                    "del",
                    position,
                    length_to_delete=count,
                    line_number=line_number,
                    position_inline=position_inline,
                    changeset=changeset))
            pending_newlines = 0
        idx = next_idx

    return elementary_operations

In [10]:
# Local import: this cell is the only place that needs the JSON parser.
import json

# Maps each pad name to a list with one entry per revision found in the log;
# each entry is the list of elementary operations of that revision.
# NOTE(review): the name suggests a flat list per pad, but the original code
# appended whole per-revision lists, which is kept here -- confirm intent.
list_of_elem_ops_per_pad = dict()
for line in lines:
    # We look at relevant log lines
    if '{"key":"pad:' not in line:
        continue
    # The log lines are JSON, so parse them as such instead of rewriting
    # false/null and calling eval(): eval would execute arbitrary content
    # of the log and fails on JSON `true`.
    line_dict = json.loads(line)
    key = line_dict['key']
    # Only revision records ("pad:<name>:revs:<n>") carry a changeset.
    if ':revs:' not in key:
        continue
    pad_name_idx = key.find("pad:") + len("pad:")
    pad_name_end_idx = key.find(':revs:', pad_name_idx)
    pad_name = key[pad_name_idx:pad_name_end_idx]
    revs = int(key[pad_name_end_idx + len(':revs:'):])
    changeset = line_dict['val']['changeset']
    author_name = line_dict['val']['meta']['author']
    timestamp = line_dict['val']['meta']['timestamp']

    # The changeset does not know who wrote it nor when: fill that in from
    # the surrounding record.
    elem_ops = parse_changeset(changeset)
    for elem_op in elem_ops:
        elem_op.author = author_name
        elem_op.timestamp = timestamp
        elem_op.revs = revs
        elem_op.pad_name = pad_name
    list_of_elem_ops_per_pad.setdefault(pad_name, []).append(elem_ops)

# TODO: the per-pad processing was never written. The original cell had a
# `for` header with no body followed by the bare placeholder names `salut`
# and `qqch` (the latter undefined); an explicit no-op body keeps the cell
# runnable until the real processing is added.
for pad_name in list_of_elem_ops_per_pad:
    pass


In [11]:
class Pad:
    """
    A pad, identified by its name, together with the operations made on it.

    pad_name: name of the pad
    operations: operations added so far, in insertion order
    """
    def __init__(self, pad_name):
        self.pad_name = pad_name
        # Per-instance list: a class-level list would be shared by all pads.
        self.operations = []

    def add_operation(self, operation):
        """Append a single operation to this pad."""
        self.operations.append(operation)

    def add_operations(self, operations):
        """Append every operation of the given iterable, in order."""
        for op in operations:
            self.add_operation(op)

    def get_elem_ops_ordered(self):
        # TODO
        raise NotImplementedError

    def get_text(self):
        # TODO
        raise NotImplementedError

    def display_text_colored_by_ops(self):
        # TODO: For Louis
        raise NotImplementedError
class Operation:
    """
    A group of elementary operations made by one author.

    author: author of the operation
    timestamp_start: timestamp at which the operation starts
    timestamp_end: timestamp of the last elementary operation added, or None
        as long as none has been added
    elem_ops: elementary operations added so far, in insertion order
    """
    def __init__(self, author, timestamp_start):
        self.author = author
        self.timestamp_start = timestamp_start
        self.timestamp_end = None
        # Per-instance list: a class-level list would be shared by every
        # Operation.
        self.elem_ops = []

    def add_elem_op(self, elem_op):
        """
        Attach one elementary operation: it becomes the new end of this
        operation and is marked as belonging to it.
        """
        self.timestamp_end = elem_op.timestamp
        elem_op.belong_to_operation = self
        self.elem_ops.append(elem_op)

    def add_elem_ops(self, elem_ops):
        """Attach every elementary operation of the given iterable, in order."""
        for elem_op in elem_ops:
            self.add_elem_op(elem_op)