An experiment using temporal link decay within the model.

The insight here is that, once we use multivalent edges, link decay becomes more challenging: decay has to be tracked not just per node pair (i, j) but per type of edge between them.

In [1]:
import re
import time

import pandas as pd
from midiutil import MIDIFile
from yappyChuck import Client
client = Client()

In [2]:
# MIDI-style settings shared by the cells below.
# Only `volume` is read by the live ChucK messages; the other values are
# referenced solely by the commented-out MIDIFile export code.
track    = 0
channel  = 0
track_time     = 0    # In beats
duration = 1    # In beats
tempo    = 60   # In BPM
volume   = 62  # 0-127, as per the MIDI standard; base level that each counter is added to before sending

In [3]:
# Tweet table: one row per tweet, keyed by 'id' (the column the annotations are merged on).
enmi = pd.read_csv('../enmi.csv')

In [4]:
# Session table: find_start_end reads each row positionally, column 0 as the
# window start and column 1 as the window end.
sessions = pd.read_csv('../sessions.csv')
# Annotations are parsed with read_csv's default comma separator despite the
# .txt extension; they are joined to the tweets on 'id'.
annotations = pd.read_csv('../annotations.txt')

Join the tweets with their annotations. The sessions for the graph are later cut from this table using its `start` and `end` columns.

In [5]:
# Left join: keep every row of enmi and attach the annotation columns
# from the annotation rows that share the same 'id'.
annotation_data = pd.merge(enmi,annotations, how='left', on='id')
annotation_data.head()

Unnamed: 0.1,Unnamed: 0,created_at,id,id_str,text,truncated,source,in_reply_to_status_id,in_reply_to_status_id_str,in_reply_to_user_id,...,quoted_status.contributors,quoted_status.is_quote_status,quoted_status.retweet_count,quoted_status.favorite_count,quoted_status.favorited,quoted_status.retweeted,quoted_status.lang,ann,start,end
0,0,Wed Dec 19 13:30:12 +0000 2018,1075382796179386369,1075382796179386369,"@robywebo @OdileA Bonjour, je parlais du panel...",False,"<a href=""http://twitter.com/download/iphone"" r...",1.075374e+18,1.075374e+18,182802200.0,...,,,,,,,,,454000,454000
1,1,Wed Dec 19 13:33:53 +0000 2018,1075383722323689472,1075383722323689472,#enmi18 @vincentpuig ou se trouve ces outils? ...,False,"<a href=""https://polemictweet.com"" rel=""nofoll...",,,,...,,,,,,,,,675000,675000
2,2,Wed Dec 19 13:34:23 +0000 2018,1075383846185635840,1075383846185635840,#enmi18 Paolo Vignola et Sara Baranzoni : inve...,False,"<a href=""https://polemictweet.com"" rel=""nofoll...",,,,...,,,,,,,,REF,705000,705000
3,3,Wed Dec 19 13:40:59 +0000 2018,1075385508635783170,1075385508635783170,Dernier session des #enmi18 : Le terrain du te...,True,"<a href=""https://about.twitter.com/products/tw...",,,,...,,,,,,,,REF,1101000,1101000
4,4,Wed Dec 19 13:45:22 +0000 2018,1075386610961784837,1075386610961784837,@Isabell42560134 #enmi18 Suivez les liens vers...,False,"<a href=""http://twitter.com"" rel=""nofollow"">Tw...",1.075384e+18,1.075384e+18,1.070035e+18,...,,,,,,,,REF,1364000,1364000


## Set up the Link Decay per Session



In [6]:
def find_start_end(session, data=None):
    """Return the tweets that fall inside one session window.

    Parameters
    ----------
    session : sequence
        Row describing the session; ``session[0]`` is the window start and
        ``session[1]`` the window end, in the same unit as the ``start`` and
        ``end`` columns of ``data``.
    data : pandas.DataFrame, optional
        Table to filter. Defaults to the notebook-level ``annotation_data``,
        so existing one-argument calls behave as before. Passing it
        explicitly removes the dependency on kernel state.

    Returns
    -------
    pandas.DataFrame
        Rows with ``start >= session[0]`` and ``end < session[1]`` (half-open
        window), restricted to the columns ``text``, ``user.screen_name``,
        ``source`` and ``start``, in that order. The decay loops read these
        columns by position.
    """
    if data is None:
        data = annotation_data

    window_start, window_end = session[0], session[1]
    in_window = (data['start'] >= window_start) & (data['end'] < window_end)
    # One .loc selection instead of chained indexing (filter, then columns).
    return data.loc[in_window, ['text', 'user.screen_name', 'source', 'start']]

In [7]:
# One DataFrame of tweets per row of `sessions`, in the same order as the rows.
tweets = [find_start_end(session) for session in sessions.to_numpy()]

In [None]:
num_tracks = 4  # track count for the MIDIFile export; not read by the live ChucK path below

sess = 0

#0 -> macro, 1 -> meso, 2 -> micro
time_level = 1

# Polemic marker found in the tweet text -> (counter key, note number sent to ChucK).
# The bookkeeping keys are derived from the counter key:
# 'OK' -> 'oklast' / 'okdiff', 'REF' -> 'reflast' / 'refdiff', and so on.
POLEMIC_EDGES = {"++": ("OK", 32), "??": ("KO", 27), "**": ("Q", 24), "==": ("REF", 20)}

# The outer loop variable used to be `t`, which the per-user counter `t = 0`
# overwrote on every tweet. That counter only fed the disabled MIDIFile
# export, so it is gone and the loop variable has a name of its own.
for session_tweets in tweets:
    # Per-session state: user screen name -> counters and timestamps.
    c = {}

    # Session / graph-version counters; only the disabled MIDI export used them
    # (it pitched the base node at 40 + sess).
    sess += 1
    sess_graph_version = 0

    # Row layout comes from find_start_end: 0 text, 1 user.screen_name, 2 source, 3 start.
    for tweet in session_tweets.to_numpy():
        sess_graph_version += 1
        _text, user, start = tweet[0], tweet[1], tweet[3]

        # Base edge: count the user's tweets and remember when the last one was.
        if user in c:
            stats = c[user]
            stats['diff'] = (start - stats['last'])/10000
            stats['value'] += 1
            stats['last'] = start
        else:
            stats = c[user] = {'value': 1, 'last': start, 'diff': 0,
                               'OK': 0, 'okdiff': 0, 'oklast': 0,
                               'KO': 0, 'kodiff': 0, 'kolast': 0,
                               'Q': 0, 'qdiff': 0, 'qlast': 0,
                               'REF': 0, 'refdiff': 0, 'reflast': 0}

        # Link decay per edge type. The first hit seeds both the timestamp and
        # the reference interval with `start` and counts as +1. After that,
        # the edge weakens (-1) when the gap since the previous hit is longer
        # than the stored interval, and strengthens (+1) otherwise.
        for marker, (tag, _note) in POLEMIC_EDGES.items():
            if marker not in _text:
                continue
            last_key, diff_key = tag.lower() + 'last', tag.lower() + 'diff'
            if int(stats[last_key]) > 0:
                gap = start - stats[last_key]
                if int(stats[diff_key]) < gap:
                    stats[tag] -= 1
                else:
                    stats[tag] += 1
                stats[diff_key] = gap
            else:
                stats[diff_key] = start
                stats[tag] += 1
            stats[last_key] = start

        # Send the current state of every user seen so far in this session.
        for user_stats in c.values():
            # node / node edge for the base edge
            client.send("weight_edge:40:42:{}:{}".format(0.3, volume + user_stats['value']))
            if time_level > 0:
                # Polemic parts: only edge types whose weight is still positive are sent.
                for tag, note in POLEMIC_EDGES.values():
                    if user_stats[tag] > 0:
                        client.send("polemic:{}:{}:{}".format(note, volume + user_stats[tag], 0.3))

        # Pause between tweets so that each graph state is audible.
        time.sleep(2)

## Temporal Decay for the Whole Graph

This is to export the whole graph. 

@todo: fix the times

In [None]:
def find_columns(session):
    """Project a tweet table onto the four columns the decay loop reads.

    The returned frame has the columns in the order text,
    user.screen_name, source, start. The loop accesses them by position
    after ``to_numpy()``.
    """
    wanted = ['text', 'user.screen_name', 'source', 'start']
    return session[wanted]

# Whole-graph run: the complete data set is treated as one single session,
# so `tweets` is a one-element list of DataFrames (the decay loop calls
# .to_numpy() on each element). annotation_data is used rather than enmi
# because the merge output above shows 'start' arriving with the annotations.
# NOTE(review): confirm enmi has no 'start' column of its own.
tweets = [find_columns(annotation_data)]

In [None]:
def update_object(obj, user, tag, last, diff, start, current=None):
    """Apply one link-decay step to a single edge type of a single user.

    Parameters
    ----------
    obj : dict
        Maps a user to that user's record of counters and timestamps.
        The record is updated in place.
    user : hashable
        Key of the record inside ``obj``.
    tag : str
        Key of the edge counter to update (for example ``'OK'``).
    last : str
        Key holding the timestamp of the previous hit (for example ``'oklast'``).
    diff : str
        Key holding the reference interval (for example ``'okdiff'``).
    start : number
        Timestamp of the current hit.
    current : ignored
        Kept so existing positional calls keep working; the interval is
        always recomputed from ``start``.

    Behaviour
    ---------
    On the first hit (stored ``last`` not positive) both ``last`` and
    ``diff`` are seeded with ``start`` and the counter is incremented.
    On later hits the counter is decremented when the gap since the previous
    hit is longer than the stored interval and incremented otherwise. The gap
    then becomes the new reference interval, and ``start`` becomes the new
    ``last``.
    """
    entry = obj[user]
    if int(entry[last]) > 0:
        gap = start - entry[last]
        if int(entry[diff]) < gap:
            entry[tag] -= 1
        else:
            entry[tag] += 1
        # `last` must hold the timestamp, not the interval, or the next gap is wrong.
        entry[last] = start
        entry[diff] = gap
    else:
        entry[diff] = start
        entry[last] = start
        entry[tag] += 1


#0 -> macro, 1 -> meso, 2 -> micro
time_level = 1

# Polemic marker found in the tweet text -> (counter key, note number sent to ChucK).
# Bookkeeping keys are derived from the counter key:
# 'OK' -> 'oklast' / 'okdiff', 'url' -> 'urllast' / 'urldiff', and so on.
POLEMIC_EDGES = {"++": ("OK", 32), "??": ("KO", 27), "**": ("Q", 24), "==": ("REF", 20)}

# Patterns used at the micro level (time_level == 2).
URL_PATTERN = re.compile(r'(http|ftp|https):\/\/([\w\-_]+(?:(?:\.[\w\-_]+)+))([\w\-\.,@?^=%&:/~\+#]*[\w\-\@?^=%&/~\+#])?')
HASHTAG_PATTERN = re.compile(r"#(\w+)")
MENTION_PATTERN = re.compile(r"@(\w+)")

for session_tweets in tweets:
    # User screen name -> counters and timestamps for this pass.
    c = {}

    # Row layout: 0 text, 1 user.screen_name, 2 source, 3 start.
    for tweet in session_tweets.to_numpy():
        _text, user, start = tweet[0], tweet[1], tweet[3]

        # Base edge: count the user's tweets and remember when the last one was.
        if user in c:
            stats = c[user]
            # maybe the difference is the beat
            # NOTE(review): the original comment says "convert from ms to
            # seconds", but the divisor is 10000, not 1000. 'diff' is not read
            # anywhere in this cell, so it is left as is; confirm the scale.
            stats['diff'] = (start - stats['last'])/10000
            stats['value'] += 1
            stats['last'] = start
        else:
            stats = c[user] = {'value': 1, 'last': start, 'diff': 0,
                               'OK': 0, 'okdiff': 0, 'oklast': 0,
                               'KO': 0, 'kodiff': 0, 'kolast': 0,
                               'Q': 0, 'qdiff': 0, 'qlast': 0,
                               'REF': 0, 'refdiff': 0, 'reflast': 0,
                               'url': 0, 'urllast': 0, 'urldiff': 0,
                               'hash': 0, 'hashlast': 0, 'hashdiff': 0,
                               'at': 0, 'atlast': 0, 'atdiff': 0,
                               'tco': 0, 'tcolast': 0, 'tcodiff': 0}

        # Collect every edge type this tweet touches, one entry per occurrence.
        hits = [tag for marker, (tag, _note) in POLEMIC_EDGES.items() if marker in _text]

        if time_level == 2:
            # The patterns are matched against the tweet text; the original
            # referenced an undefined name `search` here.
            for url in URL_PATTERN.findall(_text):
                # url is (scheme, host, path); t.co short links are counted separately.
                hits.append('tco' if url[1] == "t.co" else 'url')
            for hashtag in HASHTAG_PATTERN.findall(_text):
                # Skip hashtags that start with the search term (e.g. #enmi18).
                # The original tested m[4:], the part *after* the prefix, so
                # those hashtags were never blocked.
                if hashtag[:4].lower() != "enmi":
                    hits.append('hash')
            hits.extend('at' for _mention in MENTION_PATTERN.findall(_text))

        # Link decay per edge type. The first hit seeds both the timestamp and
        # the reference interval with `start` and counts as +1. After that,
        # the edge weakens (-1) when the gap since the previous hit is longer
        # than the stored interval, and strengthens (+1) otherwise.
        for tag in hits:
            last_key, diff_key = tag.lower() + 'last', tag.lower() + 'diff'
            if int(stats[last_key]) > 0:
                gap = start - stats[last_key]
                if int(stats[diff_key]) < gap:
                    stats[tag] -= 1
                else:
                    stats[tag] += 1
                stats[diff_key] = gap
            else:
                stats[diff_key] = start
                stats[tag] += 1
            stats[last_key] = start

        # Send the current state of every user seen so far.
        for user_stats in c.values():
            # node / node edge for the base edge
            client.send("weight_edge:40:42:{}:{}".format(0.3, volume + user_stats['value']))
            if time_level > 0:
                # Polemic parts: only edge types whose weight is still positive are sent.
                for tag, note in POLEMIC_EDGES.values():
                    if user_stats[tag] > 0:
                        client.send("polemic:{}:{}:{}".format(note, volume + user_stats[tag], 0.3))

            if time_level == 2:
                # NOTE(review): the two "url" templates have three placeholders
                # but receive four arguments, so the trailing 20 / 0 is
                # silently dropped and url and t.co hits produce the same
                # message shape. Confirm whether a fourth field was intended.
                if user_stats['url']  > 0: client.send("url:{}:{}:{}".format(10, 0.25, volume + user_stats['url'], 20))
                if user_stats['tco']  > 0: client.send("url:{}:{}:{}".format(10, 0.25, volume + user_stats['tco'], 0))
                if user_stats['at']   > 0: client.send("username:{}:{}:{}".format(40, 0.25, volume + user_stats['at']))
                if user_stats['hash'] > 0: client.send("hashtag:{}:{}:{}".format(30, 0.25, volume + user_stats['hash']))

        # Pause between tweets so that each graph state is audible.
        time.sleep(2)