# MergeAnnotations.ipynb

### This notebook merges a list of annotations with the JSON-formatted Tweets.

Author: Erik Puijk <br>
Date  : March 23, 2022

In [16]:
import json
import csv

In [17]:
def read_tweets(path):
    """ Read the Tweets from a given text file and return the decoded JSON.

    path: location of a text file whose whole content is one JSON document.

    Returns the decoded JSON document. When the file cannot be opened or
    read, "I/O error" is printed and an empty list is returned, so the
    result can always be iterated and measured with len().

    Raises json.JSONDecodeError when the file was read but its content is
    not valid JSON.

    The number of top-level items read is printed as a side effect.
    """

    # Fallback result for the I/O failure path: an empty container rather
    # than an empty string, so the return type does not change on failure.
    content = []

    try:
        # JSON text is UTF-8; do not depend on the platform default encoding.
        with open(path, 'r', encoding='utf-8') as f:
            content = json.load(f)
    except IOError:
        print("I/O error")

    print("Total Tweets read: %s" % (len(content)))

    return content

In [18]:
def read_annotations(path):
    """ Read the CSV-formatted annotations and store them in a list.

    path: location of a comma-separated file whose first row is a header.

    Returns a list with one entry per data row (the header row is left
    out); each entry is the list of string fields of that row. An empty
    file yields an empty list.

    Errors raised while opening or reading the file are not caught here.
    """

    # newline='' hands line-ending handling to the csv module, so that line
    # breaks inside quoted fields are kept exactly as they appear in the file.
    with open(path, 'r', newline='') as f:
        reader = csv.reader(f, delimiter=',')

        # Drop the header row; the default avoids StopIteration on an empty file.
        next(reader, None)

        return list(reader)

In [19]:
def merge_annotations(tweets, annotations, memo):
    """ Add the annotations to the Tweets whose IDs match and set the memo field.

    tweets:      iterable of dicts, each with an 'id' key.
    annotations: iterable of rows; row[0] is the Tweet ID, row[1] is stored
                 under 'cat_con' and row[2] under 'cat_act'.
    memo:        value stored under 'memo' on every Tweet that was matched.

    IDs are compared by their string form, so a numeric ID in the Tweets
    matches the textual ID of an annotation row. Empty rows are ignored.
    When several rows carry the same ID, the last one wins.

    The Tweet dicts are modified in place; the same `tweets` object is
    returned. Tweets without a matching row are left untouched.
    """

    # Index the rows by ID so each Tweet is resolved with a single lookup
    # instead of scanning every row for every Tweet.
    rows_by_id = {}
    for row in annotations:
        if not row:
            # An empty row (e.g. a blank line in the CSV) carries no ID.
            continue
        rows_by_id[str(row[0])] = row

    # Nothing to merge: leave the Tweets exactly as they are.
    if not rows_by_id:
        return tweets

    for tweet in tweets:
        row = rows_by_id.get(str(tweet['id']))
        if row is None:
            continue

        # Add annotations to corresponding fields
        tweet['cat_con'] = row[1]
        tweet['cat_act'] = row[2]
        tweet['memo'] = memo

    return tweets

In [20]:
def write_tweets(tweets_w, path):
    """ Write obtained Tweets to a text file in JSON-format.

    tweets_w: JSON-serialisable object to store.
    path:     location of the file to create or overwrite.

    Serialisation errors (e.g. TypeError for an object json cannot encode)
    are raised before the file is opened, so an existing file at `path` is
    left untouched in that case. An I/O failure while opening or writing is
    reported by printing "I/O error" and is not re-raised.
    """

    # Serialise first: opening with 'w' truncates the target, so doing this
    # afterwards would wipe the existing file if serialisation failed.
    payload = json.dumps(tweets_w)

    try:
        with open(path, 'w', encoding='utf-8') as f:
            f.write(payload)
    except IOError:
        print("I/O error")

In [21]:
# Load the Tweets and the annotation rows, then attach the annotations to the
# Tweets with matching IDs, tagging every matched Tweet as 'gold_standard'.
tweets_r = read_tweets(path='source/tweets_all.txt')
annotations_r = read_annotations(path='source/gold_standard_annotated.csv')

tweets = merge_annotations(
    tweets=tweets_r,
    annotations=annotations_r,
    memo='gold_standard',
)

Total Tweets read: 4664


In [22]:
# Store the merged Tweets as JSON; the target file is overwritten.
write_tweets(tweets_w=tweets, path='source/tweets_all.txt')