# Analysis.ipynb

### This notebook analyses the data for the Results section of the thesis.

Author: Erik Puijk <br>
Date  : May 9, 2022

In [215]:
import json
import pandas as pd
import csv
import matplotlib.pyplot as plt
plt.close("all")

In [216]:
def read_tweets(path):
    """ Read the Tweets from a given text file and return the parsed JSON data.

    Args:
        path: Location of a text file containing a single JSON document.
              The file is decoded as UTF-8.

    Returns:
        The object produced by parsing the file's JSON content. When the file
        cannot be opened or read, "I/O error" is printed and an empty list is
        returned.

    Raises:
        ValueError: If the file content is not valid JSON or not valid UTF-8
                    (json.JSONDecodeError and UnicodeDecodeError are both
                    subclasses of ValueError and are not caught here).
    """

    # Fall back to an empty list rather than an empty string, so the value
    # handed to callers on failure is still a container of records.
    content = []

    try:
        # Decode explicitly as UTF-8 instead of relying on the platform default.
        with open(path, 'r', encoding='utf-8') as f:
            content = json.load(f)
    except IOError:
        print("I/O error")

    print("Total Tweets read: %s\n" % (len(content)))

    return content

In [217]:
def write_csv(headers, rows, path):
    """ Write results from the analysis to a csv-file in a specified path.

    Args:
        headers: Sequence of column names, written as the first row.
        rows: Sized sequence of rows; each row is a sequence of cell values.
        path: Destination file. An existing file is overwritten.

    Returns:
        None. The number of rows written (data rows plus the header row) is
        printed.
    """

    # newline='' leaves line endings to the csv writer; without it, platforms
    # that translate '\n' would turn the writer's '\r\n' into '\r\r\n'.
    # The encoding is fixed so the output does not depend on the platform.
    with open(path, 'w', newline='', encoding='utf-8') as f:
        w = csv.writer(f)

        w.writerow(headers)
        w.writerows(rows)

    # +1 accounts for the header row
    print("Total rows written: %s\n" % (len(rows) + 1))

In [218]:
def _category_rows(tweets, column):
    """ Build one result row per distinct value found in `column`.

    Each row is [column name, category value, number of Tweets in the
    category, share of all Tweets as a "x.x%" string, mean of
    'total_engagement' multiplied by 1000 as a "x.xx" string]. Rows are
    ordered by sorted category value.
    """

    total = len(tweets)
    rows = []

    for value in sorted(tweets[column].unique()):
        # Compute the membership mask once and reuse it for count and mean
        mask = tweets[column] == value
        count = int(mask.sum())
        mean_engagement = tweets.loc[mask, 'total_engagement'].mean()

        rows.append([column, value, count,
                     "%.1f%%" % round(count / total * 100, 1),
                     "%.2f" % round(mean_engagement * 1000, 2)])

    return rows


def average_engagement(tweets, path='results/average_engagement.csv'):
    """ Count the occurrences and average engagement of each category.

    Args:
        tweets: DataFrame with the columns 'cat_con', 'cat_act', 'media_type'
                and 'total_engagement'.
        path: Destination of the csv-file; defaults to the location the
              notebook has always written to.

    Returns:
        None. The rows for 'cat_con', 'cat_act' and 'media_type' (in that
        order) are passed to write_csv together with the header row.
    """

    rows = [row
            for column in ('cat_con', 'cat_act', 'media_type')
            for row in _category_rows(tweets, column)]

    # Write results to a csv file
    write_csv(['Type', 'Category', 'N', 'Percentage', 'Average engagement (x1000)'],
              rows,
              path)

In [355]:
def engagement_per_day(tweets):
    """ Print the first rows of a per-Tweet table keyed by calendar date.

    The table holds a 'date' column (parsed from the first ten characters of
    'created_at' as YYYY-MM-DD) followed by 'cat_con', 'cat_act', 'media_type'
    and 'total_engagement'. The given DataFrame is left untouched and nothing
    is returned.
    """

    wanted = ['created_at', 'cat_con', 'cat_act', 'media_type', 'total_engagement']
    selected = tweets[wanted]

    # Only the leading YYYY-MM-DD part of the timestamp string is parsed
    day = pd.to_datetime(selected['created_at'].str[:10], format='%Y-%m-%d')

    per_day = selected.drop(columns='created_at')
    per_day.insert(0, 'date', day)

    print(per_day.head())

In [356]:
# Load the labeled Tweets and wrap them in a DataFrame for the analysis
tweets = read_tweets('source/tweets_all_labeled.txt')
tweets_pd = pd.DataFrame(tweets)

# Write the number of Tweets, their share and the average engagement per
# category to results/average_engagement.csv
average_engagement(tweets_pd)

# Print the first rows of the date/category/engagement table
engagement_per_day(tweets_pd)

# Uncomment to display the DataFrame itself:
#tweets_pd

Total Tweets read: 4664

Total rows written: 12

        date cat_con cat_act media_type  total_engagement
0 2021-03-17     POL     CON      video          0.000481
1 2021-02-22     SOC     FOL      photo          0.001084
2 2021-03-10     CAM     FOL      photo          0.021416
3 2021-03-10     CAM     FOL       none          0.004270
4 2021-02-09     POL     FOL       none          0.002403
