In [41]:
import heapq
import re
from collections import Counter
from datetime import datetime
from json import loads

In [42]:
def top_10_users(data_path):
    """Return the ten users who authored the most tweets in a JSON-lines file.

    Every non-blank line of ``data_path`` is parsed as one JSON tweet object.
    The tweet's ``"user"`` value must be a dict with a ``"username"`` key;
    tweets are grouped by that username.

    Args:
        data_path: Path to a UTF-8 text file holding one JSON tweet per line.

    Returns:
        A list of at most ten ``"user"`` dicts ordered by descending tweet
        count. For each username, the dict taken from that user's first tweet
        in the file is the one returned. Users with equal counts keep the
        order in which they first appear in the file.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a tweet has no ``"user"`` or the user has no
            ``"username"``.
    """
    tweet_counts = Counter()
    first_seen_profile = {}

    with open(data_path, "r", encoding="utf-8") as tf:
        for raw_line in tf:
            raw_line = raw_line.strip()
            if not raw_line:
                # Blank lines (e.g. a trailing empty line) carry no tweet and
                # would make loads() raise, so they are skipped.
                continue

            tweet_user = loads(raw_line)["user"]
            username = tweet_user["username"]

            tweet_counts[username] += 1
            # Keep the profile from the first tweet seen for this username.
            first_seen_profile.setdefault(username, tweet_user)

    return [
        first_seen_profile[username]
        for username, _ in tweet_counts.most_common(10)
    ]

In [43]:
def top_10_retweeted(data_path):
    """Return the ten tweets with the highest ``"retweetCount"``.

    Every non-blank line of ``data_path`` is parsed as one JSON tweet object.
    The file is consumed lazily, so only the current ten best tweets are
    retained while scanning.

    Args:
        data_path: Path to a UTF-8 text file holding one JSON tweet per line.

    Returns:
        A list of at most ten parsed tweet dicts ordered by descending
        ``"retweetCount"``. Tweets with equal counts keep the order in which
        they appear in the file.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a tweet has no ``"retweetCount"`` key.
    """
    with open(data_path, "r", encoding="utf-8") as tf:
        stripped_lines = (raw_line.strip() for raw_line in tf)
        # Blank lines (e.g. a trailing empty line) would make loads() raise,
        # so only non-empty lines are parsed.
        tweets = (loads(text) for text in stripped_lines if text)

        # nlargest must run inside the ``with`` block because the generator
        # reads from the still-open file.
        return heapq.nlargest(10, tweets, key=lambda tweet: tweet["retweetCount"])
                

In [44]:
def top_10_days(data_path):
    """Return the ten calendar days on which the most tweets were posted.

    Every non-blank line of ``data_path`` is parsed as one JSON tweet object
    whose ``"date"`` value is an ISO-8601 timestamp string. The day is read
    from the timestamp exactly as written; no timezone conversion is applied.

    Args:
        data_path: Path to a UTF-8 text file holding one JSON tweet per line.

    Returns:
        A list of at most ten day labels ordered by descending tweet count.
        Labels have the form ``"<year>-<month>-<day>"`` without zero padding
        (e.g. ``"2021-2-5"``). Days with equal counts keep the order in which
        they first appear in the file.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a tweet has no ``"date"`` key.
        ValueError: If a ``"date"`` value cannot be parsed by
            ``datetime.fromisoformat``.
    """
    tweets_per_day = Counter()

    with open(data_path, "r", encoding="utf-8") as tf:
        for raw_line in tf:
            raw_line = raw_line.strip()
            if not raw_line:
                # Blank lines (e.g. a trailing empty line) carry no tweet and
                # would make loads() raise, so they are skipped.
                continue

            posted_at = datetime.fromisoformat(loads(raw_line)["date"])
            day_label = f"{posted_at.year}-{posted_at.month}-{posted_at.day}"
            tweets_per_day[day_label] += 1

    return [day_label for day_label, _ in tweets_per_day.most_common(10)]

In [45]:
def top_10_hashtags(data_path):
    """Return the ten hashtags that occur most often across all tweets.

    Every non-blank line of ``data_path`` is parsed as one JSON tweet object
    and hashtags are extracted from its ``"content"`` string. Every occurrence
    counts, including repeats inside a single tweet, and hashtags that differ
    only in letter case are counted separately.

    Args:
        data_path: Path to a UTF-8 text file holding one JSON tweet per line.

    Returns:
        A list of at most ten hashtag strings (including the leading ``#``)
        ordered by descending occurrence count. Hashtags with equal counts
        keep the order in which they first appear in the file.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a tweet has no ``"content"`` key.
    """
    ## Based on https://stackoverflow.com/questions/2527892/parsing-a-tweet-to-extract-hashtags-into-an-array
    # The pattern requires at least one ASCII letter after '#', so purely
    # numeric tags such as "#123" are not matched.
    hashtag_pattern = re.compile(r'\B#\w*[a-zA-Z]+\w*')

    hashtag_counts = Counter()

    with open(data_path, "r", encoding="utf-8") as tf:
        for raw_line in tf:
            raw_line = raw_line.strip()
            if not raw_line:
                # Blank lines (e.g. a trailing empty line) carry no tweet and
                # would make loads() raise, so they are skipped.
                continue

            tweet_content = loads(raw_line)["content"]
            hashtag_counts.update(hashtag_pattern.findall(tweet_content))

    return [hashtag for hashtag, _ in hashtag_counts.most_common(10)]

In [50]:
def main(data_path="./farmers-protest-tweets-2021-03-5.json"):
    """Print the usernames of the ten most active users in a tweet dump.

    Args:
        data_path: Path to the JSON-lines tweet file passed to
            ``top_10_users``. Defaults to the dataset file expected next to
            the notebook.
    """
    top_users = top_10_users(data_path)

    print([user["username"] for user in top_users])

In [51]:
# Run the analysis: prints the usernames of the ten most active users.
main()

['harjot_tweeting', 'tasveersandhu', 'shells_n_petals', 'jot__b', 'rebelpacifist', 'rumsomal', 'Iamjazzie96', 'Jass_k_G', 'DigitalKisanBot', 'z_khalique007']
