En este archivo puedes escribir lo que estimes conveniente. Te recomendamos detallar tu solución y todas las suposiciones que estás considerando. Aquí puedes ejecutar las funciones que definiste en los otros archivos de la carpeta src, medir el tiempo, memoria, etc.

In [1]:
from typing import List, Tuple
from datetime import datetime
import json
import time
from collections import Counter

file_path = "farmers-protest-tweets-2021-2-4.json"

In [2]:
'''
Las top 10 fechas donde hay más tweets. Mencionar el usuario (username) que más publicaciones tiene por cada uno de esos días
{
"date": "2021-02-04",
    "user"={
"id": 123456,
"username": "user1",
}
}

'''
# Radix sort para ordenar los tweets por fecha 
def radix_sort_tweets(tweets):
    max_val = max(tweet['date_ordinal'] for tweet in tweets)
    exp = 1
    while max_val // exp > 0:
        counting_sort_tweets(tweets, exp)
        exp *= 10
# Counting sort para 
def counting_sort_tweets(tweets, exp):
    n = len(tweets)
    output = [None] * n
    count = [0] * 10

    for i in range(n):
        index = (tweets[i]['date_ordinal'] // exp) % 10
        count[index] += 1

    for i in range(1, 10):
        count[i] += count[i - 1]

    i = n - 1
    while i >= 0:
        index = (tweets[i]['date_ordinal'] // exp) % 10
        output[count[index] - 1] = tweets[i]
        count[index] -= 1
        i -= 1
    for i in range(n):
        tweets[i] = output[i]

def q1_time(file_path: str) -> List[Tuple[datetime.date, str]]:
    tweets = []
    with open(file_path, 'r') as file:
        for line in file:
            tweet = json.loads(line)
            date = datetime.fromisoformat(tweet['date']).date()
            user = tweet['user']['username']
            tweets.append({'date': date, 'date_ordinal': date.toordinal(), 'user': user})

    radix_sort_tweets(tweets)

    tweets_by_date_user = Counter()
    for tweet in tweets:
        tweets_by_date_user[(tweet['date'], tweet['user'])] += 1

    tweets_by_date = Counter()
    for tweet in tweets:
        tweets_by_date[tweet['date']] += 1

    top_dates = tweets_by_date.most_common(10)

    top_users_per_date = []
    for date, _ in top_dates:
        top_user = ''
        max_tweets = 0
        for (tweet_date, user), count in tweets_by_date_user.items():
            if tweet_date == date and count > max_tweets:
                max_tweets = count
                top_user = user
        top_users_per_date.append((date, top_user))

    return top_users_per_date



In [3]:
# BST para almacenar los tweets por fecha y usuario y luego obtener los top 10 usuarios, esto esta pensando para que las proximas consultas sean mas rapidas
def q1_memory(file_path: str) -> List[Tuple[datetime.date, str]]:
    class BSTNode:
        def __init__(self, date, user):
            self.date = date
            self.users = {user: 1}
            self.left = None
            self.right = None

        def insert(self, date, user):
            if date < self.date:
                if self.left is None:
                    self.left = BSTNode(date, user)
                else:
                    self.left.insert(date, user)
            elif date > self.date:
                if self.right is None:
                    self.right = BSTNode(date, user)
                else:
                    self.right.insert(date, user)
            else:
                if user in self.users:
                    self.users[user] += 1
                else:
                    self.users[user] = 1

        def get_top_users(self, top_n):
            data = []
            if self.left:
                data += self.left.get_top_users(top_n)
            data.append((self.date, max(self.users, key=self.users.get), sum(self.users.values())))
            if self.right:
                data += self.right.get_top_users(top_n)
            return sorted(data, key=lambda x: x[2], reverse=True)[:top_n]
    root = None
    with open(file_path, 'r') as file:
        for line in file:
            tweet = json.loads(line)
            date = datetime.fromisoformat(tweet['date']).date()
            user = tweet['user']['username']
            if root is None:
                root = BSTNode(date, user)
            else:
                root.insert(date, user)
    return root.get_top_users(10) if root else []

In [4]:
%reload_ext memory_profiler
%memit top_dates = q1_memory(file_path)


peak memory: 81.16 MiB, increment: 6.26 MiB


In [5]:
%reload_ext memory_profiler
%memit top_dates = q1_time(file_path)

peak memory: 124.77 MiB, increment: 45.21 MiB


In [6]:
start_time = time.time()
top_dates = q1_time(file_path)
end_time = time.time()
print("q1_time:", end_time - start_time, "s")

q1_time: 3.1946589946746826 s


In [7]:
start_time = time.time()
top_dates = q1_memory(file_path)
end_time = time.time()
print("q1_memory:", end_time - start_time, "seconds")

q1_memory: 2.971240997314453 seconds
