## Importing Required Libraries

In [1]:
#import statements
from datetime import datetime, timedelta
from flask import Flask, render_template, request, url_for, redirect,flash
import pymongo
from pymongo.mongo_client import MongoClient
from pymongo.server_api import ServerApi
from markupsafe import escape
from bson.objectid import ObjectId
import time
import pandas as pd
import os
import mysql.connector
import re
import math
import json
import sys
import time
from bson.json_util import dumps
from bson import json_util
import pickle
from collections import defaultdict

## Connecting to MySQL

In [2]:
#connecting to mysql
# Establish a connection to MySQL
cnx = mysql.connector.connect(user='root', password='password', host='localhost')

# Create a cursor object
cursor = cnx.cursor()

# Create a new database
cursor.execute("CREATE DATABASE IF NOT EXISTS twitter_data_users_2")

# Use the new database
cursor.execute("USE twitter_data_users_2")
cursor.execute("SHOW Tables Like 'users';")
result = cursor.fetchone()

# Create a new table
create_table = """
CREATE TABLE IF NOT EXISTS users (
    id BIGINT PRIMARY KEY,
    name VARCHAR(100),
    screen_name VARCHAR(100),
    location VARCHAR(1000),
    followers_count INT,
    friends_count INT,
    statuses_count INT,
    verified BOOLEAN,  
    protected BOOLEAN,  
    listed_count INT,  
    created_at DATETIME
)
"""
if result:
    cursor.execute("DROP Table users;")
    print("The Table Users already exists dropping the table........\n")
cursor.execute(create_table)
print("The Table User has been created")
# Close the cursor and connection
#cursor.close()
#cnx.close()

The Table Users already exists dropping the table........

The Table User has been created


## Connecting to MongoDB

In [3]:
client = pymongo.MongoClient("mongodb://localhost:27017/")
dbnames = client.list_database_names()
if "TweetsDB2" in dbnames:
    print("The database exists. It will be deleted.")
    client.drop_database("TweetsDB2")
db = client["TweetsDB2"]
col_names = db.list_collection_names()
if "Tweets_data_2" in col_names:
    print("Tweet Collection exists. It will be deleted.")
    db.Tweets_data.drop()
tweets_coll = db["Tweets_data_2"]
print("Database connection established")

The database exists. It will be deleted.
Database connection established


## Initializing App

In [4]:
#initializing app
app = Flask(__name__)

app.config['SECRET_KEY'] = os.urandom(24)

In [5]:
class JSONEncoder(json.JSONEncoder):
    def default(self, o):
        if isinstance(o, ObjectId):
            return str(o)
        if isinstance(o, datetime):
            return o.isoformat()
        return json.JSONEncoder.default(self, o)

## Creating Cache and Creating Databases

At the moment, this cache only includes storage for MongoDB -- the tweets database. All queries pertain to tweets

In [6]:
class Cache():
    def __init__(self):
        # Initialize the cache and the write counter
        self.cache = {}
        self.write_counter = 0

        # Connect to the MongoDB server
        self.client = MongoClient("mongodb://localhost:27017/")
        self.db = self.client["TweetsDB2"]
        self.tweets_coll = self.db["tweets_coll"]
        
        ## Connect to the MySQL server  (ADDED)
        self.conn = mysql.connector.connect(
            host='localhost',
            user='root',
            password='password',
            database='twitter_data_users_2')
        self.cursor = self.conn.cursor()

    def data_modifier(self, data):
        # Convert the data to JSON format
        res = {'res': data}
        json_res = json.dumps(res, default=str, ensure_ascii=False).encode('utf8')
        return json_res

    def clear_20_percent(self):
        # Clear the oldest 20% of keys in the cache
        keys = sorted(self.cache.keys(), key=lambda x: self.cache[x][1])
        target = math.ceil(len(keys)*0.2)

        for i in range(target):
            del self.cache[keys[i]]

    def push_to_cache(self, key, data):
        # Push data to the cache with a TTL of 3 days
        data = self.data_modifier(data)
        if sys.getsizeof(self.cache) < 10 * 1024 * 1024:  # 10 MB limit
            self.cache[key] = (data, time.time())  # Set TTL to 3 days
        else:
            self.clear_20_percent()
            self.cache[key] = (data, time.time())  # Set TTL to 3 days

    def get_from_cache(self, key):
        # Get data from the cache
        try:
            data, timestamp = self.cache[key]
            ttl = 3 * 24 * 60 * 60
            if time.time() - timestamp > ttl:
                del self.cache[key]
                return -1
            return json.loads(data.decode())['res']
        except KeyError:
            return -1

    def most_common_hashtags(self):
        # Get the most recent date in the database
        most_recent_doc = self.tweets_coll.find_one(sort=[("created_at", -1)])
        if most_recent_doc is None:
            print("No documents found in the collection.")
            return []
        if "created_at" not in most_recent_doc:
            print("The 'created_at' field does not exist in the document.")
            return []
        most_recent_date = most_recent_doc["created_at"]
        # Calculate the date 3 days before the most recent date
        three_days_ago = most_recent_date - timedelta(days=3)
        # Find the top 10 hashtags in the past 3 days
        pipeline = [
            {"$match": {"created_at": {"$gte": three_days_ago}}},
            {"$unwind": "$Hashtag"},
            {"$group": {"_id": "$Hashtag", "count": {"$sum": 1}}},
            {"$sort": {"count": -1}},
            {"$limit": 10}
        ]
        result = self.tweets_coll.aggregate(pipeline)
        most_common = list(result)
        return most_common

    def top_retweets(self):
        # Get the most recent date in the database
        most_recent_date = self.tweets_coll.find_one(sort=[("created_at", -1)])["created_at"]
        # Calculate the date 3 days before the most recent date
        three_days_ago = most_recent_date - timedelta(days=3)
        # Find the top 10 tweets with the most retweets in the past 3 days
        result = self.tweets_coll.find({"created_at": {"$gte": three_days_ago}}).sort("Retweet_Count", -1).limit(10)
        top_retweets = list(result)
        return top_retweets
    
    def top10_tweets(self):
        # Get the most recent date in the database
        most_recent_date = self.tweets_coll.find_one(sort=[("created_at", -1)])["created_at"]
        # Calculate the date 3 days before the most recent date
        three_days_ago = most_recent_date - timedelta(days=3)
        # Find the top 10 tweets with the most retweets in the past 3 days
        # Find the top 10 original tweets (Retweet_ID = 0) by favorite count
        top_original_tweets = self.tweets_coll.find({'Retweet_ID': 0}).sort('Favorite_Count', -1).limit(10)
        return list(top_original_tweets)
    

    def most_active_users(self):
        query = "SELECT id, name, screen_name, statuses_count/(DATEDIFF(NOW(), created_at)+1) as tweet_frequency FROM users ORDER BY tweet_frequency DESC LIMIT 10"
        self.cursor.execute(query)
        rows = self.cursor.fetchall()
        result = []
        for row in rows:
            user_dict = {
                "id": row[0],
                "name": row[1],
                "screen_name": row[2],
                "tweet_frequency": row[3]
            }
            result.append(user_dict)
        return result



    def update_cache_tweets(self):
        # Update the cache with the top 10 hashtags and top 10 tweets with the most retweets
        hashtags = self.most_common_hashtags()
        self.push_to_cache("top_hashtags", hashtags)
        retweets = self.top_retweets()
        self.push_to_cache("top_retweets", retweets)
        top10_tweets = self.top10_tweets()
        self.push_to_cache("top10_tweets", top10_tweets)
        # Save all tweets within 3 days that used the most common hashtags
        most_recent_date = self.tweets_coll.find_one(sort=[("created_at", -1)])["created_at"]
        three_days_ago = most_recent_date - timedelta(days=3)
        for hashtag in hashtags:
            hashtag_tweets = self.tweets_coll.find({"created_at": {"$gte": three_days_ago}, "Hashtag": hashtag["_id"]})
            self.push_to_cache(f"tweets_with_{hashtag['_id']}", list(hashtag_tweets))
        self.push_to_cache("most_recent_date", most_recent_date)
        self.push_to_cache("three_days_ago", three_days_ago)
        # Print the cache
        print("Cache updated:")
        for key, value in self.cache.items():
            print(f"{key}",f"{value}")

        # Save the cache to a file using pickle
        with open('cache.pkl', 'wb') as f:
            pickle.dump(self.cache, f)

    def update_cache_users(self):
        # Update the cache with the results of the user queries
        most_active_users = self.most_active_users()
        self.push_to_cache("most_active_users", most_active_users)
        

        # Print the cache
        print("Cache updated:")
        for key, value in self.cache.items():
            print(f"{key}",f"{value}")

        # Save the cache to a file using pickle
        with open('cache.pkl', 'wb') as f:
            pickle.dump(self.cache, f)

    def delete_tweets_cache(self):
        keys_to_delete = ["top_hashtags", "top_retweets"]
        keys_to_delete.extend(key for key in self.cache.keys() if key.startswith("tweets_with_"))
        for key in keys_to_delete:
            if key in self.cache:
                del self.cache[key]

    def delete_users_cache(self):
        keys_to_delete = ["top_10_followers", "most_active_users"]
        for key in keys_to_delete:
            if key in self.cache:
                del self.cache[key]

    def insert_into_mongo(self, data):
        # Insert data into the MongoDB collection
        self.tweets_coll.insert_one(data)
        
        # Update the write counter
        self.write_counter += sys.getsizeof(data)
        # If more than 100 MB of data has been written, update the cache and reset the counter
        if self.write_counter >= 100 * 1024 * 1024:
            self.delete_tweets_cache()
            self.update_cache_tweets()
            self.write_counter = 0
            
    def insert_into_mysql(self, query, data):
        self.cursor.execute(query, data)
        self.conn.commit()
        self.write_counter += sys.getsizeof(data)
        if self.write_counter >= 100 * 1024 * 1024:
            self.delete_users_cache()
            self.update_cache_users()
            self.write_counter = 0

# # Create a Cache object
cache = Cache()

In [7]:
import json
from datetime import datetime

def find_in_collection(tid):
    record = None
    record = cache.tweets_coll.find({'TweetID': tid})
    if len(list(record)) > 0:
        return True
    return False

def increment_retweet_count(tid):
    cache.tweets_coll.update_one({'TweetID': tid}, {'$inc': {'Retweet_Count': 1}})

def process_line(line):
    line = line.strip()  # remove leading/trailing white spaces
    if not line:  # checks if line is empty
        return

    try:
        json_object = json.loads(line)
    except json.JSONDecodeError as e:
        print(f"Error occurred: {e}")
        return

    try:
        if 'id' in json_object and find_in_collection(json_object['id']):
            return

        if json_object['text'].startswith('RT') and 'retweeted_status' in json_object:
            RetweetID = json_object['retweeted_status']['id']
            if find_in_collection(RetweetID):
                increment_retweet_count(RetweetID)
            else:
                retweet_created_at = datetime.strptime(json_object['retweeted_status']['created_at'], '%a %b %d %H:%M:%S +0000 %Y')
                retweeted_status = json_object['retweeted_status']
                full_text = retweeted_status.get('extended_tweet', {}).get('full_text', retweeted_status['text'])
                dict = {
                    'created_at': retweet_created_at,
                    'TweetID': retweeted_status['id'],
                    'Id_str': retweeted_status['id_str'],
                    'Text': retweeted_status['text'],
                    'Full_Text': full_text,
                    'Hashtag': list(map(lambda x: x["text"], retweeted_status['entities']['hashtags'])),
                    'UserID': retweeted_status['user']['id'],
                    'Retweet_Count': 1,
                    'Retweet_ID': 0,
                    'Favorite_Count': retweeted_status['favorite_count']
                }
                cache.insert_into_mongo(dict)
        else:
            RetweetID = 0

        tweet_created_at = datetime.strptime(json_object['created_at'], '%a %b %d %H:%M:%S +0000 %Y')
        dict = {
            'created_at': tweet_created_at,
            'TweetID': json_object['id'],
            'Id_str': json_object['id_str'],
            'Text': json_object['text'],
            'Hashtag': list(map(lambda x: x["text"], json_object['entities']['hashtags'])),
            'UserID': json_object['user']['id'],
            'Retweet_Count': 0,
            'Retweet_ID': RetweetID,
            'Favorite_Count': json_object['favorite_count']
        }

        cache.insert_into_mongo(dict)
    except KeyError as e:
        print(f"KeyError: {e}")

with open('corona-out-2', 'r') as f:
    for line in f:
        process_line(line)

with open('corona-out-3', 'r') as f:
    for line in f:
        process_line(line)

In [8]:
with open("corona-out-2", 'r') as f:
    # Process each line in the file
    for line in f:
        try:
            # Parse the line as JSON
            tweet = json.loads(line)
        except json.JSONDecodeError:
            # If a line cannot be parsed as JSON, skip it
            continue

        # Extract the user information from the tweet
        user_info = tweet.get('user', {})

        # Clean the name by removing all non-alphabet characters
        cleaned_name = re.sub(r'[^a-zA-Z]', ' ', user_info.get('name'))

        # Parse the Twitter datetime string to a datetime object
        dt = datetime.strptime(user_info.get('created_at'), '%a %b %d %H:%M:%S +0000 %Y')
        
        data =  (user_info.get('id'), cleaned_name, user_info.get('screen_name'), 
                user_info.get('location'), user_info.get('followers_count'), user_info.get('friends_count'), 
                user_info.get('statuses_count'), user_info.get('verified'), user_info.get('protected'), 
                user_info.get('listed_count'), dt)

        # Prepare the SQL query
        query = """
                INSERT IGNORE INTO users (id, name, screen_name, location, followers_count, friends_count, 
                statuses_count, verified, protected,listed_count, created_at)
                VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
                """
        cache.insert_into_mysql(query, data)
        
with open("corona-out-3", 'r') as f:
    
    # Process each line in the file
    for line in f:
        try:
            # Parse the line as JSON
            tweet = json.loads(line)
        except json.JSONDecodeError:
            # If a line cannot be parsed as JSON, skip it
            continue

        # Extract the user information from the tweet
        user_info = tweet.get('user', {})

        # Clean the name by removing all non-alphabet characters
        cleaned_name = re.sub(r'[^a-zA-Z]', ' ', user_info.get('name'))

        # Parse the Twitter datetime string to a datetime object
        dt = datetime.strptime(user_info.get('created_at'), '%a %b %d %H:%M:%S +0000 %Y')
        
        data =  (user_info.get('id'), cleaned_name, user_info.get('screen_name'), 
                user_info.get('location'), user_info.get('followers_count'), user_info.get('friends_count'), 
                user_info.get('statuses_count'), user_info.get('verified'), user_info.get('protected'), 
                user_info.get('listed_count'), dt)

        # Prepare the SQL query
        query = """
                INSERT IGNORE INTO users (id, name, screen_name, location, followers_count, friends_count, 
                statuses_count, verified, protected,listed_count, created_at)
                VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
                """
        cache.insert_into_mysql(query, data)

## Tweet Queries

### Defining Query Functions

In [9]:

def get_hashtags_in_tweets_with_keyword(keyword, sorting_var="Retweet_Count"):
    regex = f".*{keyword}.*"  # Create a regex pattern to match the keyword in the hashtag
    query = {"Hashtag": {"$regex": regex, "$options": "i"}}  # Case-insensitive regex match
    tweets = db.tweets_coll.find(query, {"Text": 1, "Retweet_Count": 1, "UserID": 1, "_id": 1, "created_at": 1}).sort(sorting_var, -1)
    result = []
    for tweet in tweets:
        tweet['_id'] = str(tweet['_id'])  # Convert ObjectId to string
        result.append(tweet)
    return result

def search_tweets_by_keyword(keyword, sort_by='Retweet_Count'):
    regex = f".*{keyword}.*"  # Create a regex pattern to match the keyword in the tweet text
    query = {"Text": {"$regex": regex, "$options": "i"}}  # Case-insensitive regex match
    tweets = db.tweets_coll.find(query)
    sorted_tweets = sorted(tweets, key=lambda x: x.get(sort_by, 0), reverse=True)
    results = []
    for idx, tweet in enumerate(sorted_tweets, start=1):
        result = {
            'Text': tweet['Text'],
            'Retweet_Count': tweet.get('Retweet_Count', 0),
            'UserID': tweet['UserID'],
            '_id': tweet['_id'],
            'created_at': tweet['created_at']
        }
        results.append(result)
    return results

def tweets_10_fav():
    agg_result = db.tweets_coll.aggregate([
        {"$sort": {"Favorite_Count":-1}},
        {"$limit": 10}
    ])
    output = list(agg_result)
    return output

def tweets_10_retweet():
    agg_result = db.tweets_coll.aggregate([
        {"$sort": {"Retweet_Count":-1}},
        {"$limit": 10}
    ])
    output = list(agg_result)
    return output

def hashtag_10():
    agg_result = db.tweets_coll.aggregate([
        {"$unwind": "$Hashtag"},
        {"$group": {"_id": "$Hashtag", "count": {"$sum": 1}}},
        {"$sort": {"count": -1}},
        {"$limit": 10}
    ])
    output = list(agg_result)
    return output

## User Queries

### Defining Query Functions

In [10]:
def find_users_with_screen_name(word):
    # This function finds users with screen names containing a specific word
    connection = cnx
    cursor = cnx.cursor()
    query = "SELECT * FROM users WHERE screen_name LIKE %s"
    cursor.execute(query, ('%' + word + '%',))
    result = cursor.fetchall()
    
    # Create a dictionary to store the result
    users_dict = {}
    for row in result:
        user_id = row[0]  # Assuming the first column is the user ID
        user_details = {
            'name': row[1],
            'screen_name': row[2],
            'followers_count': row[4],
        }
        users_dict[user_id] = user_details

    return users_dict


def find_user_by_id(user_id):
    # This function finds users with a specific ID
    connection = cnx
    cursor = cnx.cursor()
    query = "SELECT * FROM users WHERE id = %s"
    cursor.execute(query, (user_id,))
    columns = [col[0] for col in cursor.description]
    results = []
    for row in cursor.fetchall():
        results.append(dict(zip(columns, row)))
    return results

def users_10():
    connection = cnx
    cursor = cnx.cursor()
    cursor.execute("SELECT * FROM users ORDER BY statuses_count/(DATEDIFF(NOW(), created_at)+1) DESC LIMIT 10;")
    rows = cursor.fetchall()
    result = []
    for row in rows:
        user_dict = {
            "id": row[0],
            "name": row[1],
            "screen_name": row[2],
            "statuses_count": row[3],
            "created_at": row[4]
        }
        result.append(user_dict)
    return result


## Creating Home Page for App

In [11]:
def con_date(date):
    if isinstance(date, datetime):
        return date
    else:
        try:
            return datetime.strptime(date, "%Y-%m-%d %H:%M:%S")
        except ValueError:
            try:
                return datetime.strptime(date, "%Y-%m-%d")
            except ValueError:
                return date

In [12]:
#Home Page
@app.route('/', methods=['GET', 'POST'])

def index():
    if request.method=='POST':
        search_term = request.form['search']
        search_by = request.form['search_by']
        start_date = "2000-01-01"
        end_date = "2030-01-01"
        if request.form['date_from']:
            start_date = request.form['date_from']
            if request.form['date_to']:
                end_date = request.form['date_to']
            return redirect(url_for('results',search_by = search_by, search_term = search_term, start_date = start_date, end_date = end_date))
    return render_template('index.html')

In [13]:
@app.route('/results/<search_by>/<search_term>/<start_date>/<end_date>', methods=['GET','POST'])
def results(search_by, search_term, start_date="2000-01-01", end_date="2030-01-01"):
    start = time.time()
    end = None 
    
    start_datetime = datetime.strptime(start_date, "%Y-%m-%d")
    end_datetime = datetime.strptime(end_date, "%Y-%m-%d")
    
    search_key = "search " + search_term + " category " + search_by
    if search_by == 'hashtag':
        search_key = 'tweets_with_' + search_term
        # Retrieve cache_results
        cache_results = cache.get_from_cache(search_key)
        if cache_results != -1:
            most_recent_date = cache.get_from_cache("most_recent_date")
            three_days_ago = cache.get_from_cache("three_days_ago")
            hashtag_tweets = cache_results  # Assuming cache_results contains hashtag-related tweets
            if con_date(most_recent_date) >= con_date(start_datetime) and con_date(three_days_ago) <= con_date(end_datetime):
                search_results = cache_results
            else:
                search_key = 'search' + search_term + 'category' + search_by
        else:
            search_key = 'search' + search_term + 'category' + search_by
    else:
        search_key = 'search' + search_term + 'category' + search_by
    cache_results = cache.get_from_cache(search_key)
    if cache_results != -1:
        search_results = cache_results
    else:
        if search_by == 'text':
            search_results = search_tweets_by_keyword(search_term)
            for doc in search_results:
                user_info = find_user_by_id(doc['UserID'])
                del doc['_id']
                try:
                    doc['name'] = user_info[0][2]
                    doc['screen_name'] = user_info[0][3]
                except:
                    continue
        elif search_by == "author":
            user_info = find_users_with_screen_name(search_term)
            if user_info:
                search_results = []
                for i in user_info:
                    search_result = find_user_by_id(i)
                    for doc in search_result:
                        doc['name'] = user_info[i]['name']
                        doc['screen_name'] = user_info[i]['screen_name']
                        del doc['id']
                        search_results.append(doc)
            else:
                search_results = []
        elif search_by == "hashtag":
            search_results = get_hashtags_in_tweets_with_keyword(search_term)
            for doc in search_results:
                user_info = find_user_by_id(doc['UserID'])
                del doc['_id']
                try:
                    doc['name'] = user_info[0][2]
                    doc['screen_name'] = user_info[0][3]
                except:
                    continue
        cache.push_to_cache(search_key, search_results)
    
    # Move the filtering logic outside of the else block
    for doc in search_results[:]:
        #doc['created_at'] = datetime(doc['created_at'])
        if not (con_date(start_datetime) <= con_date(doc['created_at']) <= con_date(end_datetime)):
            search_results.remove(doc)
    
    if request.method =='POST':
        sort_by = request.form['sort_by']
        if sort_by == "relevance":
            if search_by == "author":
                search_results.sort(key=lambda x: x['followers_count'], reverse=True)
            else:
                search_results.sort(key=lambda x: x['Retweet_Count'], reverse=True)
        elif sort_by == "recent":
            search_results.sort(key=lambda x: x['created_at'], reverse=True)
        elif sort_by == "old":
            search_results.sort(key=lambda x: x['created_at'])
        end = time.time()  # Update end time here
        return render_template('results.html', results=search_results, search_time=end-start)

    end = time.time()  # Update end time here as well
    if len(search_results) == 0:
        flash('NO RESULTS FOUND')
        return redirect(url_for('index'))
    else:
        return render_template('results.html', results=search_results, search_time=end-start)


In [14]:
#Page for User Drill-down, accepts a user_id
@app.route('/user/<user_id>')
def user(user_id):
    user_info = search_by_user_id(user_id)
    if not user_info:
        flash('USER NOT FOUND')
        return redirect(url_for('index'))
    search_results = author_search(user_info[0][0], sorting_var = "created_at")
    return render_template('users.html', user_info = user_info, results = search_results)

#Page for top 10 users by follower count
@app.route('/top10users')
def top_user():
    start = time.time()
    cache_results = cache.most_active_users()
    if cache_results == -1:
        top_users_ = users_10()
        top_users=[]
        for doc in top_users_:
            doc = list(doc)
            doc[9] = doc[9].strftime("%Y-%m-%d")
            top_users.append(doc)
        cache.push_to_top10_cache('top10users',top_users)
    else:
        top_users = cache_results
    end = time.time()
    return render_template('top10users.html', top_users = top_users, search_time = end-start)

#Page for top 10 tweets by retweet count
@app.route('/top10tweets_favorite')
def top_tweets_fav():
    start = time.time()
    cache_results = cache.top10_tweets()
    if cache_results == -1:
        top_tweets = tweets_10()
        for doc in top_tweets:
            del doc['_id']
            user_info = find_by_user_id(doc['UserID'])
            try:
                doc['name'] = user_info[0][2]
                doc['screen_name'] = user_info[0][3]
            except:
                return
            cache.push_to_cache('top10_tweets', top_tweets)
    else:
        top_tweets = cache_results
        end = time.time()
        print(top_tweets)
        return render_template('top10tweets_favorite.html', top_tweets = top_tweets, search_time = end-start)
    
@app.route('/top10tweets_retweets')
def top_tweets_retweet():
    start = time.time()
    cache_results = cache.top_retweets()
    if cache_results == -1:
        top_tweets = tweets_10()
        for doc in top_tweets:
            del doc['_id']
            user_info = find_by_user_id(doc['UserID'])
            try:
                doc['name'] = user_info[0][2]
                doc['screen_name'] = user_info[0][3]
            except:
                return
            cache.push_to_cache('top_retweets', top_tweets)
    else:
        top_tweets = cache_results
        end = time.time()
        return render_template('top10tweets_retweets.html', top_tweets = top_tweets, search_time = end-start)
    
@app.route('/top10hashtags')
def top_hashtag():
    start = time.time()
    cache_results = cache.most_common_hashtags()
    if cache_results == -1:
        top_hashtags = hashtag_10()
        for doc in top_hashtags:
            cache.push_to_cache('top_hashtag', top_hashtags)
    else:
        top_hashtags = cache_results
        end = time.time()
        return render_template('top10hashtags.html', top_hashtags = top_hashtags, search_time = end-start)
    

In [None]:
app.run(debug=True, port=5002, use_reloader=False)

 * Serving Flask app '__main__'
 * Debug mode: on


 * Running on http://127.0.0.1:5002
Press CTRL+C to quit
127.0.0.1 - - [26/Apr/2024 10:27:43] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [26/Apr/2024 10:28:09] "POST / HTTP/1.1" 302 -
127.0.0.1 - - [26/Apr/2024 10:28:19] "GET /results/text/corona/2020-04-01/2020-04-07 HTTP/1.1" 200 -
127.0.0.1 - - [26/Apr/2024 10:28:27] "POST /results/text/corona/2020-04-01/2020-04-07 HTTP/1.1" 200 -
127.0.0.1 - - [26/Apr/2024 10:36:42] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [26/Apr/2024 10:36:47] "GET /top10users HTTP/1.1" 200 -
127.0.0.1 - - [26/Apr/2024 10:36:49] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [26/Apr/2024 10:36:50] "GET /top10tweets_favorite HTTP/1.1" 200 -


[{'_id': ObjectId('662b9c7022717e940d9f2322'), 'created_at': datetime.datetime(2020, 3, 4, 17, 31, 21), 'TweetID': 1235256530728972290, 'Id_str': '1235256530728972290', 'Text': 'ALERT‼️‼️‼️\nThe corona virus can be spread through money. If you have any money at home, put on some gloves, put al… https://t.co/juJjDpFN3I', 'Full_Text': "ALERT‼️‼️‼️\nThe corona virus can be spread through money. If you have any money at home, put on some gloves, put all the money in to a plastic bag and put it outside the front door tonight. I'm collecting all the plastic bags tonight for safety. Think of your health.", 'Hashtag': [], 'UserID': 2863558530, 'Retweet_Count': 1, 'Retweet_ID': 0, 'Favorite_Count': 1128502}, {'_id': ObjectId('662b9f4222717e940d9fdf0a'), 'created_at': datetime.datetime(2020, 3, 13, 0, 43, 40), 'TweetID': 1238264431320215553, 'Id_str': '1238264431320215553', 'Text': '*corona virus enters my body*\n\nThe 4 Flintstone gummies I ate in 2005: https://t.co/3STfdIQtaT', 'Full_Text': '*

127.0.0.1 - - [26/Apr/2024 10:36:53] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [26/Apr/2024 10:36:54] "GET /top10tweets_retweets HTTP/1.1" 200 -
127.0.0.1 - - [26/Apr/2024 10:36:56] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [26/Apr/2024 10:36:58] "GET /top10hashtags HTTP/1.1" 200 -
127.0.0.1 - - [26/Apr/2024 10:36:59] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [26/Apr/2024 10:37:20] "POST / HTTP/1.1" 302 -
127.0.0.1 - - [26/Apr/2024 10:37:21] "GET /results/text/corona/2020-04-01/2020-04-05 HTTP/1.1" 200 -
127.0.0.1 - - [26/Apr/2024 10:37:26] "POST /results/text/corona/2020-04-01/2020-04-05 HTTP/1.1" 200 -
127.0.0.1 - - [26/Apr/2024 10:37:31] "POST /results/text/corona/2020-04-01/2020-04-05 HTTP/1.1" 200 -
127.0.0.1 - - [26/Apr/2024 10:37:36] "POST /results/text/corona/2020-04-01/2020-04-05 HTTP/1.1" 200 -
127.0.0.1 - - [26/Apr/2024 10:37:40] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [26/Apr/2024 10:37:59] "POST / HTTP/1.1" 302 -
127.0.0.1 - - [26/Apr/2024 10:37:59] "GET /results/author/sam/2010-01-26/2024-

[{'_id': ObjectId('662b9c7022717e940d9f2322'), 'created_at': datetime.datetime(2020, 3, 4, 17, 31, 21), 'TweetID': 1235256530728972290, 'Id_str': '1235256530728972290', 'Text': 'ALERT‼️‼️‼️\nThe corona virus can be spread through money. If you have any money at home, put on some gloves, put al… https://t.co/juJjDpFN3I', 'Full_Text': "ALERT‼️‼️‼️\nThe corona virus can be spread through money. If you have any money at home, put on some gloves, put all the money in to a plastic bag and put it outside the front door tonight. I'm collecting all the plastic bags tonight for safety. Think of your health.", 'Hashtag': [], 'UserID': 2863558530, 'Retweet_Count': 1, 'Retweet_ID': 0, 'Favorite_Count': 1128502}, {'_id': ObjectId('662b9f4222717e940d9fdf0a'), 'created_at': datetime.datetime(2020, 3, 13, 0, 43, 40), 'TweetID': 1238264431320215553, 'Id_str': '1238264431320215553', 'Text': '*corona virus enters my body*\n\nThe 4 Flintstone gummies I ate in 2005: https://t.co/3STfdIQtaT', 'Full_Text': '*

127.0.0.1 - - [26/Apr/2024 10:47:51] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [26/Apr/2024 10:47:52] "GET /top10tweets_retweets HTTP/1.1" 200 -
127.0.0.1 - - [26/Apr/2024 10:48:01] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [26/Apr/2024 10:48:02] "GET /top10hashtags HTTP/1.1" 200 -
127.0.0.1 - - [26/Apr/2024 10:48:12] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [26/Apr/2024 10:49:06] "POST / HTTP/1.1" 302 -
127.0.0.1 - - [26/Apr/2024 10:49:06] "GET /results/hashtag/corona/2020-04-04/2020-04-08 HTTP/1.1" 200 -
127.0.0.1 - - [26/Apr/2024 10:49:24] "POST /results/hashtag/corona/2020-04-04/2020-04-08 HTTP/1.1" 200 -
127.0.0.1 - - [26/Apr/2024 10:49:40] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [26/Apr/2024 10:50:07] "POST / HTTP/1.1" 302 -
127.0.0.1 - - [26/Apr/2024 10:50:07] "GET /results/author/sam/2010-01-26/2024-04-26 HTTP/1.1" 200 -
127.0.0.1 - - [26/Apr/2024 10:50:07] "GET /static/dummy_sam.jpeg HTTP/1.1" 304 -
127.0.0.1 - - [26/Apr/2024 10:50:54] "POST /results/author/sam/2010-01-26/2024-04-26 HTTP/1.1" 