In [1]:
import json
import threading
import time
from collections import defaultdict

class CombinedCache:
    """In-memory cache for user and tweet records with JSON persistence.

    User entries are kept until they are removed or evicted. Tweet entries are
    only returned while they are younger than ``ttl`` seconds, measured from
    the last ``put_tweet``. Each of the two caches holds at most ``maxsize``
    entries; the oldest-inserted entries are evicted first and
    ``maxsize=None`` disables the limit.

    Note: ``save_to_disk`` writes JSON, so after ``load_from_disk`` every key
    is a string regardless of the type it was originally stored under.
    """

    def __init__(self, maxsize=10000, ttl=300):
        self.user_cache = defaultdict(lambda: None)
        self.tweet_cache = defaultdict(lambda: None)
        self.timestamps = defaultdict(lambda: 0)  # tweet_id -> time.time() of the last put_tweet
        self.maxsize = maxsize
        self.ttl = ttl

    def _trim(self, cache):
        """Evict oldest-inserted entries from ``cache`` until it fits ``maxsize``."""
        if self.maxsize is None:
            return
        while cache and len(cache) > self.maxsize:
            oldest = next(iter(cache))
            del cache[oldest]
            if cache is self.tweet_cache:
                self.timestamps.pop(oldest, None)

    def get_user(self, user_id):
        """Return the cached user record, or ``None`` when it is not cached."""
        # .get() instead of indexing: indexing a defaultdict would insert a
        # None entry for every miss and make the cache grow on lookups.
        return self.user_cache.get(user_id)

    def get_tweet(self, tweet_id):
        """Return the cached tweet if present and not expired, else ``None``."""
        tweet = self.tweet_cache.get(tweet_id)
        if not tweet:
            return None
        if time.time() - self.timestamps.get(tweet_id, 0) < self.ttl:
            return tweet
        # Expired: drop it so stale entries do not accumulate.
        self.remove_tweet(tweet_id)
        return None

    def put_user(self, user_id, user_data):
        """Store (or overwrite) a user record."""
        self.user_cache[user_id] = user_data
        self._trim(self.user_cache)

    def put_tweet(self, tweet_id, tweet_data):
        """Store (or overwrite) a tweet record and restart its TTL."""
        self.tweet_cache[tweet_id] = tweet_data
        self.timestamps[tweet_id] = time.time()
        self._trim(self.tweet_cache)

    def remove_user(self, user_id):
        """Remove a user record; a missing key is ignored."""
        self.user_cache.pop(user_id, None)

    def remove_tweet(self, tweet_id):
        """Remove a tweet record and its timestamp; missing keys are ignored."""
        self.tweet_cache.pop(tweet_id, None)
        self.timestamps.pop(tweet_id, None)

    def save_to_disk(self, filename='cache.json'):
        """Write both caches and the timestamps to ``filename`` as JSON."""
        data = {
            'user_cache': dict(self.user_cache),
            'tweet_cache': dict(self.tweet_cache),
            'timestamps': dict(self.timestamps)
        }
        with open(filename, 'w') as f:
            json.dump(data, f)
        print("Cache saved to disk.")

    def load_from_disk(self, filename='cache.json'):
        """Replace the cache contents with the data stored in ``filename``.

        A missing or unparsable file leaves the current contents untouched.
        """
        try:
            with open(filename, 'r') as f:
                data = json.load(f)
        except FileNotFoundError:
            print("No cache file found, starting with an empty cache.")
            return
        except json.JSONDecodeError as e:
            print(f"Cache file is not valid JSON ({e}), starting with an empty cache.")
            return
        self.user_cache = defaultdict(lambda: None, data.get('user_cache', {}))
        self.tweet_cache = defaultdict(lambda: None, data.get('tweet_cache', {}))
        self.timestamps = defaultdict(lambda: 0, data.get('timestamps', {}))
        print("Cache loaded from disk.")

# Create the shared cache instance
combined_cache = CombinedCache()

# Re-running this cell must not leave an older save chain running in the
# background, so cancel the timer scheduled by a previous run (if any).
_previous_save_timer = globals().get("_cache_save_timer")
if _previous_save_timer is not None:
    _previous_save_timer.cancel()


# Periodically persist the cache
def periodic_save_cache():
    """Save the cache now and schedule the next save one hour from now."""
    global _cache_save_timer
    combined_cache.save_to_disk()
    _cache_save_timer = threading.Timer(3600, periodic_save_cache)  # save once per hour
    # Daemon thread: a pending save must not keep the interpreter alive on exit.
    _cache_save_timer.daemon = True
    _cache_save_timer.start()


# Load the cache at program start-up
combined_cache.load_from_disk()
periodic_save_cache()


Cache loaded from disk.
Cache saved to disk.


In [None]:
# Example of the tweet document structure stored in Firestore
{
  "id_str": "123456",
  "text": "This is a sample tweet",
  "created_at": "2021-01-01T12:00:00Z",
  "user_id": "78910",  // 只存储用户ID
  "retweets": {
    "count": 100,
    "users": [
      "user_id1",
      "user_id2"
    ]
  },
  "hashtags": ["#example", "#sample"]
}

In [2]:
import firebase_admin
from firebase_admin import credentials, firestore
import json
from datetime import datetime
import pytz

# Initialise the Firebase app
cred = credentials.Certificate('D:/Download/twitter-a3b9a-firebase-adminsdk-b9pvo-f8f057cf01.json')
try:
    # get_app() raises ValueError when no default app exists yet. Checking
    # first keeps this cell re-runnable: calling initialize_app() a second
    # time in the same kernel raises.
    firebase_admin.get_app()
except ValueError:
    firebase_admin.initialize_app(cred)
db = firestore.client()

In [None]:

def parse_twitter_date(datestr):
    """Parse a Twitter ``created_at`` string into an aware datetime in UTC.

    The input must follow the ``'%a %b %d %H:%M:%S %z %Y'`` layout.
    """
    twitter_format = '%a %b %d %H:%M:%S %z %Y'
    parsed = datetime.strptime(datestr, twitter_format)
    return parsed.astimezone(pytz.utc)

def main():
    """Load tweets from the ``corona-out-3`` dump into Firestore.

    Every line that is a JSON object with a not-yet-seen ``id_str`` becomes one
    document in the ``tweets`` collection. When the tweet is a retweet (it has
    a ``retweeted_status``), one document is also written to ``retweets``
    describing who retweeted which original tweet and when.

    Lines that cannot be parsed or lack required fields are reported and
    skipped. Errors raised while committing a batch are not swallowed.
    """
    tweets_seen = set()  # ids of tweets already processed
    max_batch_ops = 400  # stay below Firestore's limit of 500 operations per batch

    with open("corona-out-3", "r", encoding="utf-8") as f1:
        batch = db.batch()  # Firestore write batch
        count = 0  # number of operations queued in the current batch

        for line in f1:
            line = line.strip()
            if not line or not line.startswith('{'):
                continue

            # Build both documents first so that a malformed line never leaves
            # a half-written record in the batch.
            try:
                data = json.loads(line)
                tweet_id = data.get('id_str')

                if tweet_id is None or tweet_id in tweets_seen:
                    continue

                tweets_seen.add(tweet_id)
                user_id = data['user'].get('id_str')

                # Tweet document for Firestore
                tweet_data = {
                    'id_str': tweet_id,
                    'text': data['text'],
                    'created_at': data['created_at'],
                    'user_id': user_id,
                    'like_count': data.get('favorite_count', 0),
                    'retweet_count': data.get('retweet_count', 0),
                    'reply_count': data.get('reply_count', 0),
                    'hashtags': [tag['text'] for tag in data.get('entities', {}).get('hashtags', [])]
                }

                # Retweet document, if this tweet is a retweet
                retweet_data = None
                retweet_doc_id = None
                if 'retweeted_status' in data:
                    # 'retweeted_status' is the ORIGINAL tweet; the tweet being
                    # processed is the retweet. So the original id comes from
                    # `retweeted`, while the retweeter and the retweet time come
                    # from the current tweet.
                    retweeted = data['retweeted_status']
                    original_id = retweeted['id_str']
                    retweet_data = {
                        'original_tweet_id': original_id,
                        'retweeter_id': user_id,
                        'retweet_time': data['created_at'],
                        'retweet_text': retweeted.get('extended_tweet', {}).get('full_text', retweeted['text'])
                    }
                    retweet_doc_id = f"{original_id}_{user_id}"
            except Exception as e:
                print(f"Error processing line: {e}")
                continue

            batch.set(db.collection('tweets').document(str(tweet_id)), tweet_data)
            count += 1
            if retweet_data is not None:
                batch.set(db.collection('retweets').document(retweet_doc_id), retweet_data)
                count += 1

            if count >= max_batch_ops:
                batch.commit()  # flush the full batch
                batch = db.batch()  # start a new one
                count = 0

        if count > 0:
            batch.commit()  # flush whatever is left


if __name__ == "__main__":
    main()


In [None]:
import psycopg2 
# PostgreSQL storage
# Database connection and other initialisation
# NOTE(review): connection parameters, including the password, are hard-coded
# here — consider reading them from the environment before sharing the notebook.
host = "localhost"
dbname = "twitter_1"
user = "postgres"
password = "123456"
port = "5432"
conn = psycopg2.connect(host=host, dbname=dbname, user=user, password=password, port=port)
# Autocommit: every statement below is committed immediately.
conn.autocommit = True
cursor = conn.cursor()

# Create the user table (no-op when it already exists)
create_table_query = """
CREATE TABLE IF NOT EXISTS user_table (
    id BIGINT PRIMARY KEY,
    id_str VARCHAR(50) NOT NULL,
    name VARCHAR(255),
    screen_name VARCHAR(255),
    location VARCHAR(255),
    url VARCHAR(500),
    description TEXT,
    translator_type VARCHAR(50),
    protected BOOLEAN,
    verified BOOLEAN,
    followers_count INT,
    friends_count INT,
    listed_count INT,
    favourites_count INT,
    statuses_count INT,
    created_at TIMESTAMP WITHOUT TIME ZONE,
    utc_offset INT,
    time_zone VARCHAR(50),
    geo_enabled BOOLEAN,
    lang VARCHAR(50),
    contributors_enabled BOOLEAN,
    is_translator BOOLEAN,
    profile_background_color VARCHAR(7),
    profile_background_image_url VARCHAR(500),
    profile_background_image_url_https VARCHAR(500),
    profile_background_tile BOOLEAN,
    profile_link_color VARCHAR(7),
    profile_sidebar_border_color VARCHAR(7),
    profile_sidebar_fill_color VARCHAR(7),
    profile_text_color VARCHAR(7),
    profile_use_background_image BOOLEAN,
    profile_image_url VARCHAR(500),
    profile_image_url_https VARCHAR(500),
    profile_banner_url VARCHAR(500),
    default_profile BOOLEAN,
    default_profile_image BOOLEAN,
    following BOOLEAN,
    follow_request_sent BOOLEAN,
    notifications BOOLEAN
);
"""

# The success message is printed whenever the statement runs without error,
# including when the table already existed (IF NOT EXISTS).
try:
    cursor.execute(create_table_query)
    print("Table created successfully")
except Exception as e:
    print(f"An error occurred: {e}")

# Read the dump file line by line and insert every user not stored yet.
# Uses `json`, `cache`, `cursor` and `conn`, which must already be defined
# when this code runs.
file_path = "corona-out-3"  # make sure this is the correct file path
try:
    with open(file_path, "r", encoding="utf-8") as file:
        for line_number, line in enumerate(file, 1):
            line = line.strip()
            if not line:
                print(f"Skipping empty line: {line_number}")
                continue

            try:
                data = json.loads(line)
                user_data = data['user']
                user_id_str = user_data['id_str']
            except json.JSONDecodeError as e:
                print(f"Error decoding JSON on line {line_number}: {e}")
                continue
            except (KeyError, TypeError) as e:
                # Valid JSON that carries no usable user object.
                print(f"Skipping line {line_number}: no user information ({e!r})")
                continue

            # Try the cache first
            cached_user = cache.get(user_id_str)
            if cached_user:
                print(f"User {user_id_str} already exists in cache, skipping insert.")
                continue

            # Ask the database whether the user already exists; only existence
            # matters, so no columns are fetched.
            cursor.execute("SELECT 1 FROM user_table WHERE id_str = %s LIMIT 1;", (user_id_str,))
            result = cursor.fetchone()

            if not result:
                try:
                    # Insert the user into the database
                    cursor.execute("""
                    INSERT INTO user_table (id, id_str, name, screen_name, location, url, description, protected, verified, followers_count, friends_count, listed_count, favourites_count, statuses_count, created_at, profile_background_color, profile_link_color, profile_sidebar_border_color, profile_sidebar_fill_color, profile_text_color, profile_use_background_image, profile_image_url, profile_image_url_https, profile_banner_url, default_profile, default_profile_image, following, follow_request_sent, notifications)
                    VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);
                """, (
                        user_data['id'], user_data['id_str'], user_data['name'], user_data['screen_name'],
                        user_data['location'], user_data.get('url'), user_data['description'],
                        user_data['protected'], user_data['verified'], user_data['followers_count'],
                        user_data['friends_count'], user_data['listed_count'], user_data['favourites_count'],
                        user_data['statuses_count'], user_data['created_at'], user_data.get('profile_background_color'),
                        user_data.get('profile_link_color'), user_data.get('profile_sidebar_border_color'),
                        user_data.get('profile_sidebar_fill_color'), user_data.get('profile_text_color'),
                        user_data.get('profile_use_background_image'), user_data.get('profile_image_url'),
                        user_data.get('profile_image_url_https'), user_data.get('profile_banner_url'),
                        user_data.get('default_profile'), user_data.get('default_profile_image'),
                        user_data.get('following'), user_data.get('follow_request_sent'), user_data.get('notifications')
                    ))
                    print(f"User {user_id_str} inserted into the database.")
                    # Remember the user in the cache
                    cache.put(user_id_str, user_data)
                except Exception as ex:
                    print(f"Error inserting data on line {line_number}: {ex}")
            else:
                print(f"User {user_id_str} already exists in the database, skipping insert.")

    # Persist the cache before finishing. The cache class in this notebook
    # exposes save_to_disk(); it has no checkpoint_cache() method.
    cache.save_to_disk()
finally:
    # Close the cursor and the connection even when processing failed
    cursor.close()
    conn.close()


In [3]:
from datetime import datetime, timedelta
import pytz
import tkinter as tk
from tkinter import messagebox, ttk, scrolledtext
import psycopg2 

In [4]:
def calculate_relevance(tweet, query):
    """Score how relevant ``tweet`` is for ``query``.

    The score is a weighted sum of three parts:

    * text match (weight 0.5): 1 when ``query`` occurs in the tweet text,
      compared case-insensitively, else 0;
    * interaction (weight 0.3): likes * 0.1 + retweets * 0.2 + replies * 0.1;
    * author influence (weight 0.2): follower count * 0.001.

    The follower count is read from ``tweet['user']['followers_count']`` (the
    name used by the Twitter user object); the older ``follower_count`` key is
    still accepted. Missing or ``None`` counts are treated as 0.

    Raises:
        KeyError: if ``tweet`` has no ``'text'`` entry.
    """
    # Base score: simple substring match
    text_score = 1 if query.lower() in tweet['text'].lower() else 0

    # Interaction score
    interaction_score = ((tweet.get('like_count') or 0) * 0.1 +
                         (tweet.get('retweet_count') or 0) * 0.2 +
                         (tweet.get('reply_count') or 0) * 0.1)

    # Author influence score
    user = tweet.get('user') or {}
    followers = user.get('followers_count')
    if followers is None:
        followers = user.get('follower_count') or 0
    influence_score = followers * 0.001  # adjust the weight as needed

    # Combined score
    return text_score * 0.5 + interaction_score * 0.3 + influence_score * 0.2


In [None]:
from datetime import datetime, timedelta
import pytz
import tkinter as tk
from tkinter import messagebox, ttk, scrolledtext
import psycopg2 
def parse_twitter_date(datestr):
    """Parse a Twitter timestamp string and return it as an aware UTC datetime.

    Example input: ``"Sat Apr 25 12:21:41 +0000 2020"``.
    """
    local_moment = datetime.strptime(datestr, '%a %b %d %H:%M:%S %z %Y')
    utc_moment = local_moment.astimezone(pytz.utc)
    return utc_moment

def search_tweets(query, search_type="text", start_date=None, end_date=None, sort_by="created_at", order="DESC"):
    """Search the Firestore ``tweets`` collection and attach author data.

    Args:
        query: text prefix (``search_type="text"``) or hashtag
            (``search_type="hashtag"``) to look for.
        search_type: ``"text"`` or ``"hashtag"``; any other value applies no
            Firestore filter at all.
        start_date, end_date: optional ``YYYY-MM-DD`` strings. The date filter
            is applied only when both are given and covers both days in full.
        sort_by: tweet field to sort by.
        order: ``"DESC"`` for descending, anything else for ascending.

    Returns:
        A list of tweet dicts, each with a ``'user'`` entry (``{}`` when the
        author is unknown). An empty list when fetching from Firestore fails.
    """
    db = firestore.client()
    tweets = db.collection('tweets')

    if search_type == "text":
        tweets = tweets.where('text', '>=', query).where('text', '<=', query + '\uf8ff')
    elif search_type == "hashtag":
        tweets = tweets.where('hashtags', 'array_contains', query)

    # Stream the query exactly once; date filtering and sorting happen on the
    # fetched list so the date filter is not lost to a second, unfiltered read.
    try:
        results = [doc.to_dict() for doc in tweets.stream()]
    except Exception as e:
        messagebox.showerror("Error", f"Failed to fetch tweets: {e}")
        return []

    if start_date and end_date:
        # Attach UTC to the user-supplied dates
        utc_zone = pytz.utc
        range_start = utc_zone.localize(datetime.strptime(start_date, "%Y-%m-%d"))
        # Exclusive upper bound: midnight after the end date.
        range_end = utc_zone.localize(datetime.strptime(end_date, "%Y-%m-%d") + timedelta(days=1))
        results = [tweet for tweet in results
                   if range_start <= parse_twitter_date(tweet['created_at']) < range_end]

    def _sort_value(tweet):
        value = tweet[sort_by]
        # created_at is parsed as a Twitter-format date string so the order is
        # chronological rather than alphabetical.
        return parse_twitter_date(value) if sort_by == "created_at" else value

    # Tweets lacking the sort field go last, whatever the direction.
    sortable = [tweet for tweet in results if tweet.get(sort_by) is not None]
    unsortable = [tweet for tweet in results if tweet.get(sort_by) is None]
    sortable.sort(key=_sort_value, reverse=(order == "DESC"))
    results = sortable + unsortable

    user_ids = {result['user_id'] for result in results}
    user_data = get_user_data(list(user_ids)) if user_ids else {}

    for result in results:
        result['user'] = user_data.get(result['user_id'], {})

    return results

def get_user_data(user_ids):
    """Fetch basic profile data for the given user ids from PostgreSQL.

    Args:
        user_ids: sequence of user ids (strings or ints).

    Returns:
        Dict mapping each found id — in the same form the caller passed it —
        to ``{'name', 'screen_name', 'location', 'url'}``. Empty when
        ``user_ids`` is empty or the lookup fails (an error dialog is shown).
    """
    if not user_ids:
        return {}

    # The id column comes back as an int while callers may pass id strings.
    # Key the result by the caller's own form so lookups actually hit.
    requested = {str(user_id): user_id for user_id in user_ids}

    conn = None
    try:
        conn = psycopg2.connect(
            host="localhost",
            dbname="twitter_1",
            user="postgres",
            password="123456",
            port="5432"
        )
        cursor = conn.cursor()
        format_strings = ','.join(['%s'] * len(user_ids))
        cursor.execute(f"SELECT id, name, screen_name, location, url FROM user_table WHERE id IN ({format_strings})", tuple(user_ids))
        user_data = {}
        for row in cursor.fetchall():
            key = requested.get(str(row[0]), row[0])
            user_data[key] = {'name': row[1], 'screen_name': row[2], 'location': row[3], 'url': row[4]}
        cursor.close()
        return user_data
    except Exception as e:
        messagebox.showerror("Error", f"Failed to fetch user data: {e}")
        return {}
    finally:
        # Always release the connection, including after a failed query.
        if conn is not None:
            conn.close()

def perform_search():
    """Run a search with the current form values and list the matches.

    Reads the query, search type and date range from the module-level widgets,
    clears the results box, then writes one line per tweet followed by a
    "Details" button that opens ``show_details`` for that tweet.
    """
    results = search_tweets(
        query_entry.get(),
        search_type_combobox.get(),
        start_date_entry.get(),
        end_date_entry.get(),
    )
    results_display.delete(1.0, tk.END)

    for tweet in results:
        author = tweet.get('user', {})
        summary = f"{tweet['text']} - {author.get('name', 'Unknown')} - {tweet.get('created_at', 'No date')}\n"
        results_display.insert(tk.END, summary)
        # Bind the current tweet as a default argument so each button keeps its own tweet.
        button = tk.Button(results_display, text="Details", command=lambda selected=tweet: show_details(selected))
        results_display.window_create(tk.END, window=button)
        results_display.insert(tk.END, "\n")

def show_details(result):
    """Show a dialog with the id, author, text and hashtags of one tweet.

    Raises:
        KeyError: if ``result`` has no ``'text'`` entry.
    """
    author = result.get('user', {})
    lines = [
        f"Tweet ID: {result.get('id_str', 'N/A')}",
        f"User: {author.get('name', 'Unknown')}",
        f"Screen Name: {author.get('screen_name', 'N/A')}",
        f"Location: {author.get('location', 'N/A')}",
        f"Text: {result['text']}",
        f"Hashtags: {', '.join(result.get('hashtags', []))}",
    ]
    messagebox.showinfo("Tweet Details", "\n".join(lines))

# Build the main window. Widgets are packed top to bottom in the order they
# are created here; perform_search() reads the entry/combobox widgets through
# these module-level names.
root = tk.Tk()
root.title("Twitter Search App")

# Create the interface elements
query_label = tk.Label(root, text="Enter search query:")
query_label.pack()
query_entry = tk.Entry(root)
query_entry.pack()

search_type_label = tk.Label(root, text="Search by:")
search_type_label.pack()
search_type_combobox = ttk.Combobox(root, values=("text", "hashtag", "user"))
search_type_combobox.pack()
# Preselect the first entry ("text")
search_type_combobox.current(0)

start_date_label = tk.Label(root, text="Start Date (YYYY-MM-DD):")
start_date_label.pack()
start_date_entry = tk.Entry(root)
start_date_entry.pack()

end_date_label = tk.Label(root, text="End Date (YYYY-MM-DD):")
end_date_label.pack()
end_date_entry = tk.Entry(root)
end_date_entry.pack()

search_button = tk.Button(root, text="Search", command=perform_search)
search_button.pack()

results_display = scrolledtext.ScrolledText(root, wrap=tk.WORD, height=10, width=50)
results_display.pack(pady=20)

# Start the Tk event loop
root.mainloop()

In [5]:
import pickle
from collections import OrderedDict

In [6]:
class LRUCache:
    """Least-recently-used cache that can be persisted to disk with pickle.

    ``self.cache`` is an ``OrderedDict`` ordered from least to most recently
    used. When more than ``capacity`` entries are stored, the least recently
    used ones are dropped.
    """

    def __init__(self, capacity: int = 100):
        self.cache = OrderedDict()
        self.capacity = capacity

    def _trim(self):
        """Drop least-recently-used entries until the cache fits ``capacity``."""
        while self.cache and len(self.cache) > self.capacity:
            self.cache.popitem(last=False)

    def get(self, key):
        """Return the value for ``key`` and mark it most recently used.

        Returns ``None`` when the key is not cached.
        """
        if key not in self.cache:
            return None
        self.cache.move_to_end(key)
        return self.cache[key]

    def put(self, key, value):
        """Store ``value`` under ``key`` as the most recently used entry."""
        if key in self.cache:
            self.cache.move_to_end(key)
        self.cache[key] = value
        self._trim()

    def save_to_disk(self, filename='cache.pkl'):
        """Pickle the cache contents to ``filename``."""
        with open(filename, 'wb') as f:
            pickle.dump(self.cache, f)

    def load_from_disk(self, filename='cache.pkl'):
        """Replace the cache contents with the pickle stored in ``filename``.

        A missing, empty or unreadable file results in an empty cache. Loaded
        data is trimmed to ``capacity``.

        SECURITY: unpickling executes code embedded in the file, so only load
        cache files written by this program.
        """
        try:
            with open(filename, 'rb') as f:
                loaded = pickle.load(f)
        except FileNotFoundError:
            loaded = None
        except (EOFError, pickle.UnpicklingError):
            # Empty or corrupt file (e.g. an interrupted save): start fresh.
            loaded = None
        self.cache = OrderedDict(loaded) if isinstance(loaded, dict) else OrderedDict()
        self._trim()

In [7]:
cred = credentials.Certificate('D:/Download/twitter-a3b9a-firebase-adminsdk-b9pvo-f8f057cf01.json')
try:
    # get_app() raises ValueError when no default app has been initialised.
    firebase_admin.get_app()
except ValueError:
    # Use the credential instead of depending on some other code having
    # initialised the app before this point.
    firebase_admin.initialize_app(cred)
db = firestore.client()

# Global cache, restored from the previous session if a cache file exists
cache = LRUCache()
cache.load_from_disk()

def parse_twitter_date(datestr):
    """Parse a Twitter ``created_at`` string and convert it to the UTC zone."""
    target_zone = pytz.timezone("UTC")
    parsed = datetime.strptime(datestr, '%a %b %d %H:%M:%S %z %Y')
    return parsed.astimezone(target_zone)

def calculate_relevance(tweet, query):
    """Score a tweet for ``query``.

    Half a point for a case-insensitive substring match in the text, plus half
    of the summed like, retweet and reply counts (missing counts count as 0).
    """
    needle = query.lower()
    matched = needle in tweet['text'].lower()
    engagement = sum(tweet.get(field, 0) for field in ('like_count', 'retweet_count', 'reply_count'))
    return matched * 0.5 + engagement * 0.5

def get_user_data(user_ids, use_cache=True):
    """Return profile data for ``user_ids``, using the global cache if allowed.

    Args:
        user_ids: iterable of user ids (strings or ints).
        use_cache: when true, ids found in ``cache`` are not queried, and rows
            fetched from the database are added to ``cache``.

    Returns:
        Dict mapping each found id — in the same form the caller passed it —
        to ``{'name', 'screen_name', 'location', 'url'}``. If the database
        lookup fails, an error dialog is shown and only the cached entries
        are returned.
    """
    results = {}
    missing_ids = []
    for user_id in user_ids:
        if use_cache:
            data = cache.get(user_id)
            if data:
                results[user_id] = data
                continue
        missing_ids.append(user_id)

    if not missing_ids:
        return results

    # The id column comes back as an int while callers may pass id strings.
    # Key results and cache entries by the caller's own form; otherwise neither
    # the caller's lookup nor the next cache.get() can ever hit.
    requested = {str(user_id): user_id for user_id in missing_ids}

    conn = None  # must be bound before `finally`, even when connect() fails
    try:
        conn = psycopg2.connect(host="localhost", dbname="twitter_1", user="postgres", password="123456", port="5432")
        cursor = conn.cursor()
        format_strings = ','.join(['%s'] * len(missing_ids))
        cursor.execute(f"SELECT id, name, screen_name, location, url FROM user_table WHERE id IN ({format_strings})", tuple(missing_ids))
        for row in cursor.fetchall():
            key = requested.get(str(row[0]), row[0])
            user_data = {'name': row[1], 'screen_name': row[2], 'location': row[3], 'url': row[4]}
            results[key] = user_data
            if use_cache:
                cache.put(key, user_data)
    except Exception as e:
        messagebox.showerror("Error", f"连接数据库失败: {e}")
    finally:
        if conn is not None:
            conn.close()

    return results

def get_retweet_details(tweet_id, use_cache=True):
    """Describe who retweeted ``tweet_id`` and when.

    Looks up the ``retweets`` documents whose ``original_tweet_id`` equals
    ``tweet_id`` and resolves each ``retweeter_id`` through ``get_user_data``.

    Args:
        tweet_id: id of the tweet whose retweets are wanted.
        use_cache: forwarded to ``get_user_data``.

    Returns:
        One line per retweet, joined with newlines; an empty string when there
        are no retweets.
    """
    retweet_docs = [
        doc.to_dict()
        for doc in db.collection('retweets').where('original_tweet_id', '==', tweet_id).stream()
    ]
    user_ids = [retweet['retweeter_id'] for retweet in retweet_docs]
    users_info = get_user_data(user_ids, use_cache=use_cache)
    retweet_info = []
    for retweet in retweet_docs:
        user_id = retweet['retweeter_id']
        if user_id in users_info:
            user_info = users_info[user_id]
            # The retweet time is stored on the retweet document, not on the user record.
            retweet_info.append(f"Retweeted by: {user_info['name']} at {retweet.get('retweet_time', 'Unknown')}")
        else:
            retweet_info.append("Retweet details unavailable")
    return "\n".join(retweet_info)

def search_tweets(query, search_type="text", start_date=None, end_date=None, use_cache=True):
    """Search tweets in Firestore, rank them, and attach user and retweet info.

    Args:
        query: text prefix (``"text"``) or hashtag (``"hashtag"``) to look for.
        search_type: ``"text"`` or ``"hashtag"``.
            NOTE(review): any other value, including the "user" option offered
            by the UI, applies no filter and streams the whole collection —
            confirm the intended behaviour.
        start_date, end_date: optional ``YYYY-MM-DD`` strings. The date filter
            is applied only when both are given and covers both days in full.
        use_cache: forwarded to the user lookups.

    Returns:
        Tweet dicts sorted by descending ``relevance_score``, each with
        ``'user'`` and ``'retweet_details'`` entries.
    """
    tweets = db.collection('tweets')
    if search_type == "text":
        tweets = tweets.where('text', '>=', query).where('text', '<=', query + '\uf8ff')
    elif search_type == "hashtag":
        tweets = tweets.where('hashtags', 'array_contains', query)

    results = [doc.to_dict() for doc in tweets.stream()]

    if start_date and end_date:
        utc_zone = pytz.utc
        range_start = utc_zone.localize(datetime.strptime(start_date, "%Y-%m-%d"))
        # Exclusive upper bound: midnight after the end date.
        range_end = utc_zone.localize(datetime.strptime(end_date, "%Y-%m-%d") + timedelta(days=1))
        results = [tweet for tweet in results
                   if range_start <= parse_twitter_date(tweet['created_at']) < range_end]

    # Score and rank every result, whether or not a date range was given.
    for tweet in results:
        tweet['relevance_score'] = calculate_relevance(tweet, query)
    results.sort(key=lambda x: x['relevance_score'], reverse=True)

    user_ids = {result['user_id'] for result in results}
    users_info = get_user_data(list(user_ids), use_cache=use_cache)
    for result in results:
        result['user'] = users_info.get(result['user_id'], {})
        result['retweet_details'] = get_retweet_details(result['id_str'], use_cache=use_cache)
    return results


def perform_search():
    """Run a search with the current form values and list the matches.

    Clears the results box first. When nothing matches, an information dialog
    is shown instead. Otherwise each tweet gets one summary line followed by a
    "Details" button that opens ``show_details`` for that tweet.
    """
    results = search_tweets(
        query_entry.get(),
        search_type_combobox.get(),
        start_date_entry.get(),
        end_date_entry.get(),
    )
    results_display.delete(1.0, tk.END)

    if not results:
        messagebox.showinfo("Search Result", "No tweets found matching your criteria.")
        return

    for tweet in results:
        author = tweet.get('user', {})
        summary = f"{tweet['text']} - {author.get('name', 'Unknown')} - {tweet.get('created_at', 'No date')} - Retweets: {tweet.get('retweet_details', 'None')}\n"
        results_display.insert(tk.END, summary)
        # Bind the current tweet as a default argument so each button keeps its own tweet.
        button = tk.Button(results_display, text="Details", command=lambda selected=tweet: show_details(selected))
        results_display.window_create(tk.END, window=button)
        results_display.insert(tk.END, "\n")

def show_details(result):
    """Show a dialog with the id, author, text, hashtags and retweets of a tweet.

    Raises:
        KeyError: if ``result`` has no ``'text'`` entry.
    """
    author = result.get('user', {})
    retweets = result.get('retweet_details', 'No retweet details available')
    lines = [
        f"Tweet ID: {result.get('id_str', 'N/A')}",
        f"User: {author.get('name', 'Unknown')}",
        f"Screen Name: {author.get('screen_name', 'N/A')}",
        f"Location: {author.get('location', 'N/A')}",
        f"Text: {result['text']}",
        f"Hashtags: {', '.join(result.get('hashtags', []))}",
        f"Retweet Details: {retweets}",
    ]
    messagebox.showinfo("Tweet Details", "\n".join(lines))

In [None]:

# UI initialization and main loop
# Widgets are packed top to bottom in the order they are created here;
# perform_search() reads the entry/combobox widgets through these
# module-level names.
root = tk.Tk()
root.title("Twitter Search App")
query_label = tk.Label(root, text="Enter search query:")
query_label.pack()
query_entry = tk.Entry(root)
query_entry.pack()
search_type_label = tk.Label(root, text="Search by:")
search_type_label.pack()
search_type_combobox = ttk.Combobox(root, values=("text", "hashtag", "user"))
search_type_combobox.pack()
# Preselect the first entry ("text")
search_type_combobox.current(0)
start_date_label = tk.Label(root, text="Start Date (YYYY-MM-DD):")
start_date_label.pack()
start_date_entry = tk.Entry(root)
start_date_entry.pack()
end_date_label = tk.Label(root, text="End Date (YYYY-MM-DD):")
end_date_label.pack()
end_date_entry = tk.Entry(root)
end_date_entry.pack()
search_button = tk.Button(root, text="Search", command=perform_search)
search_button.pack()
results_display = scrolledtext.ScrolledText(root, wrap=tk.WORD, height=10, width=50)
results_display.pack(pady=20)

def on_closing():
    """Persist the cache, then close the main window.

    The window is destroyed even when saving the cache fails; the exception
    from the failed save still propagates to the caller afterwards.
    """
    try:
        cache.save_to_disk()
    finally:
        root.destroy()

# Run on_closing when the user closes the window, then start the event loop
root.protocol("WM_DELETE_WINDOW", on_closing)
root.mainloop()

In [8]:
import time

def run_test_queries(use_cache):
    """Time ``search_tweets`` over a fixed list of queries and print the results.

    Args:
        use_cache: forwarded to ``search_tweets``.

    Prints the duration and result count of every query, then the average
    duration.
    """
    test_queries = [
        ("767", "user", "2006-07-14", "2006-07-14")
    ]
    times = []

    for query, query_type, start_date, end_date in test_queries:
        # perf_counter() is a monotonic clock intended for measuring intervals;
        # time.time() can jump when the system clock is adjusted.
        start_time = time.perf_counter()
        results = search_tweets(query, query_type, start_date, end_date, use_cache=use_cache)
        elapsed = time.perf_counter() - start_time
        times.append(elapsed)
        print(f"查询 '{query}' 类型为 '{query_type}' 耗时 {elapsed:.2f} 秒，找到结果数: {len(results)}")

    # Guard against an empty query list.
    average_time = sum(times) / len(times) if times else 0.0
    print(f"平均查询时间（{'启用' if use_cache else '禁用'}缓存）: {average_time:.2f} 秒")

# First run: cache enabled
run_test_queries(True)

# Clear the cache, then run again with the cache disabled
cache.cache.clear()
run_test_queries(False)


RetryError: Timeout of 300.0s exceeded, last exception: 429 Quota exceeded.

Cache saved to disk.
Cache saved to disk.
Cache saved to disk.
Cache saved to disk.
Cache saved to disk.
Cache saved to disk.
Cache saved to disk.


In [None]:
import firebase_admin
from firebase_admin import credentials, firestore
import json
from datetime import datetime, timedelta
import pytz
import tkinter as tk
from tkinter import messagebox, ttk, scrolledtext
import psycopg2

# Initialization of Firebase application
cred = credentials.Certificate('D:/Download/twitter-a3b9a-firebase-adminsdk-b9pvo-f8f057cf01.json')
try:
    # get_app() raises ValueError when no default app has been initialised.
    firebase_admin.get_app()
except ValueError:
    # firestore.client() needs an initialised app; create it from the
    # credential instead of depending on code outside this cell.
    firebase_admin.initialize_app(cred)
db = firestore.client()

def parse_twitter_date(datestr):
    """Turn a Twitter ``created_at`` string into an aware datetime in UTC."""
    utc_zone = pytz.timezone("UTC")
    moment = datetime.strptime(datestr, '%a %b %d %H:%M:%S %z %Y')
    return moment.astimezone(utc_zone)

def calculate_relevance(tweet, query):
    """Return the relevance score of ``tweet`` for ``query``.

    The score is 0.5 for a case-insensitive substring match in the text, plus
    0.5 times the sum of the like, retweet and reply counts (a missing count
    counts as 0).
    """
    lowered_query = query.lower()
    lowered_text = tweet['text'].lower()
    text_score = lowered_query in lowered_text
    counts = [tweet.get(name, 0) for name in ('like_count', 'retweet_count', 'reply_count')]
    return text_score * 0.5 + sum(counts) * 0.5

def get_user_data(user_ids):
    """Fetch basic profile data for the given user ids from PostgreSQL.

    Args:
        user_ids: sequence of user ids (strings or ints).

    Returns:
        Dict mapping each found id — in the same form the caller passed it —
        to ``{'name', 'screen_name', 'location', 'url'}``. Empty when
        ``user_ids`` is empty or the lookup fails (an error dialog is shown).
    """
    if not user_ids:
        return {}
    # The id column comes back as an int while callers may pass id strings.
    # Key the result by the caller's own form so lookups actually hit.
    requested = {str(user_id): user_id for user_id in user_ids}
    conn = None
    try:
        conn = psycopg2.connect(host="localhost", dbname="twitter_1", user="postgres", password="123456", port="5432")
        cursor = conn.cursor()
        format_strings = ','.join(['%s'] * len(user_ids))
        cursor.execute(f"SELECT id, name, screen_name, location, url FROM user_table WHERE id IN ({format_strings})", tuple(user_ids))
        user_data = {
            requested.get(str(row[0]), row[0]): {'name': row[1], 'screen_name': row[2], 'location': row[3], 'url': row[4]}
            for row in cursor.fetchall()
        }
        return user_data
    except Exception as e:
        messagebox.showerror("Error", f"Failed to fetch user data: {e}")
        return {}
    finally:
        if conn:
            conn.close()

def get_retweet_details(tweet_id):
    """Describe who retweeted ``tweet_id`` and when.

    Looks up the ``retweets`` documents whose ``original_tweet_id`` equals
    ``tweet_id`` and resolves each ``retweeter_id`` through ``get_user_data``.

    Returns:
        One line per retweet, joined with newlines; an empty string when there
        are no retweets.
    """
    retweet_docs = [
        doc.to_dict()
        for doc in db.collection('retweets').where('original_tweet_id', '==', tweet_id).stream()
    ]
    user_ids = [retweet['retweeter_id'] for retweet in retweet_docs]
    users_info = get_user_data(user_ids)
    retweet_info = []
    for retweet in retweet_docs:
        user_id = retweet['retweeter_id']
        if user_id in users_info:
            user_info = users_info[user_id]
            # The retweet time is stored on the retweet document, not on the user record.
            retweet_info.append(f"Retweeted by: {user_info['name']} at {retweet.get('retweet_time', 'Unknown')}")
        else:
            retweet_info.append("Retweet details unavailable")
    return "\n".join(retweet_info)

def search_tweets(query, search_type="text", start_date=None, end_date=None):
    """Search tweets in Firestore, rank them, and attach user and retweet info.

    Args:
        query: text prefix (``"text"``) or hashtag (``"hashtag"``) to look for.
        search_type: ``"text"`` or ``"hashtag"``.
            NOTE(review): any other value, including the "user" option offered
            by the UI, applies no filter and streams the whole collection —
            confirm the intended behaviour.
        start_date, end_date: optional ``YYYY-MM-DD`` strings. The date filter
            is applied only when both are given and covers both days in full.

    Returns:
        Tweet dicts sorted by descending ``relevance_score``, each with
        ``'user'`` and ``'retweet_details'`` entries.
    """
    tweets = db.collection('tweets')
    if search_type == "text":
        tweets = tweets.where('text', '>=', query).where('text', '<=', query + '\uf8ff')
    elif search_type == "hashtag":
        tweets = tweets.where('hashtags', 'array_contains', query)

    results = [doc.to_dict() for doc in tweets.stream()]

    if start_date and end_date:
        utc_zone = pytz.utc
        range_start = utc_zone.localize(datetime.strptime(start_date, "%Y-%m-%d"))
        # Exclusive upper bound: midnight after the end date.
        range_end = utc_zone.localize(datetime.strptime(end_date, "%Y-%m-%d") + timedelta(days=1))
        results = [tweet for tweet in results
                   if range_start <= parse_twitter_date(tweet['created_at']) < range_end]

    # Score and rank every result, whether or not a date range was given.
    for tweet in results:
        tweet['relevance_score'] = calculate_relevance(tweet, query)
    results.sort(key=lambda x: x['relevance_score'], reverse=True)

    user_ids = {result['user_id'] for result in results}
    users_info = get_user_data(list(user_ids))
    for result in results:
        result['user'] = users_info.get(result['user_id'], {})
        result['retweet_details'] = get_retweet_details(result['id_str'])
    return results

def perform_search():
    """Search with the values currently entered in the form and show the hits.

    The results box is emptied first. With no hits an information dialog is
    shown and nothing is written; otherwise every tweet is written as one
    summary line followed by a "Details" button bound to that tweet.
    """
    found = search_tweets(
        query_entry.get(),
        search_type_combobox.get(),
        start_date_entry.get(),
        end_date_entry.get(),
    )
    results_display.delete(1.0, tk.END)

    if not found:
        messagebox.showinfo("Search Result", "No tweets found matching your criteria.")
        return

    for item in found:
        author = item.get('user', {})
        line = f"{item['text']} - {author.get('name', 'Unknown')} - {item.get('created_at', 'No date')} - Retweets: {item.get('retweet_details', 'None')}\n"
        results_display.insert(tk.END, line)
        # The default argument freezes the tweet for this particular button.
        open_details = tk.Button(results_display, text="Details", command=lambda chosen=item: show_details(chosen))
        results_display.window_create(tk.END, window=open_details)
        results_display.insert(tk.END, "\n")

def show_details(result):
    """Open an information dialog describing one tweet.

    The dialog lists the tweet id, author name, screen name, location, text,
    hashtags and retweet details, using placeholder text for missing fields.

    Raises:
        KeyError: if ``result`` has no ``'text'`` entry.
    """
    author = result.get('user', {})
    retweets = result.get('retweet_details', 'No retweet details available')
    parts = (
        f"Tweet ID: {result.get('id_str', 'N/A')}",
        f"User: {author.get('name', 'Unknown')}",
        f"Screen Name: {author.get('screen_name', 'N/A')}",
        f"Location: {author.get('location', 'N/A')}",
        f"Text: {result['text']}",
        f"Hashtags: {', '.join(result.get('hashtags', []))}",
        f"Retweet Details: {retweets}",
    )
    messagebox.showinfo("Tweet Details", "\n".join(parts))

# UI initialization and main loop
# Widgets are packed top to bottom in the order they are created here;
# perform_search() reads the entry/combobox widgets through these
# module-level names.
root = tk.Tk()
root.title("Twitter Search App")
query_label = tk.Label(root, text="Enter search query:")
query_label.pack()
query_entry = tk.Entry(root)
query_entry.pack()
search_type_label = tk.Label(root, text="Search by:")
search_type_label.pack()
search_type_combobox = ttk.Combobox(root, values=("text", "hashtag", "user"))
search_type_combobox.pack()
# Preselect the first entry ("text")
search_type_combobox.current(0)
start_date_label = tk.Label(root, text="Start Date (YYYY-MM-DD):")
start_date_label.pack()
start_date_entry = tk.Entry(root)
start_date_entry.pack()
end_date_label = tk.Label(root, text="End Date (YYYY-MM-DD):")
end_date_label.pack()
end_date_entry = tk.Entry(root)
end_date_entry.pack()
search_button = tk.Button(root, text="Search", command=perform_search)
search_button.pack()
results_display = scrolledtext.ScrolledText(root, wrap=tk.WORD, height=10, width=50)
results_display.pack(pady=20)
# Start the Tk event loop
root.mainloop()


In [None]:
import firebase_admin
from firebase_admin import credentials, firestore
import json
from datetime import datetime, timedelta
import pytz
import tkinter as tk
from tkinter import messagebox, ttk, scrolledtext
import psycopg2

# Initialization of Firebase application
cred_path = 'D:/Download/twitter-a3b9a-firebase-adminsdk-b9pvo-f8f057cf01.json'
cred = credentials.Certificate(cred_path)
try:
    # get_app() raises ValueError when no default app has been initialised.
    firebase_admin.get_app()
except ValueError:
    # firestore.client() needs an initialised app; create it from the
    # credential instead of depending on code outside this cell.
    firebase_admin.initialize_app(cred)
db = firestore.client()

def parse_twitter_date(datestr):
    """Parse a Twitter ``created_at`` string and return it converted to UTC."""
    layout = '%a %b %d %H:%M:%S %z %Y'
    stamp = datetime.strptime(datestr, layout)
    return stamp.astimezone(pytz.UTC)

def calculate_relevance(tweet, query):
    """Compute a simple relevance score for ``tweet`` against ``query``.

    A case-insensitive substring match in the text contributes 0.5; the like,
    retweet and reply counts (0 when absent) together contribute half their
    sum.
    """
    is_match = query.lower() in tweet['text'].lower()
    total_interactions = 0
    for counter in ('like_count', 'retweet_count', 'reply_count'):
        total_interactions = total_interactions + tweet.get(counter, 0)
    return is_match * 0.5 + total_interactions * 0.5

def get_user_data(user_ids):
    """Fetch basic profile data for the given user ids from PostgreSQL.

    Args:
        user_ids: sequence of user ids (strings or ints).

    Returns:
        Dict mapping each found id — in the same form the caller passed it —
        to ``{'name', 'screen_name', 'location', 'url'}``. Empty when
        ``user_ids`` is empty or a database error occurs (an error dialog is
        shown).
    """
    if not user_ids:
        return {}
    # The id column comes back as an int while callers may pass id strings.
    # Key the result by the caller's own form so lookups actually hit.
    requested = {str(user_id): user_id for user_id in user_ids}
    conn = None  # must be bound before `finally`, even when connect() fails
    try:
        conn = psycopg2.connect(host="localhost", dbname="twitter_1", user="postgres", password="123456", port="5432")
        cursor = conn.cursor()
        format_strings = ','.join(['%s'] * len(user_ids))
        query = f"SELECT id, name, screen_name, location, url FROM user_table WHERE id IN ({format_strings})"
        cursor.execute(query, tuple(user_ids))
        user_data = {
            requested.get(str(row[0]), row[0]): {'name': row[1], 'screen_name': row[2], 'location': row[3], 'url': row[4]}
            for row in cursor.fetchall()
        }
    except psycopg2.Error as e:
        messagebox.showerror("Database Error", f"Failed to fetch user data: {e}")
        return {}
    finally:
        if conn is not None:
            conn.close()
    return user_data

def get_retweet_details(tweet_id):
    """Describe who retweeted ``tweet_id`` and when.

    Looks up the ``retweets`` documents whose ``original_tweet_id`` equals
    ``tweet_id`` and resolves each ``retweeter_id`` through ``get_user_data``.

    Returns:
        A string with one line per retweet (empty when there are none). When
        the lookup fails, an error dialog is shown and the string
        ``"Error retrieving retweet details"`` is returned.
    """
    try:
        retweet_docs = [
            doc.to_dict()
            for doc in db.collection('retweets').where('original_tweet_id', '==', tweet_id).stream()
        ]
        user_ids = [retweet['retweeter_id'] for retweet in retweet_docs]
        users_info = get_user_data(user_ids)
        # The retweet time is stored on the retweet document, not on the user record.
        retweet_info = [
            f"Retweeted by: {users_info[retweet['retweeter_id']]['name']} at {retweet.get('retweet_time', 'Unknown')}"
            if retweet['retweeter_id'] in users_info else "Retweet details unavailable"
            for retweet in retweet_docs
        ]
    except Exception as e:
        messagebox.showerror("Firebase Error", f"Failed to fetch retweet details: {e}")
        # Return a string here too, so the result type is the same on both paths.
        return "Error retrieving retweet details"
    return "\n".join(retweet_info)

def search_tweets(query, search_type="text", start_date=None, end_date=None):
    """Search tweets in Firestore, rank them by relevance and attach authors.

    Args:
        query: text prefix (``"text"``) or hashtag (``"hashtag"``) to look for.
        search_type: ``"text"`` or ``"hashtag"``; any other value applies no
            Firestore filter.
        start_date, end_date: optional ``YYYY-MM-DD`` strings. The date filter
            is applied only when both are given and covers both days in full.

    Returns:
        Tweet dicts sorted by descending ``relevance_score``, each with a
        ``'user'`` entry (``{}`` when the author is unknown). An empty list
        when fetching or filtering fails (an error dialog is shown). Retweet
        details are not fetched here.
    """
    try:
        tweets = db.collection('tweets')
        if search_type == "text":
            tweets = tweets.where('text', '>=', query).where('text', '<=', query + '\uf8ff')
        elif search_type == "hashtag":
            tweets = tweets.where('hashtags', 'array_contains', query)

        date_range = None
        if start_date and end_date:
            range_start, range_end = [pytz.utc.localize(datetime.strptime(d, "%Y-%m-%d")) for d in [start_date, end_date]]
            range_end += timedelta(days=1)  # exclusive upper bound: midnight after the end date
            date_range = (range_start, range_end)

        results = []
        for doc in tweets.stream():
            tweet = doc.to_dict()  # convert once per document
            # Filter on the parsed date of each fetched tweet via
            # parse_twitter_date, rather than with a Firestore `where` that
            # compares the field against datetime objects.
            if date_range is not None:
                created = parse_twitter_date(tweet['created_at'])
                if not (date_range[0] <= created < date_range[1]):
                    continue
            tweet['relevance_score'] = calculate_relevance(tweet, query)
            results.append(tweet)
    except Exception as e:
        messagebox.showerror("Firebase Error", f"Failed to fetch tweets: {e}")
        return []
    results.sort(key=lambda x: x['relevance_score'], reverse=True)

    # Attach author data so that callers reading result['user'] get real values.
    user_ids = {tweet.get('user_id') for tweet in results} - {None}
    users_info = get_user_data(list(user_ids))
    for tweet in results:
        tweet['user'] = users_info.get(tweet.get('user_id'), {})
    return results

# UI initialization and main loop
def setup_ui():
    """Build the search window and run the Tk event loop until it is closed.

    The input widgets and the results box are collected in a dict that is
    handed to ``perform_search`` whenever the Search button is pressed.
    """
    root = tk.Tk()
    root.title("Twitter Search App")

    query_box = tk.Entry(root)
    type_box = ttk.Combobox(root, values=("text", "hashtag", "user"), state="readonly")
    start_box = tk.Entry(root)
    end_box = tk.Entry(root)
    output_box = scrolledtext.ScrolledText(root, wrap=tk.WORD, height=10, width=50)
    elements = {
        "query": query_box,
        "search_type": type_box,
        "start_date": start_box,
        "end_date": end_box,
        "results_display": output_box,
    }
    type_box.current(0)

    # Each input is preceded by its caption; pack order defines the layout.
    labelled_inputs = (
        ("Enter search query:", query_box),
        ("Search by:", type_box),
        ("Start Date (YYYY-MM-DD):", start_box),
        ("End Date (YYYY-MM-DD):", end_box),
    )
    for caption, widget in labelled_inputs:
        tk.Label(root, text=caption).pack()
        widget.pack()

    search_button = tk.Button(root, text="Search", command=lambda: perform_search(elements))
    search_button.pack()
    output_box.pack(pady=20)
    root.mainloop()

def perform_search(elements):
    """Run a search from the form widgets in ``elements`` and list the matches.

    ``elements`` maps ``"query"``, ``"search_type"``, ``"start_date"``,
    ``"end_date"`` and ``"results_display"`` to their widgets. The results box
    is cleared, an information dialog is shown when nothing matched, and one
    summary line is written per tweet.
    """
    results = search_tweets(
        elements["query"].get(),
        elements["search_type"].get(),
        elements["start_date"].get(),
        elements["end_date"].get(),
    )
    output_box = elements["results_display"]
    output_box.delete(1.0, tk.END)

    if not results:
        messagebox.showinfo("Search Result", "No tweets found matching your criteria.")

    for tweet in results:
        author = tweet.get('user', {})
        summary = f"{tweet['text']} - {author.get('name', 'Unknown')} - {tweet.get('created_at', 'No date')} - Retweets: {tweet.get('retweet_details', 'None')}\n"
        output_box.insert(tk.END, summary)

setup_ui()
