In [1]:
import csv
from datetime import datetime

def _tweet_to_row(tweet):
    """Flatten one tweet and its author into a dict keyed by CSV column name."""
    user = tweet.user
    return {
        # --- tweet identity ---
        'id': tweet.id,
        'id_str': tweet.id_str,
        'url': tweet.url,
        'date': tweet.date.isoformat(),
        # --- author profile ---
        'user_id': user.id,
        'user_id_str': user.id_str,
        'user_url': user.url,
        'username': user.username,
        'displayname': user.displayname,
        'rawDescription': user.rawDescription,
        'user_created': user.created.isoformat(),
        'followersCount': user.followersCount,
        'friendsCount': user.friendsCount,
        'statusesCount': user.statusesCount,
        'favouritesCount': user.favouritesCount,
        'listedCount': user.listedCount,
        'mediaCount': user.mediaCount,
        'location': user.location,
        'profileImageUrl': user.profileImageUrl,
        'profileBannerUrl': user.profileBannerUrl,
        'protected': user.protected,
        'verified': user.verified,
        'blue': user.blue,
        # --- tweet content and engagement ---
        'lang': tweet.lang,
        'rawContent': tweet.rawContent,
        'replyCount': tweet.replyCount,
        'retweetCount': tweet.retweetCount,
        'likeCount': tweet.likeCount,
        'quoteCount': tweet.quoteCount,
        'bookmarkedCount': tweet.bookmarkedCount,
        'conversationId': tweet.conversationId,
        'conversationIdStr': tweet.conversationIdStr,
        'source': tweet.source,
        'sourceUrl': tweet.sourceUrl,
        'sourceLabel': tweet.sourceLabel
    }


def save_tweets_to_csv(tweets, filename):
    """Write tweets, together with their authors' profile fields, to a CSV file.

    Args:
        tweets: Iterable of tweet objects. Each must expose the attributes read
            in ``_tweet_to_row``, including a ``user`` attribute; ``tweet.date``
            and ``user.created`` must provide ``isoformat()``.
        filename: Destination path. Missing parent directories are created and
            an existing file is overwritten.

    An empty ``tweets`` iterable still produces a file holding only the header
    row.
    """
    fieldnames = [
        'id', 'id_str', 'url', 'date', 'user_id', 'user_id_str',
        'user_url', 'username', 'displayname', 'rawDescription',
        'user_created', 'followersCount', 'friendsCount', 'statusesCount',
        'favouritesCount', 'listedCount', 'mediaCount', 'location',
        'profileImageUrl', 'profileBannerUrl', 'protected', 'verified',
        'blue', 'lang', 'rawContent', 'replyCount', 'retweetCount',
        'likeCount', 'quoteCount', 'bookmarkedCount', 'conversationId',
        'conversationIdStr', 'source', 'sourceUrl', 'sourceLabel'
    ]

    # Create the target directory first so that a missing folder cannot throw
    # away results that were already fetched. A bare file name has an empty
    # dirname, which os.makedirs would reject, hence the guard.
    parent = os.path.dirname(filename)
    if parent:
        os.makedirs(parent, exist_ok=True)

    # newline='' lets the csv module control line endings, so line breaks
    # inside tweet text stay within their quoted field.
    with open(filename, mode='w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        for tweet in tweets:
            writer.writerow(_tweet_to_row(tweet))
            
import os
import glob

def check_csv_exists(directory, d, m, y):
    """Tell whether `directory` already holds a CSV for the given date.

    Any file whose name ends in ``_{d}_{m}_{y}.csv`` counts, whatever prefix
    comes before the date part.

    Returns:
        True if at least one matching file exists, otherwise False.
    """
    # Iterate lazily: a single hit is enough to answer the question.
    hits = glob.iglob(os.path.join(directory, f"*_{d}_{m}_{y}.csv"))
    return next(hits, None) is not None


In [3]:
import json
import random
from datetime import datetime, timedelta
from dateutil.relativedelta import relativedelta
from twscrape import API, gather

# Local input files: saved session cookies, the proxy list and the account
# credentials. All paths are relative to the notebook's working directory.
COOKIES_PATH="cookies.json"
PROXY_PATH = "proxies.txt"
ACC_PATH = "acc.json"


# Account records; the cells below read the TWITTER_USERNAME,
# TWITTER_PASSWORD and TWITTER_EMAIL keys from each entry.
with open(ACC_PATH, "r") as f:
    accs = json.load(f)

# One proxy per line. Blank lines are dropped and surrounding whitespace is
# stripped here so that a random pick never lands on an empty entry.
with open(PROXY_PATH, "r") as f:
    proxies = [line.strip() for line in f if line.strip()]

# Parse and re-serialise the cookies: this fails fast on malformed JSON and
# yields the JSON string that is passed as `cookies=` when adding accounts.
with open(COOKIES_PATH, "r") as f:
    cookies = json.dumps(json.load(f))


def get_one_month_date_range(year, month, day):
    """Return the ``since``/``until`` date bounds for a one-month search window.

    Args:
        year, month, day: Calendar date on which the window starts.

    Returns:
        A ``(start_time, end_time)`` tuple of ``'YYYY-MM-DD'`` strings.
        ``start_time`` is the given date. ``end_time`` is the same date one
        month later and is meant as an exclusive upper bound.

    The search ``until:`` operator excludes the date it names. The end is
    therefore the first day of the following period rather than the last day
    of this one; returning the last day would silently drop that day's tweets
    from every window.
    """
    start_date = datetime(year, month, day)
    # One calendar month later (not a fixed number of days).
    end_date = start_date + relativedelta(months=1)

    # Date-only strings, which is the form the since:/until: operators take.
    return start_date.strftime('%Y-%m-%d'), end_date.strftime('%Y-%m-%d')

def getProxy():
    """Pick a random entry from ``proxies`` and return it as an HTTP proxy URL.

    Accepted entry formats (surrounding whitespace is ignored, blank entries
    are skipped):

    * ``host:port:user:password`` -> ``http://user:password@host:port``
    * ``host:port``               -> ``http://host:port``

    Raises:
        IndexError: if ``proxies`` holds no non-blank entry.
        ValueError: if the chosen entry matches neither format.
    """
    # Drop blank lines so a stray empty line in the proxy file cannot be picked.
    candidates = [entry.strip() for entry in proxies if entry.strip()]
    line = random.choice(candidates)

    # maxsplit=3 keeps any ':' inside the password instead of truncating it.
    parts = line.split(":", 3)
    if len(parts) == 4:
        host, port, user, password = parts
        return f"http://{user}:{password}@{host}:{port}"
    if len(parts) == 2:
        host, port = parts
        return f"http://{host}:{port}"
    raise ValueError(
        f"Malformed proxy entry {line!r}: expected 'host:port' or 'host:port:user:password'"
    )

api = API()  # or API("path-to.db") - default is `accounts.db`

# ADD ACCOUNTS (for CLI usage see BELOW)
for acc in accs:
    await api.pool.add_account(
        acc["TWITTER_USERNAME"],
        acc["TWITTER_PASSWORD"],
        acc["TWITTER_EMAIL"],
        acc["TWITTER_PASSWORD"],
        cookies=cookies
    )
    
await api.pool.login_all()

data_dir = "data"

for y in range(2007,2025):
    for m in range(1,13):
        query = "ubisoft"
        attributes = "-filter:media"
        api.proxy = getProxy()
        d = 1
        
        if not check_csv_exists(data_dir, d, m, y):
            start_time, end_time = get_one_month_date_range(y, m, d)

            tweets = await gather(api.search(f"{query} {attributes} since:{start_time} until:{end_time} lang:en", limit=1000))  # list[Tweet]
            save_tweets_to_csv(tweets, os.path.join(data_dir, f"{query}_{d}_{m}_{y}.csv"))



[32m2024-10-30 21:01:52.596[0m | [1mINFO    [0m | [36mtwscrape.accounts_pool[0m:[36mget_for_queue_or_wait[0m:[36m301[0m - [1mNo account available for queue "SearchTimeline". Next available at 21:08:40[0m
[32m2024-10-30 21:08:43.686[0m | [1mINFO    [0m | [36mtwscrape.accounts_pool[0m:[36mget_for_queue_or_wait[0m:[36m308[0m - [1mContinuing with account buthuai on queue SearchTimeline[0m
[32m2024-10-30 21:09:24.718[0m | [1mINFO    [0m | [36mtwscrape.accounts_pool[0m:[36mget_for_queue_or_wait[0m:[36m301[0m - [1mNo account available for queue "SearchTimeline". Next available at 21:23:44[0m
[32m2024-10-30 21:23:46.903[0m | [1mINFO    [0m | [36mtwscrape.accounts_pool[0m:[36mget_for_queue_or_wait[0m:[36m308[0m - [1mContinuing with account buthuai on queue SearchTimeline[0m
[32m2024-10-30 21:24:29.214[0m | [1mINFO    [0m | [36mtwscrape.accounts_pool[0m:[36mget_for_queue_or_wait[0m:[36m301[0m - [1mNo account available for queue "SearchTi

In [None]:
# NOTE(review): `day`, `month` and `year` are not assigned in any cell visible
# here, and this cell has never been executed (In [None]) — confirm where they
# are supposed to come from, or drop the cell.
output_name = f"ubisoft_{day}_{month}_{year}.csv"
save_tweets_to_csv(tweets, output_name)