In [60]:
!pip install atproto
!pip install pandas
!pip install aiohttp



In [61]:
import aiohttp
import pandas as pd
import os
import sys

In [62]:
# Initialisiere die Liste für die gesammelten Daten
all_feeds = []
MAX_QUERIES = 300  # Definiere eine maximale Anzahl von Abfragen (eine Abfrage beinhaltet 100 API aufrufe z.B. 300 = ~30000)

In [63]:
def map_post(post):
    try:
        return [{
            'author': post.get('author').get('displayName') if post.get('author').get('displayName') else post.get('author').get('handle'),
            'text': post.get('record').get('text'),
            'created_at': post.get('record').get('createdAt'),
        }]
    except KeyError as e:
        print(f"Schlüssel {e} nicht gefunden im Post")
        return []
    
def save_to_csv():
    if all_feeds:
        df = pd.DataFrame(all_feeds)
        file_name = 'filtered_feeds.csv'
        if os.path.exists(file_name):
            df.to_csv(file_name, mode='a', header=False, index=False)
        else:
            df.to_csv(file_name, index=False)
    else:
        print("\nKeine Daten zum Speichern vorhanden.")

In [64]:
async def fetch_post(session, query, cursor=None):
    q = {'q': query, 'limit': 100}
    if cursor:
        q['cursor'] = cursor
    try:
        async with session.get('https://public.api.bsky.app/xrpc/app.bsky.feed.searchPosts', params=q) as response:
            # Überprüfe den Content-Type
            content_type = response.headers.get('Content-Type')
            if 'application/json' in content_type:
                return await response.json()
            else:
                print(f"Unerwarteter Content-Type: {content_type}")
                # response_text = await response.text()
                # print(f"Antwortinhalt: {response_text}")
                return None
    except Exception as e:
        print(f"Fehler beim Abrufen von Posts: {e}")
        return None

async def fetch_replies(session, uri):
    try:
        async with session.get(f'https://public.api.bsky.app/xrpc/app.bsky.feed.getPostThread', params={'uri': uri, 'depth': 1, 'parentHeight': 0}) as response:
            content_type = response.headers.get('Content-Type')
            if 'application/json' in content_type:
                return await response.json()
            else:
                print(f"Unerwarteter Content-Type: {content_type}")
                # response_text = await response.text()
                # print(f"Antwortinhalt: {response_text}")
                return None
    except Exception as e:
        print(f"Fehler beim Abrufen von Antworten: {e}")
        return None


async def process_posts(session, query):
    cursor = None
    side = 0
    replies_counter = 0
    saved_feed_count = 0

    print(f"Werte Suchbegriff {query} aus...")

    while side < MAX_QUERIES:
        rawFeeds = await fetch_post(session, query, cursor)

        if rawFeeds is None:
            continue

        for post in rawFeeds.get('posts', []):
            all_feeds.extend(map_post(post))

            if post.get('replyCount') > 0 and post.get('uri', None) is not None:
                replies_counter += post.get('replyCount')
                rawThread = await fetch_replies(session, post.get('uri'))

                if rawThread is not None:
                    for reply in rawThread.get('thread').get('replies', []):
                        all_feeds.extend(map_post(reply.get('post')))

        cursor = rawFeeds.get('cursor')
        side += 1

        save_to_csv()
        saved_feed_count += len(all_feeds)
        all_feeds.clear()

        percent_complete = (side / MAX_QUERIES) * 100
        sys.stdout.write(f'\rFortschritt: {percent_complete}% - {saved_feed_count} posts geschrieben, davon {replies_counter} replies')
        sys.stdout.flush()

async def start():
    async with aiohttp.ClientSession() as session:
        await process_posts(session, 'europawahl')
        # await process_posts(session, 'euwahl')

In [65]:
# Ausführung des asynchronen Codes
await start()  # Direktes Ausführen der async-Funktion

Werte Suchbegriff europawahl aus...
Fortschritt: 100.0% - 55959 posts geschrieben, davon 26973 replies6859 repliess