In [1]:
import os
import pandas as pd
from datetime import datetime
from dotenv import load_dotenv
from telethon import TelegramClient, errors

# --- Load credentials ---
# load_dotenv() makes variables from a .env file available through os.getenv.
load_dotenv()
_api_id_raw = os.getenv("TELEGRAM_API_ID")
API_HASH = os.getenv("TELEGRAM_API_HASH")
if not _api_id_raw or not API_HASH:
    # Fail early with an actionable message instead of the opaque
    # "int() argument must be ... not 'NoneType'" TypeError, or a missing
    # hash that only surfaces later inside the Telegram client.
    raise RuntimeError(
        "Missing Telegram credentials: set TELEGRAM_API_ID and TELEGRAM_API_HASH "
        "in the environment or in a .env file."
    )
API_ID = int(_api_id_raw)  # still raises ValueError if the value is not numeric

# Name of the local session (so you don't need to log in each time)
SESSION_NAME = "tg_session"

# Channels or groups you want to scrape
TARGETS = [
    "Whale_Alert",
    "binance_announcements",
    "coingecko",
    "cointelegraph"
]

LIMIT_PER_CHANNEL = 300   # messages requested per channel

# Initialize Telegram client (shared by scrape_channel / scrape_all below)
client = TelegramClient(SESSION_NAME, API_ID, API_HASH)

async def scrape_channel(target, limit):
    """Collect recent messages from a single channel/group.

    Uses the module-level ``client``.

    Args:
        target: Identifier passed to ``client.get_entity`` (e.g. a username).
        limit: Passed as ``limit`` to ``client.iter_messages``. Messages with
            neither text nor media are skipped, so fewer rows may come back.

    Returns:
        A list of dicts, one per kept message, with the keys ``channel``,
        ``message_id``, ``date``, ``text``, ``views``, ``forwards``,
        ``replies``, ``url`` and ``has_media``. Returns ``[]`` when
        ``get_entity`` raises ``ValueError`` for ``target``.

    Any other exception propagates to the caller.
    """
    try:
        entity = await client.get_entity(target)
    except ValueError:
        print(f"⚠️ Could not resolve {target}. Skipping.")
        return []

    # These depend only on the entity, so compute them once instead of
    # repeating the getattr lookups for every message.
    username = getattr(entity, "username", None)
    channel = username or getattr(entity, "title", str(target))
    # A t.me link can only be built when the entity has a username.
    url_prefix = f"https://t.me/{username}/" if username else None

    rows = []
    async for msg in client.iter_messages(entity, limit=limit):
        # Skip entries that carry neither text nor media.
        if not msg.message and not msg.media:
            continue

        rows.append({
            "channel": channel,
            "message_id": msg.id,
            "date": msg.date,
            "text": msg.message or "",
            "views": getattr(msg, "views", None),
            "forwards": getattr(msg, "forwards", None),
            # msg.replies is an object holding the count, or falsy when absent.
            "replies": (msg.replies.replies if msg.replies else None),
            "url": f"{url_prefix}{msg.id}" if url_prefix else None,
            "has_media": msg.media is not None,
        })
    return rows

async def scrape_all():
    """Scrape every channel in ``TARGETS`` and save the result to CSV/Parquet.

    Starts the module-level ``client``, calls ``scrape_channel`` for each
    target with ``LIMIT_PER_CHANNEL``, and writes the combined rows to
    ``telegram_messages_<UTC timestamp>.csv`` (and ``.parquet`` when that
    export succeeds) using relative paths.

    A failure on one target is printed and the loop moves on to the next.

    Returns:
        The combined DataFrame sorted by ``channel`` then ``date``, or
        ``None`` when no messages were collected (nothing is written then).
    """
    # Local import: the file-level import only brings in ``datetime``.
    from datetime import timezone

    await client.start()  # will prompt for phone & login code first time
    # NOTE: the client is not disconnected here; it stays connected after
    # this coroutine returns.
    all_rows = []

    for t in TARGETS:
        try:
            print(f"Scraping: {t}")
            rows = await scrape_channel(t, LIMIT_PER_CHANNEL)
            print(f"  → {len(rows)} messages")
            all_rows.extend(rows)
        except errors.FloodWaitError as e:
            print(f"⏳ Rate-limited for {e.seconds}s on {t}. Try later.")
        except errors.ChatAdminRequiredError:
            print(f"🚫 No permission to read {t}. (Private/restricted)")
        except Exception as e:
            # Deliberately broad: one bad target must not abort the whole run.
            print(f"❌ {t}: {e}")

    if not all_rows:
        print("No data collected.")
        return

    df = pd.DataFrame(all_rows).sort_values(["channel", "date"]).reset_index(drop=True)

    # datetime.utcnow() is deprecated since Python 3.12; the aware equivalent
    # formats to the same timestamp string.
    ts = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S")
    csv_path = f"telegram_messages_{ts}.csv"
    parquet_path = f"telegram_messages_{ts}.parquet"

    df.to_csv(csv_path, index=False)
    try:
        df.to_parquet(parquet_path, index=False)
    except Exception as e:
        # Parquet stays best-effort, but say why it was skipped instead of
        # hiding the failure.
        print(f"⚠️ Parquet export skipped: {e}")
        parquet_path = None

    print(f"\n✅ Saved {len(df)} messages")
    print(f"CSV: {csv_path}")
    if parquet_path:
        print(f"Parquet: {parquet_path}")

    return df

In [3]:
import asyncio

import nest_asyncio

# nest_asyncio patches asyncio so run_until_complete() can be called from a
# notebook cell even while an event loop is already running.
nest_asyncio.apply()

df = asyncio.get_event_loop().run_until_complete(scrape_all())

# scrape_all() returns None when nothing was collected, so guard the preview
# instead of letting df.head() raise AttributeError.
df.head() if df is not None else None

Signed in successfully as D K; remember to not break the ToS or you will risk an account ban!
Scraping: Whale_Alert
  → 300 messages
Scraping: binance_announcements
  → 300 messages
Scraping: coingecko
  → 300 messages
Scraping: cointelegraph
  → 300 messages

✅ Saved 1200 messages
CSV: telegram_messages_20250818_134711.csv
Parquet: telegram_messages_20250818_134711.parquet


Unnamed: 0,channel,message_id,date,text,views,forwards,replies,url,has_media
0,binance_announcements,7258,2025-05-14 09:01:03+00:00,Binance Will Support the EOS (EOS) Token Swap ...,183726.0,164.0,56.0,https://t.me/binance_announcements/7258,True
1,binance_announcements,7259,2025-05-15 05:16:23+00:00,Introducing Nexpace (NXPC) on Binance HODLer A...,202729.0,166.0,44.0,https://t.me/binance_announcements/7259,True
2,binance_announcements,7260,2025-05-15 07:00:54+00:00,"Binance Will Add Nexpace (NXPC) on Earn, Buy C...",174306.0,77.0,80.0,https://t.me/binance_announcements/7260,True
3,binance_announcements,7261,2025-05-15 10:01:43+00:00,Introducing Solayer (LAYER) on BNSOL Super Sta...,173067.0,60.0,9.0,https://t.me/binance_announcements/7261,True
4,binance_announcements,7262,2025-05-15 12:02:47+00:00,Nothing’s Topping This: Refer Friends & Share ...,195326.0,54.0,46.0,https://t.me/binance_announcements/7262,True
