In [None]:
# Setup and Imports
import asyncio
import os
from pathlib import Path
from datetime import datetime

# Load environment variables
def load_env(env_path=".env"):
    """Load ``KEY=VALUE`` pairs from a dotenv-style file into ``os.environ``.

    Args:
        env_path: Path of the file to read. Defaults to ``.env`` in the
            current working directory. A missing file is silently ignored.

    Parsing rules:
        * Blank lines and lines whose first non-blank character is ``#``
          are skipped.
        * Lines without ``=`` or with an empty key are skipped instead of
          creating junk variables (an empty key would make ``os.environ``
          raise).
        * A leading ``export `` on the key is dropped.
        * One pair of matching surrounding quotes is removed from the value.
        * Existing environment variables are overwritten.

    Returns:
        None. The function only mutates ``os.environ``.
    """
    env_file = Path(env_path)
    if not env_file.is_file():
        return

    # utf-8-sig transparently drops a BOM so it cannot end up in the first key.
    with open(env_file, encoding="utf-8-sig") as f:
        for raw_line in f:
            line = raw_line.strip()
            if not line or line.startswith("#"):
                continue

            key, sep, value = line.partition("=")
            key = key.strip()
            if key.startswith("export "):
                key = key[len("export "):].strip()
            if not sep or not key:
                # Not an assignment; nothing sensible to export.
                continue

            value = value.strip()
            if len(value) >= 2 and value[0] == value[-1] and value[0] in ("'", '"'):
                value = value[1:-1]

            os.environ[key] = value


load_env()

# Telegram handles of the Ethiopian e-commerce channels this notebook works on
target_channels = [
    "@ShegerOnlineStore",
    "@ethio_commerce",
    "@addis_market",
    "@ethiopia_shopping",
]

# Echo the configuration so the cell output records what was targeted
print("Multi-channel scraping setup complete")
print(f"Target channels: {target_channels}")


In [None]:
# Test Channel Accessibility
async def test_channel_access():
    """Check which of the configured ``target_channels`` can be resolved.

    Credentials are read from the ``TELEGRAM_API_ID``, ``TELEGRAM_API_HASH``
    and ``TELEGRAM_PHONE_NUMBER`` environment variables. Every entry of the
    module-level ``target_channels`` list is passed to ``client.get_entity``;
    entries that resolve without raising are reported as accessible.

    Returns:
        list: The accessible entries of ``target_channels`` in their original
        order. An empty list is returned when the credentials are missing or
        when connecting (or disconnecting) fails.
    """
    api_id = os.getenv("TELEGRAM_API_ID")
    api_hash = os.getenv("TELEGRAM_API_HASH")
    phone = os.getenv("TELEGRAM_PHONE_NUMBER")

    if not api_id or not api_hash:
        print("Telegram API credentials not found!")
        return []

    try:
        # Imported lazily so that a missing Telethon install is reported by
        # the handler below instead of breaking the whole cell.
        from telethon import TelegramClient

        print("Connecting to Telegram...")
        client = TelegramClient("channel_test_session", int(api_id), api_hash)

        try:
            await client.start(phone=phone)
            print("Connected successfully")

            accessible_channels = []

            for channel in target_channels:
                print(f"\nTesting channel: {channel}")
                try:
                    entity = await client.get_entity(channel)
                    accessible_channels.append(channel)
                    print(f"  Accessible: {entity.title}")
                    # The attribute may be missing or present-but-None; show
                    # 'N/A' in both cases rather than printing "None".
                    subscribers = getattr(entity, 'participants_count', None)
                    if subscribers is None:
                        subscribers = 'N/A'
                    print(f"  Subscribers: {subscribers}")

                except Exception as e:
                    print(f"  Cannot access: {e}")

            return accessible_channels
        finally:
            # Always release the connection, including when start() fails.
            await client.disconnect()

    except Exception as e:
        print(f"Connection failed: {e}")
        return []

# Run the test
# NOTE: this uses `await` at the top level of the cell, which is only valid
# in an environment that supports it (e.g. IPython/Jupyter autoawait).
accessible_channels = await test_channel_access()

# Summarise the outcome: a count first, then one line per reachable channel.
print(f"\nAccessible channels: {len(accessible_channels)}")
for channel in accessible_channels:
    print(f"  - {channel}")


In [None]:
# Scrape Messages from Multiple Channels
async def scrape_multi_channel_messages(channels, limit_per_channel=5, scan_limit=None):
    """Collect recent text messages from several Telegram channels.

    Credentials are read from the ``TELEGRAM_API_ID``, ``TELEGRAM_API_HASH``
    and ``TELEGRAM_PHONE_NUMBER`` environment variables.

    Args:
        channels: Iterable of channel identifiers passed to
            ``client.get_entity``.
        limit_per_channel: Maximum number of text messages to keep per
            channel.
        scan_limit: Maximum number of messages to request from each channel
            while looking for ones that have text. Defaults to
            ``max(20, 4 * limit_per_channel)``, so the window grows with the
            requested amount instead of capping results at 20.

    Returns:
        dict: Maps each channel identifier to a list of message dicts with
        the keys ``id``, ``channel``, ``channel_title``, ``text``, ``date``,
        ``views`` and ``has_media``. A channel that fails to scrape maps to
        an empty list. An empty dict is returned when the credentials are
        missing or when connecting (or disconnecting) fails.
    """
    api_id = os.getenv("TELEGRAM_API_ID")
    api_hash = os.getenv("TELEGRAM_API_HASH")
    phone = os.getenv("TELEGRAM_PHONE_NUMBER")

    if not api_id or not api_hash:
        print("Telegram API credentials not found!")
        return {}

    try:
        # Imported lazily so that a missing Telethon install is reported by
        # the handler below instead of breaking the whole cell.
        from telethon import TelegramClient

        if scan_limit is None:
            scan_limit = max(20, 4 * limit_per_channel)

        print("Connecting to Telegram for scraping...")
        client = TelegramClient("scraping_session", int(api_id), api_hash)

        try:
            await client.start(phone=phone)

            all_messages = {}

            for channel in channels:
                print(f"\nScraping from {channel}...")
                channel_messages = []

                try:
                    entity = await client.get_entity(channel)
                    print(f"  Channel: {entity.title}")

                    async for message in client.iter_messages(entity, limit=scan_limit):
                        # Stop as soon as the quota is met rather than
                        # draining the rest of the scan window.
                        if len(channel_messages) >= limit_per_channel:
                            break
                        if not message.text:
                            # Skip messages with no text content.
                            continue

                        channel_messages.append({
                            'id': message.id,
                            'channel': channel,
                            'channel_title': entity.title,
                            'text': message.text,
                            'date': message.date,
                            'views': getattr(message, 'views', None),
                            'has_media': bool(message.media)
                        })
                        print(f"    Message {len(channel_messages)}: {message.text[:50]}...")

                    all_messages[channel] = channel_messages
                    print(f"  Scraped {len(channel_messages)} messages from {channel}")

                except Exception as e:
                    # One bad channel must not abort the remaining ones.
                    print(f"  Failed to scrape {channel}: {e}")
                    all_messages[channel] = []

            return all_messages
        finally:
            # Always release the connection, including when start() fails.
            await client.disconnect()

    except Exception as e:
        print(f"Multi-channel scraping failed: {e}")
        return {}

# Run the scraping
if accessible_channels:
    scraped_messages = await scrape_multi_channel_messages(accessible_channels, limit_per_channel=3)
    
    print(f"\nScraping Results:")
    total_messages = 0
    for channel, messages in scraped_messages.items():
        message_count = len(messages)
        total_messages += message_count
        print(f"  {channel}: {message_count} messages")
    
    print(f"\nTotal messages collected: {total_messages}")
else:
    print("No accessible channels found - skipping scraping")
    scraped_messages = {}
