<a href="https://colab.research.google.com/github/anas7272/Attention-is-all-you-need/blob/main/web_scraping.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [24]:
!pip install requests beautifulsoup4 pandas lxml --quiet


In [25]:
import requests
from bs4 import BeautifulSoup
import pandas as pd


In [26]:
# Target URL
url = "https://coinmarketcap.com/"

# Send GET request with headers
headers = {"User-Agent": "Mozilla/5.0"}
response = requests.get(url, headers=headers)

if response.status_code == 200:
    print("✅ Successfully fetched the page!")
else:
    raise Exception("❌ Failed to fetch page.")


✅ Successfully fetched the page!


In [27]:
soup = BeautifulSoup(response.text, "lxml")

# Find table rows inside the crypto list table
table = soup.find("table")
rows = table.find("tbody").find_all("tr")

crypto_data = []

for row in rows:
    cols = row.find_all("td")
    if len(cols) > 9:
        try:
            rank = cols[1].text.strip()
            name_parts = cols[2].find_all("p")
            name = name_parts[0].text.strip() if len(name_parts) > 0 else "N/A"
            symbol = name_parts[1].text.strip() if len(name_parts) > 1 else "N/A"
            price = cols[3].text.strip()
            market_cap = cols[7].text.strip()
            volume_24h = cols[8].text.strip()
            supply = cols[9].text.strip()

            crypto_data.append({
                "Rank": rank,
                "Name": name,
                "Symbol": symbol,
                "Price": price,
                "Market Cap": market_cap,
                "24h Volume": volume_24h,
                "Circulating Supply": supply
            })
        except Exception as e:
            print("⚠️ Skipping one row due to parsing issue:", e)
            continue

df = pd.DataFrame(crypto_data)
print(f"✅ Scraped {len(df)} cryptocurrencies!")
df.head(10)


✅ Scraped 10 cryptocurrencies!


Unnamed: 0,Rank,Name,Symbol,Price,Market Cap,24h Volume,Circulating Supply
0,1,Bitcoin,BTC,"$108,335.18","$2.16T$2,156,233,752,082","$103,051,444,738952.89K",19.93M BTC
1,2,Ethereum,ETH,"$3,859.42","$464.29B$464,287,034,033","$51,683,628,98413.43M",120.69M ETH
2,3,Tether,USDT,$1.00,"$182.46B$182,458,516,282","$193,053,305,356193.00B",182.41B USDT
3,4,BNB,BNB,"$1,078.74","$149.97B$149,970,176,020","$4,181,995,7863.88M",139.18M BNB
4,5,XRP,XRP,$2.39,"$143.75B$143,746,154,456","$5,016,157,6452.09B",59.97B XRP
5,6,Solana,SOL,$184.83,"$101.04B$101,044,548,430","$8,311,299,29844.96M",546.66M SOL
6,7,USDC,USDC,$0.9997,"$76.71B$76,706,081,675","$22,280,823,18222.28B",76.72B USDC
7,8,TRON,TRX,$0.3204,"$30.33B$30,334,364,908","$1,046,512,1683.26B",94.66B TRX
8,9,Dogecoin,DOGE,$0.1908,"$28.91B$28,907,540,358","$2,964,416,63915.53B",151.45B DOGE
9,10,Cardano,ADA,$0.6366,"$22.82B$22,823,809,518","$1,200,680,9351.88B",35.85B ADA


In [28]:
# Convert price to numeric for analysis
df["Price_clean"] = (
    df["Price"]
    .str.replace("$", "")
    .str.replace(",", "")
    .astype(float, errors="ignore")
)

# Sort by price
print("\n💰 Top 10 Most Expensive Cryptos:")
display(df.sort_values("Price_clean", ascending=False).head(10)[["Name", "Price"]])



💰 Top 10 Most Expensive Cryptos:


Unnamed: 0,Name,Price
0,Bitcoin,"$108,335.18"
1,Ethereum,"$3,859.42"
3,BNB,"$1,078.74"
5,Solana,$184.83
4,XRP,$2.39
2,Tether,$1.00
6,USDC,$0.9997
9,Cardano,$0.6366
7,TRON,$0.3204
8,Dogecoin,$0.1908
