<a href="https://colab.research.google.com/github/anas7272/Attention-is-all-you-need/blob/main/web_scraping.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [10]:
!pip install requests beautifulsoup4 pandas lxml --quiet


In [11]:
import requests
from bs4 import BeautifulSoup
import pandas as pd


In [16]:
# Target URL
url = "https://coinmarketcap.com/"

# Send GET request with headers
headers = {"User-Agent": "Mozilla/5.0"}
response = requests.get(url, headers=headers)

if response.status_code == 200:
    print("‚úÖ Successfully fetched the page!")
else:
    raise Exception("‚ùå Failed to fetch page.")


‚úÖ Successfully fetched the page!


In [18]:
soup = BeautifulSoup(response.text, "lxml")

# Find table rows inside the crypto list table
table = soup.find("table")
rows = table.find("tbody").find_all("tr")

crypto_data = []

for row in rows:
    cols = row.find_all("td")
    if len(cols) > 9:
        try:
            rank = cols[1].text.strip()
            name_parts = cols[2].find_all("p")
            name = name_parts[0].text.strip() if len(name_parts) > 0 else "N/A"
            symbol = name_parts[1].text.strip() if len(name_parts) > 1 else "N/A"
            price = cols[3].text.strip()
            market_cap = cols[7].text.strip()
            volume_24h = cols[8].text.strip()
            supply = cols[9].text.strip()

            crypto_data.append({
                "Rank": rank,
                "Name": name,
                "Symbol": symbol,
                "Price": price,
                "Market Cap": market_cap,
                "24h Volume": volume_24h,
                "Circulating Supply": supply
            })
        except Exception as e:
            print("‚ö†Ô∏è Skipping one row due to parsing issue:", e)
            continue

df = pd.DataFrame(crypto_data)
print(f"‚úÖ Scraped {len(df)} cryptocurrencies!")
df.head(10)


‚úÖ Scraped 10 cryptocurrencies!


Unnamed: 0,Rank,Name,Symbol,Price,Market Cap,24h Volume,Circulating Supply
0,1,Bitcoin,BTC,"$108,186.86","$2.16T$2,157,644,722,134","$103,611,687,164957.44K",19.93M BTC
1,2,Ethereum,ETH,"$3,853.04","$465.34B$465,336,164,739","$51,514,550,75213.36M",120.69M ETH
2,3,Tether,USDT,$1.00,"$182.49B$182,492,679,519","$192,380,087,749192.29B",182.41B USDT
3,4,BNB,BNB,"$1,079.51","$150.23B$150,230,580,702","$4,152,026,5253.84M",139.18M BNB
4,5,XRP,XRP,$2.40,"$144.42B$144,422,079,013","$5,017,989,4162.08B",59.97B XRP
5,6,Solana,SOL,$186.11,"$101.75B$101,745,015,673","$8,298,552,00544.58M",546.66M SOL
6,7,USDC,USDC,$0.9998,"$76.7B$76,695,496,656","$22,300,616,58522.30B",76.7B USDC
7,8,TRON,TRX,$0.3209,"$30.38B$30,378,863,560","$1,051,018,1423.27B",94.66B TRX
8,9,Dogecoin,DOGE,$0.1916,"$29.03B$29,033,524,792","$2,940,894,95215.34B",151.45B DOGE
9,10,Cardano,ADA,$0.6395,"$22.93B$22,929,351,411","$1,191,656,7181.86B",35.85B ADA


In [19]:
# Convert price to numeric for analysis
df["Price_clean"] = (
    df["Price"]
    .str.replace("$", "")
    .str.replace(",", "")
    .astype(float, errors="ignore")
)

# Sort by price
print("\nüí∞ Top 10 Most Expensive Cryptos:")
display(df.sort_values("Price_clean", ascending=False).head(10)[["Name", "Price"]])



üí∞ Top 10 Most Expensive Cryptos:


Unnamed: 0,Name,Price
0,Bitcoin,"$108,186.86"
1,Ethereum,"$3,853.04"
3,BNB,"$1,079.51"
5,Solana,$186.11
4,XRP,$2.40
2,Tether,$1.00
6,USDC,$0.9998
9,Cardano,$0.6395
7,TRON,$0.3209
8,Dogecoin,$0.1916
