In [1]:
# Use the explicit %pip magic so packages install into this kernel's environment
%pip install requests beautifulsoup4

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.1.2 -> 24.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
import requests
from bs4 import BeautifulSoup
import csv

# Download the full Pokedex page.
# The timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() stops us from parsing an HTTP error page as if it were data.
url = 'https://pokemondb.net/pokedex/all'
response = requests.get(url, timeout=30)
response.raise_for_status()
html_content = response.content

# Parse the HTML
soup = BeautifulSoup(html_content, 'html.parser')

# Collect every table row, skipping the first one (the header row)
pokemon_rows = soup.select('tr')[1:]

# A usable data row needs 10 <td> cells: #, Name, Type and the 7 stat columns
EXPECTED_CELLS = 10
skipped_rows = 0

# Write the scraped rows to CSV (UTF-8, newline='' as the csv module requires)
with open('pokemon_data_22.csv', mode='w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)

    # Header row
    writer.writerow(['#', 'Name', 'Type', 'Total', 'HP', 'Attack', 'Defense', 'Sp.Atk', 'Sp.Def', 'Speed'])

    for row in pokemon_rows:
        cells = row.select('td')

        # Skip rows that are not complete data rows instead of crashing with
        # IndexError halfway through and leaving a truncated CSV behind.
        if len(cells) < EXPECTED_CELLS:
            skipped_rows += 1
            continue

        number_tag = cells[0].select_one('span.infocard-cell-data')
        name_tag = cells[1].select_one('a.ent-name')
        if number_tag is None or name_tag is None:
            skipped_rows += 1
            continue

        pokedex_number = number_tag.text.strip()
        name = name_tag.text.strip()

        # A Pokemon may have one or two types; store them space-separated in one column
        type_combined = ' '.join(
            type_link.text.strip() for type_link in cells[2].select('a.type-icon')
        )

        # Total, HP, Attack, Defense, Sp.Atk, Sp.Def, Speed (in page order)
        stats = [stat.text.strip() for stat in cells[3:EXPECTED_CELLS]]

        writer.writerow([pokedex_number, name, type_combined, *stats])

# Surface skipped rows so silent data loss is visible to the reader
if skipped_rows:
    print(f"Warning: skipped {skipped_rows} table row(s) that did not look like Pokemon data")

## Chuyển thành JSON


In [3]:
import csv
import json

def csv_to_json(csv_file_path, json_file_path):
    """Convert a CSV file with a header row into a JSON array of objects.

    Each CSV row becomes one JSON object keyed by the header names; all values
    are kept as strings, exactly as ``csv.DictReader`` yields them.

    Args:
        csv_file_path: Path of the UTF-8 encoded CSV file to read.
        json_file_path: Path of the JSON file to create (overwritten if present).
    """
    # Read explicitly as UTF-8: the CSV in this notebook is written as UTF-8, and
    # relying on the platform default (e.g. cp1252 on Windows) corrupts
    # non-ASCII names. newline='' lets the csv module handle line endings itself.
    with open(csv_file_path, mode='r', newline='', encoding='utf-8') as csv_file:
        data = list(csv.DictReader(csv_file))

    # json.dump escapes non-ASCII by default, so the output is plain ASCII;
    # the encoding is still pinned so the result never depends on the platform.
    with open(json_file_path, mode='w', encoding='utf-8') as json_file:
        json.dump(data, json_file, indent=4)

    print(f"Data from {csv_file_path} has been successfully converted to {json_file_path}")

# Run the conversion on the CSV written by the scraping cell
csv_to_json('pokemon_data_22.csv', 'pokemon_data.json')


Data from pokemon_data_22.csv has been successfully converted to pokemon_data.json


### Thêm ảnh


In [5]:
import csv
import json

def csv_to_json_with_images(
    csv_file_path,
    json_file_path,
    image_url_template="https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/{id}.png",
):
    """Convert a CSV file to JSON, adding an ``image_url`` field to every row.

    Each CSV row becomes one JSON object keyed by the header names. The
    ``image_url`` value is built from the row's ``#`` column, converted to an
    integer so that zero-padded IDs such as ``0001`` become ``1``.

    Args:
        csv_file_path: Path of the UTF-8 encoded CSV file to read; it must
            have a ``#`` column holding an integer ID.
        json_file_path: Path of the JSON file to create (overwritten if present).
        image_url_template: ``str.format`` template containing an ``{id}``
            placeholder that receives the integer ID.

    Raises:
        KeyError: If the CSV has no ``#`` column.
        ValueError: If a ``#`` value cannot be parsed as an integer.
    """
    data = []

    # Read explicitly as UTF-8: the CSV in this notebook is written as UTF-8, and
    # relying on the platform default (e.g. cp1252 on Windows) corrupts
    # non-ASCII names. newline='' lets the csv module handle line endings itself.
    with open(csv_file_path, mode='r', newline='', encoding='utf-8') as csv_file:
        for row in csv.DictReader(csv_file):
            # int() drops the zero padding before the ID goes into the URL
            row['image_url'] = image_url_template.format(id=int(row['#']))
            data.append(row)

    # json.dump escapes non-ASCII by default, so the output is plain ASCII;
    # the encoding is still pinned so the result never depends on the platform.
    with open(json_file_path, mode='w', encoding='utf-8') as json_file:
        json.dump(data, json_file, indent=4)

    print(f"Data with images from {csv_file_path} has been successfully converted to {json_file_path}")

# Run the conversion with image URLs on the CSV written by the scraping cell
csv_to_json_with_images('pokemon_data_22.csv', 'pokemonData.json')


Data with images from pokemon_data_22.csv has been successfully converted to pokemonData.json
