# LoL Hero Crawler

## Code

In [2]:
import json
import os
import random
import time
from typing import Callable

import requests


# ----------------------------------------------------------------------
# API endpoints
# ----------------------------------------------------------------------
# Hero list endpoint, and the per-hero detail endpoint whose "{}" placeholder
# is filled with a hero id via str.format().
HERO_LIST_URL = "https://game.gtimg.cn/images/lol/act/img/js/heroList/hero_list.js?ts=2944117"
HERO_DETAIL_URL_TEMPLATE = "https://game.gtimg.cn/images/lol/act/img/js/hero/{}.js?ts=2944118"

# Headers sent with every request; the User-Agent is a desktop Chrome string.
HEADERS = {
    "User-Agent": " ".join(
        [
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7)",
            "AppleWebKit/537.36 (KHTML, like Gecko)",
            "Chrome/142.0.0.0 Safari/537.36",
        ]
    ),
}

# Retry / timeout tuning
MAX_RETRIES = 3             # total attempts made before the error is re-raised
RETRY_DELAY_RANGE = (1, 3)  # (min, max) seconds for the random pause between attempts
REQUEST_TIMEOUT = 10        # value passed as ``timeout=`` to requests.get

# Hero role mapping (EN -> CN).
# Keys are the English role identifiers looked up in main(); values are the
# Chinese labels written to the output. The labels must be genuine UTF-8
# Chinese text (not UTF-8 bytes mis-decoded through a single-byte codec).
ROLE_MAP = {
    "assassin": "刺客",
    "tank": "坦克",
    "support": "辅助",
    "fighter": "战士",
    "marksman": "射手",
    "mage": "法师",
}

# Output paths
HERO_INFO_FILE = "lol_heroes.txt"  # text file opened for writing in main()
IMAGE_BASE_DIR = "HeroImages"      # root folder; main() creates one sub-folder per hero


# ----------------------------------------------------------------------
# Retry mechanism
# ----------------------------------------------------------------------
def fetch_with_retry(request_func: Callable, *args, **kwargs):
    """
    Call ``request_func`` up to ``MAX_RETRIES`` times until it succeeds.

    Only ``requests.RequestException`` and ``json.JSONDecodeError`` trigger a
    retry; any other exception propagates immediately.

    :param request_func: callable that performs the request
    :param args: positional arguments forwarded to ``request_func``
    :param kwargs: keyword arguments forwarded to ``request_func``
    :return: whatever ``request_func`` returns on its first successful call
    :raises Exception: the last caught error, once the final attempt fails
    """
    for attempt in range(1, MAX_RETRIES + 1):
        try:
            result = request_func(*args, **kwargs)
        except (requests.RequestException, json.JSONDecodeError) as e:
            print(f"[Retry {attempt}/{MAX_RETRIES}] Error: {e}")

            out_of_attempts = attempt == MAX_RETRIES
            if out_of_attempts:
                # Bare raise keeps the original exception and traceback.
                raise

            # Random pause so repeated attempts are not sent back-to-back.
            low, high = RETRY_DELAY_RANGE
            time.sleep(random.uniform(low, high))
        else:
            return result


# ----------------------------------------------------------------------
# HTTP helpers
# ----------------------------------------------------------------------
def fetch_json(url: str) -> dict:
    """
    GET ``url`` and return the response body decoded as JSON.

    A non-success HTTP status raises via ``raise_for_status()`` before any
    parsing is attempted; a body that is not valid JSON raises
    ``json.JSONDecodeError``.
    """
    resp = requests.get(url, timeout=REQUEST_TIMEOUT, headers=HEADERS)
    resp.raise_for_status()

    body_text = resp.text
    return json.loads(body_text)


def download_image(url: str, save_path: str) -> None:
    """
    GET ``url`` and write the raw response bytes to ``save_path``.

    The destination file is opened only after ``raise_for_status()`` has
    passed, so an HTTP error never creates or truncates the file.
    """
    resp = requests.get(url, timeout=REQUEST_TIMEOUT, headers=HEADERS)
    resp.raise_for_status()

    # Binary mode: the payload is written exactly as received.
    with open(save_path, mode="wb") as out_file:
        out_file.write(resp.content)


def ensure_dir(path: str) -> None:
    """
    Create directory ``path`` (including missing parents) if it does not exist.

    Uses ``exist_ok=True`` instead of a separate existence check, so the call
    cannot fail when the directory appears between the check and the
    creation.

    :param path: directory to create
    :raises FileExistsError: if ``path`` exists but is not a directory
    :raises OSError: if the directory cannot be created
    """
    os.makedirs(path, exist_ok=True)


# ----------------------------------------------------------------------
# Main crawler logic
# ----------------------------------------------------------------------
def _download_hero_skins(hero_name: str, skins: list) -> None:
    """Download each non-chroma skin image of one hero into the hero's folder."""
    hero_img_dir = os.path.join(IMAGE_BASE_DIR, hero_name)
    ensure_dir(hero_img_dir)

    # Counts only the skins actually downloaded, so filename prefixes stay
    # contiguous (00-, 01-, ...) even when chroma entries are skipped.
    skin_index = 0
    for skin in skins:
        # Skip chromas
        if skin.get("chromas") == "1":
            continue

        # A "/" in the name would be treated as a path separator.
        # NOTE(review): "|" is not a valid filename character on Windows.
        skin_name = skin["name"].replace("/", "|")
        skin_img_url = skin["centerImg"]

        image_filename = f"{skin_index:02d}-{skin_name}.jpg"
        image_path = os.path.join(hero_img_dir, image_filename)

        print(f"[IMG] {skin_img_url}")
        fetch_with_retry(download_image, skin_img_url, image_path)

        skin_index += 1


def _format_spell_lines(hero_fields: list, spells: list) -> list:
    """Return one "|"-delimited record per spell, prefixed with ``hero_fields``."""
    lines = []
    for spell in spells:
        # Collapse newlines so every record occupies exactly one line.
        spell_desc = spell["description"].replace("\n", " ").strip()
        fields = [*hero_fields, spell["spellKey"], spell["name"], spell_desc]
        # str() keeps join() from raising TypeError should the JSON carry a
        # non-string value (for example a numeric id).
        lines.append("|".join(str(field) for field in fields))
    return lines


def main() -> None:
    """
    Crawl every hero: download skin images and record spell information.

    Fetches the hero list, then for each hero fetches its detail document,
    saves its non-chroma skin images under ``IMAGE_BASE_DIR/<hero name>/``
    and appends one line per spell to ``HERO_INFO_FILE`` in the form
    ``heroId|name|title|roles|spellKey|spellName|description``.

    :raises Exception: any error re-raised by ``fetch_with_retry`` after the
        final attempt, or a ``KeyError`` if an expected JSON key is missing
    """
    # Fetch hero list with retry
    hero_list_json = fetch_with_retry(fetch_json, HERO_LIST_URL)
    heroes = hero_list_json["hero"]

    with open(HERO_INFO_FILE, mode="w", encoding="utf-8") as file:
        for hero in heroes:
            hero_id = hero["heroId"]
            hero_name = hero["name"]
            hero_title = hero["title"]

            # Convert hero roles to Chinese; unknown roles pass through as-is.
            hero_roles = hero.get("roles", [])
            hero_cn_roles = ",".join(ROLE_MAP.get(role, role) for role in hero_roles)

            # Fetch hero detail with retry
            detail_url = HERO_DETAIL_URL_TEMPLATE.format(hero_id)
            hero_detail = fetch_with_retry(fetch_json, detail_url)

            _download_hero_skins(hero_name, hero_detail.get("skins", []))

            # Write the spell records. This write was previously commented
            # out, which left the output file empty.
            hero_fields = [hero_id, hero_name, hero_title, hero_cn_roles]
            for line in _format_spell_lines(hero_fields, hero_detail.get("spells", [])):
                file.write(line + "\n")

            # Polite crawling between heroes
            time.sleep(random.randint(1, 3))


if __name__ == "__main__":
    main()


[IMG] https://game.gtimg.cn/images/lol/act/img/center/0b95894e-0df2-470e-b282-6c5f5cf41955.jpg
[IMG] https://game.gtimg.cn/images/lol/act/img/center/46358cd4-3f36-4987-9db8-aab046adf43f.jpg
[IMG] https://game.gtimg.cn/images/lol/act/img/center/5ba8083d-b371-4ad1-aaea-33ec5c3ac28b.jpg
[IMG] https://game.gtimg.cn/images/lol/act/img/center/c650518b-2ef4-4b1f-ad52-318c16974401.jpg
[IMG] https://game.gtimg.cn/images/lol/act/img/center/348e15f7-f9e5-4590-8f68-2eb4cb12fead.jpg
[IMG] https://game.gtimg.cn/images/lol/act/img/center/7f58e86d-bc85-46b2-9907-1b87ba459297.jpg
[IMG] https://game.gtimg.cn/images/lol/act/img/center/347e7256-e931-484e-8a0e-80b9cee3f674.jpg
[IMG] https://game.gtimg.cn/images/lol/act/img/center/cabef64a-fb32-4e71-b1fd-8cfb40b74181.jpg
[IMG] https://game.gtimg.cn/images/lol/act/img/center/c0f8be56-b911-47b1-a4f1-df620b5e6f35.jpg
[IMG] https://game.gtimg.cn/images/lol/act/img/center/a9951de2-e800-4ae6-9a57-7216bae4a664.jpg


KeyboardInterrupt: 

## Overview: What Does This Program Do?

This Python script is a **League of Legends (LoL) hero crawler**. It does the following:

1. Fetches the full hero list from Tencent’s LoL API
2. For each hero:

   * Fetches detailed hero data
   * Downloads all skin images (chroma variants are skipped)
   * Extracts skill (spell) information
3. Saves:

   * Images to folders
   * Hero + skill info to a text file
4. Uses a **retry mechanism** to make the crawler more stable

This is a **real-world Python project** combining:

* HTTP requests
* JSON parsing
* File I/O
* Error handling
* Retry logic
* Clean code structure

---

## Required Knowledge Before Reading

You should roughly know:

* Python variables and functions
* Lists and dictionaries
* `for` loops
* Basic file operations (`open`, `write`)

If not, don’t worry—this tutorial explains concepts as they appear.

---

## Importing Modules (工具准备)

```python
import json
import os
import random
import time
from typing import Callable

import requests
```

### What each module is used for

| Module            | Purpose                    |
| ----------------- | -------------------------- |
| `json`            | Parse JSON data from APIs  |
| `os`              | File paths and directories |
| `random`          | Random delays              |
| `time`            | Sleep between requests     |
| `typing.Callable` | Type hint for functions    |
| `requests`        | Send HTTP requests         |

> 💡 **Beginner tip**
> If Python is a toolbox, `import` is how you take tools out of it.

---

## API Endpoints (数据来源)

```python
HERO_LIST_URL = "https://game.gtimg.cn/..."
HERO_DETAIL_URL_TEMPLATE = "https://game.gtimg.cn/.../{}.js"
```

### What these URLs mean

* `HERO_LIST_URL`
  → returns **all heroes (basic info)**

* `HERO_DETAIL_URL_TEMPLATE`
  → returns **one hero’s detailed info**, using:

  ```python
  HERO_DETAIL_URL_TEMPLATE.format(hero_id)
  ```

This is a common API design pattern.

---

## HTTP Headers: Pretending to Be a Browser

```python
HEADERS = {
    "User-Agent": "Mozilla/5.0 ..."
}
```

### Why this is important

Many servers:

* Block unknown clients
* Allow real browsers

By setting `User-Agent`, we:

* Reduce the chance of being blocked
* Look like Chrome on macOS

---

## Retry Configuration (爬虫健壮性核心)

```python
MAX_RETRIES = 3
RETRY_DELAY_RANGE = (1, 3)
REQUEST_TIMEOUT = 10
```

### Meaning

| Variable            | Meaning                            |
| ------------------- | ---------------------------------- |
| `MAX_RETRIES`       | Retry up to 3 times                |
| `RETRY_DELAY_RANGE` | Sleep 1–3 seconds between retries  |
| `REQUEST_TIMEOUT`   | Fail if request takes > 10 seconds |

This prevents:

* Program crashing on temporary network issues
* Retrying too frequently (which increases the risk of being banned)

---

## Role Mapping Dictionary

```python
ROLE_MAP = {
    "assassin": "刺客",
    "tank": "坦克",
    ...
}
```

### Why this exists

The API returns roles in **English**, but we want **Chinese output**.

This dictionary converts:

```text
assassin → 刺客
mage → 法师
```

---

## Output Paths

```python
HERO_INFO_FILE = "lol_heroes.txt"
IMAGE_BASE_DIR = "HeroImages"
```

* Text info → `lol_heroes.txt`
* Images → `HeroImages/<HeroName>/`

Keeping output paths configurable is **good coding practice**.

---

## Retry Mechanism (重点模块)

```python
def fetch_with_retry(request_func: Callable, *args, **kwargs):
```

### What this function does

It:

1. Executes another function
2. If it fails → retries
3. Sleeps randomly between retries
4. Raises error after max retries

This is a **generic retry wrapper**.

---

### Core logic explained

```python
for attempt in range(1, MAX_RETRIES + 1):
    try:
        return request_func(*args, **kwargs)
```

* Try calling the request function
* If successful → return immediately

---

### Handling errors safely

```python
except (requests.RequestException, json.JSONDecodeError) as e:
```

This catches:

* Network errors
* Invalid JSON data

Instead of crashing, it retries.

---

### Random sleep between retries

```python
sleep_time = random.uniform(*RETRY_DELAY_RANGE)
time.sleep(sleep_time)
```

This avoids:

* Hammering the server
* Triggering anti-crawler systems

---

## HTTP Helper Functions

### Fetching JSON Data

```python
def fetch_json(url: str) -> dict:
```

This function:

1. Sends a GET request
2. Checks HTTP status
3. Converts response text → Python dict

Key line:

```python
return json.loads(response.text)
```

---

### Downloading Images

```python
def download_image(url: str, save_path: str) -> None:
```

Important detail:

```python
with open(save_path, "wb") as f:
```

* `"wb"` = write **binary**
* Required for images

---

### Ensuring Directories Exist

```python
def ensure_dir(path: str) -> None:
```

Why this is needed:

* Writing files to a non-existent folder causes errors
* This function makes the folder if missing

---

## Main Program Logic

```python
def main() -> None:
```

This is where everything is connected.

---

## Step 1: Fetch Hero List

```python
hero_list_json = fetch_with_retry(fetch_json, HERO_LIST_URL)
heroes = hero_list_json["hero"]
```

We now have:

* `heroes`: a list of hero dictionaries

---

## Step 2: Open Output File

```python
with open(HERO_INFO_FILE, "w", encoding="utf-8") as file:
```

* Uses UTF-8 (important for Chinese)
* Automatically closes the file after writing

---

## Step 3: Loop Through Heroes

```python
for hero in heroes:
```

Each `hero` contains:

* `heroId`
* `name`
* `title`
* `roles`

---

## Step 4: Role Translation

```python
cn_roles = [ROLE_MAP.get(role, role) for role in hero_roles]
```

This is a **list comprehension**.

Meaning:

> Convert each role to Chinese if possible.

---

## Step 5: Fetch Hero Details

```python
detail_url = HERO_DETAIL_URL_TEMPLATE.format(hero_id)
hero_detail = fetch_with_retry(fetch_json, detail_url)
```

Now we can access:

* Skins
* Spells
* Detailed descriptions

---

## Step 6: Download Skin Images

```python
hero_img_dir = os.path.join(IMAGE_BASE_DIR, hero_name)
ensure_dir(hero_img_dir)
```

Each hero gets its own folder.

---

### Skipping chroma skins

```python
if skin.get("chromas") == "1":
    continue
```

This avoids:

* Duplicate / variant images

---

### Safe image naming

```python
image_filename = f"{skin_index:02d}-{skin_name}.jpg"
```

* `:02d` ensures sorting:

  ```
  00-
  01-
  02-
  ```

---

## Step 7: Save Spell Information

```python
line = "|".join([...])
file.write(line + "\n")
```

This produces **structured text output**, easy to:

* Parse later
* Import into Excel / databases

---

## Polite Crawling

```python
time.sleep(random.randint(1, 3))
```

This:

* Slows down requests
* Reduces ban risk
* Mimics human behavior

---

## Program Entry Point

```python
if __name__ == "__main__":
    main()
```

Meaning:

* Run `main()` only when this file is executed directly
* Not when imported elsewhere

This is **standard Python style**.

---

## What You Should Learn From This Code

After understanding this script, you should be able to:

* Call real-world JSON APIs
* Build retry mechanisms
* Write robust crawlers
* Organize files cleanly
* Handle errors gracefully
* Write maintainable Python code

---

## Suggested Beginner Exercises

1. Save hero info as **JSON**
2. Add logging instead of `print`
3. Limit heroes to first 10 (testing mode)
4. Add command-line arguments
5. Count total downloaded images