In [32]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import requests
from bs4 import BeautifulSoup


# Data Scraping Exercise

## Starting Point

Since it is an all-time favorite game of mine, I was extremely happy to find that there is a database website for all things concerning Diablo IV. Thus, the idea for this mini exercise was born.

The website that is scraped is [Diablo IV](https://diablo4.cc).

### First Things First - Check the Permissions

The `robots.txt` file at `https://diablo4.cc/robots.txt` provides information about what can and cannot be scraped. In this case, everything is allowed:

```plaintext
User-agent: *
Allow: /
```


### Axes

Let's first concentrate on axes.

In [37]:
# Download the axe listing page. The timeout keeps the cell from blocking
# forever if the server never answers, and the status check stops the notebook
# early instead of silently parsing an error page (same check and message as
# the later fetch cells in this notebook).
diablo4_axes = requests.get("https://diablo4.cc/Axe", timeout=30)
if diablo4_axes.status_code != 200:
    raise Exception("Failed to load page https://diablo4.cc/Axe")

# Parse the raw response bytes with Python's built-in HTML parser.
parser = BeautifulSoup(diablo4_axes.content, "html.parser")

In [38]:
# Collect the <div> grid containers whose class attribute is
# "row row-cols-1 row-cols-lg-3 g-2"; the item cards live inside them.
rows = parser.find_all("div", attrs={"class": "row row-cols-1 row-cols-lg-3 g-2"})

In [39]:
# Inspect the matched containers; the notebook echoes their raw HTML.
rows

[<div class="row row-cols-1 row-cols-lg-3 g-2"><div class="col">
 <div class="d-flex border rounded" style="position: relative">
 <div class="header-icon" style='background-image: url("https://cdn.diablo4.cc/image/texture/2DInventory_Axes/35197504.webp")'></div>
 <div class="card-body"><div><a class="c_unique text-decoration-none item-name" data-hover="?s=Item/1306219" href="Waxing_Gibbous">Waxing Gibbous</a></div><div class="c_unique item-type">Unique Axe</div><div>925 Item Power</div><hr class="half-width separator"/><div class="damage">1,838 Damage Per Second</div><ul class="damage-details"><li>[1,337 – 2,005] Damage per Hit</li><li>1.10 Attacks per Second (Fast)</li></ul><li class="empty-bullet">+28% Damage to Healthy Enemies</li><hr class="clearfix separator"/><li class="empty-bullet">+[14 – 21]% Critical Strike Damage</li><li class="empty-bullet">+[16.5 – 23.5]% Damage to Close Enemies</li><li class="empty-bullet">+[21 – 35]% Damage to Injured Enemies</li><li class="empty-bullet"

In [43]:
# Locate the grid containers that hold the item cards.
rows = parser.find_all("div", class_="row row-cols-1 row-cols-lg-3 g-2")

# Walk every card ('col') inside every container and print one record per card.
for row in rows:
    for col in row.find_all('div', class_='col'):
        # Look up the tag behind each output field. The 'Item Power' <div>
        # carries no class of its own, so it is found by its text instead.
        tags = {
            'Name': col.find('a', class_='item-name'),
            'Type': col.find('div', class_='item-type'),
            'Item Power': col.find('div', string=lambda s: 'Item Power' in s if s else False),
            'Damage per Second': col.find('div', class_='damage'),
        }

        # Turn each tag into its stripped text; a missing tag becomes 'N/A'.
        d4axes = {
            label: (tag.text.strip() if tag else 'N/A')
            for label, tag in tags.items()
        }

        # Print the details
        print(d4axes)

{'Name': 'Waxing Gibbous', 'Type': 'Unique Axe', 'Item Power': '925 Item Power', 'Damage per Second': '1,838 Damage Per Second'}
{'Name': "The Butcher's Cleaver", 'Type': 'Unique Axe', 'Item Power': '925 Item Power', 'Damage per Second': '1,838 Damage Per Second'}
{'Name': 'Bearded Axe', 'Type': 'Legendary Axe', 'Item Power': '925 Item Power', 'Damage per Second': '1,838 Damage Per Second'}
{'Name': 'Marauder Axe', 'Type': 'Legendary Axe', 'Item Power': '925 Item Power', 'Damage per Second': '1,838 Damage Per Second'}
{'Name': 'Bullova', 'Type': 'Legendary Axe', 'Item Power': '925 Item Power', 'Damage per Second': '1,838 Damage Per Second'}
{'Name': 'Ravager', 'Type': 'Legendary Axe', 'Item Power': '925 Item Power', 'Damage per Second': '1,838 Damage Per Second'}
{'Name': 'Snake Axe', 'Type': 'Legendary Axe', 'Item Power': '925 Item Power', 'Damage per Second': '1,838 Damage Per Second'}
{'Name': 'Octopian Edge', 'Type': 'Legendary Axe', 'Item Power': '925 Item Power', 'Damage per Seco

We get individual lines; we need to change the code so that the records are collected in a list.

In [47]:
# Fetch the page content. The timeout bounds how long this cell can block when
# the server does not respond; without it requests waits indefinitely.
url = 'https://diablo4.cc/Axe'
response = requests.get(url, timeout=30)
if response.status_code != 200:
    raise Exception(f"Failed to load page {url}")

# Parse the HTML content
soup = BeautifulSoup(response.content, 'html.parser')

# Find the grid containers that hold the item cards
rows = soup.find_all("div", class_="row row-cols-1 row-cols-lg-3 g-2")

# One dictionary per item card is accumulated here
d4axes_list = []

# Extracting data from each card ('col') of each container
for row in rows:
    cols = row.find_all('div', class_='col')
    for col in cols:
        item_name_elem = col.find('a', class_='item-name')
        item_type_elem = col.find('div', class_='item-type')
        # The 'Item Power' <div> has no class of its own, so match on its text
        item_power_elem = col.find('div', string=lambda x: 'Item Power' in x if x else False)
        damage_per_second_elem = col.find('div', class_='damage')

        # A tag that was not found is reported as 'N/A' so every record has the same keys
        item_name = item_name_elem.text.strip() if item_name_elem else 'N/A'
        item_type = item_type_elem.text.strip() if item_type_elem else 'N/A'
        item_power = item_power_elem.text.strip() if item_power_elem else 'N/A'
        damage_per_second = damage_per_second_elem.text.strip() if damage_per_second_elem else 'N/A'

        # Collecting all details into a dictionary
        d4axes = {
            'Name': item_name,
            'Type': item_type,
            'Item Power': item_power,
            'Damage per Second': damage_per_second
        }

        # Append the dictionary to the list
        d4axes_list.append(d4axes)

# Print the list of dictionaries
for axe in d4axes_list:
    print(axe)

# Optional: Convert the list of dictionaries to a DataFrame
d4axes_data = pd.DataFrame(d4axes_list)
print(d4axes_data)

{'Name': 'Waxing Gibbous', 'Type': 'Unique Axe', 'Item Power': '925 Item Power', 'Damage per Second': '1,838 Damage Per Second'}
{'Name': "The Butcher's Cleaver", 'Type': 'Unique Axe', 'Item Power': '925 Item Power', 'Damage per Second': '1,838 Damage Per Second'}
{'Name': 'Bearded Axe', 'Type': 'Legendary Axe', 'Item Power': '925 Item Power', 'Damage per Second': '1,838 Damage Per Second'}
{'Name': 'Marauder Axe', 'Type': 'Legendary Axe', 'Item Power': '925 Item Power', 'Damage per Second': '1,838 Damage Per Second'}
{'Name': 'Bullova', 'Type': 'Legendary Axe', 'Item Power': '925 Item Power', 'Damage per Second': '1,838 Damage Per Second'}
{'Name': 'Ravager', 'Type': 'Legendary Axe', 'Item Power': '925 Item Power', 'Damage per Second': '1,838 Damage Per Second'}
{'Name': 'Snake Axe', 'Type': 'Legendary Axe', 'Item Power': '925 Item Power', 'Damage per Second': '1,838 Damage Per Second'}
{'Name': 'Octopian Edge', 'Type': 'Legendary Axe', 'Item Power': '925 Item Power', 'Damage per Seco

In [51]:

# Fetch the page content
url = 'https://diablo4.cc/Axe'
response = requests.get(url)
if response.status_code != 200:
    raise Exception(f"Failed to load page {url}")

# Parse the HTML content
soup = BeautifulSoup(response.content, 'html.parser')

# Find all rows
rows = soup.find_all("div", class_="row row-cols-1 row-cols-lg-3 g-2")

# Initialize an empty list to hold the dictionaries
d4axes_list = []

# Extracting data from each row
for row in rows:
    cols = row.find_all('div', class_='col')
    for col in cols:
        item_name_elem = col.find('a', class_='item-name')
        item_type_elem = col.find('div', class_='item-type')
        item_power_elem = col.find('div', string=lambda x: 'Item Power' in x if x else False)
        damage_per_second_elem = col.find('div', class_='damage')
        
        item_name = item_name_elem.text.strip() if item_name_elem else 'N/A'
        item_type = item_type_elem.text.strip() if item_type_elem else 'N/A'
        item_power = item_power_elem.text.strip() if item_power_elem else 'N/A'
        damage_per_second = damage_per_second_elem.text.strip() if damage_per_second_elem else 'N/A'
        
        # Extract additional damage details
        damage_details_elem = col.find('ul', class_='damage-details')
        if damage_details_elem:
            damage_details = [li.text.strip() for li in damage_details_elem.find_all('li')]
        else:
            damage_details = ['N/A', 'N/A']
        
        # Collecting all details into a dictionary
        d4axes = {
            'Name': item_name,
            'Type': item_type,
            'Item Power': item_power,
            'Damage per Second': damage_per_second,
            'Damage per Hit': damage_details[0] if len(damage_details) > 0 else 'N/A',
            'Attacks per Second': damage_details[1] if len(damage_details) > 1 else 'N/A'
        }
        
        # Append the dictionary to the list
        d4axes_list.append(d4axes)

# Convert the list of dictionaries to a DataFrame
d4axes_data = pd.DataFrame(d4axes_list)

# Display the DataFrame
print(d4axes_data)

                             Name           Type      Item Power  \
0                  Waxing Gibbous     Unique Axe  925 Item Power   
1           The Butcher's Cleaver     Unique Axe  925 Item Power   
2                     Bearded Axe  Legendary Axe  925 Item Power   
3                    Marauder Axe  Legendary Axe  925 Item Power   
4                         Bullova  Legendary Axe  925 Item Power   
5                         Ravager  Legendary Axe  925 Item Power   
6                       Snake Axe  Legendary Axe  925 Item Power   
7                   Octopian Edge  Legendary Axe  925 Item Power   
8                             Axe            Axe  925 Item Power   
9                        Hand Axe            Axe  925 Item Power   
10                      Great Axe            Axe  925 Item Power   
11                   Cutter's Axe      Magic Axe  925 Item Power   
12                       Hand Axe            Axe  925 Item Power   
13                        War Axe            Axe

In [53]:
# Persist the scraped table to disk; index=False leaves the row index out of the file.
d4axes_data.to_csv(path_or_buf='d4axes_data.csv', index=False)

In [55]:
# Show the assembled table.
d4axes_data

Unnamed: 0,Name,Type,Item Power,Damage per Second,Damage per Hit,Attacks per Second
0,Waxing Gibbous,Unique Axe,925 Item Power,"1,838 Damage Per Second","[1,337 – 2,005] Damage per Hit",1.10 Attacks per Second (Fast)
1,The Butcher's Cleaver,Unique Axe,925 Item Power,"1,838 Damage Per Second","[1,337 – 2,005] Damage per Hit",1.10 Attacks per Second (Fast)
2,Bearded Axe,Legendary Axe,925 Item Power,"1,838 Damage Per Second","[1,337 – 2,005] Damage per Hit",1.10 Attacks per Second (Fast)
3,Marauder Axe,Legendary Axe,925 Item Power,"1,838 Damage Per Second","[1,337 – 2,005] Damage per Hit",1.10 Attacks per Second (Fast)
4,Bullova,Legendary Axe,925 Item Power,"1,838 Damage Per Second","[1,337 – 2,005] Damage per Hit",1.10 Attacks per Second (Fast)
5,Ravager,Legendary Axe,925 Item Power,"1,838 Damage Per Second","[1,337 – 2,005] Damage per Hit",1.10 Attacks per Second (Fast)
6,Snake Axe,Legendary Axe,925 Item Power,"1,838 Damage Per Second","[1,337 – 2,005] Damage per Hit",1.10 Attacks per Second (Fast)
7,Octopian Edge,Legendary Axe,925 Item Power,"1,838 Damage Per Second","[1,337 – 2,005] Damage per Hit",1.10 Attacks per Second (Fast)
8,Axe,Axe,925 Item Power,"1,838 Damage Per Second","[1,337 – 2,005] Damage per Hit",1.10 Attacks per Second (Fast)
9,Hand Axe,Axe,925 Item Power,"1,838 Damage Per Second","[1,337 – 2,005] Damage per Hit",1.10 Attacks per Second (Fast)


### Now to remove the unnecessary strings

In [57]:
# Strip the trailing label so the column keeps only the number text,
# e.g. '925 Item Power' -> '925'. regex=False makes this a literal substring
# replacement regardless of the installed pandas version's default for `regex`.
d4axes_data['Item Power'] = d4axes_data['Item Power'].str.replace(' Item Power', '', regex=False)

In [59]:
# Show the table again to check the cleaned 'Item Power' column.
d4axes_data

Unnamed: 0,Name,Type,Item Power,Damage per Second,Damage per Hit,Attacks per Second
0,Waxing Gibbous,Unique Axe,925,"1,838 Damage Per Second","[1,337 – 2,005] Damage per Hit",1.10 Attacks per Second (Fast)
1,The Butcher's Cleaver,Unique Axe,925,"1,838 Damage Per Second","[1,337 – 2,005] Damage per Hit",1.10 Attacks per Second (Fast)
2,Bearded Axe,Legendary Axe,925,"1,838 Damage Per Second","[1,337 – 2,005] Damage per Hit",1.10 Attacks per Second (Fast)
3,Marauder Axe,Legendary Axe,925,"1,838 Damage Per Second","[1,337 – 2,005] Damage per Hit",1.10 Attacks per Second (Fast)
4,Bullova,Legendary Axe,925,"1,838 Damage Per Second","[1,337 – 2,005] Damage per Hit",1.10 Attacks per Second (Fast)
5,Ravager,Legendary Axe,925,"1,838 Damage Per Second","[1,337 – 2,005] Damage per Hit",1.10 Attacks per Second (Fast)
6,Snake Axe,Legendary Axe,925,"1,838 Damage Per Second","[1,337 – 2,005] Damage per Hit",1.10 Attacks per Second (Fast)
7,Octopian Edge,Legendary Axe,925,"1,838 Damage Per Second","[1,337 – 2,005] Damage per Hit",1.10 Attacks per Second (Fast)
8,Axe,Axe,925,"1,838 Damage Per Second","[1,337 – 2,005] Damage per Hit",1.10 Attacks per Second (Fast)
9,Hand Axe,Axe,925,"1,838 Damage Per Second","[1,337 – 2,005] Damage per Hit",1.10 Attacks per Second (Fast)
