In [1]:
from bs4 import BeautifulSoup
import requests
import pandas as pd

In [2]:
items_url = 'https://en.uesp.net/wiki/Skyrim:Items'
items_page = requests.get(items_url)
items_soup = BeautifulSoup(items_page.text, 'html.parser')

In [3]:
main = items_soup.find('div',{'id':'mw-content-text'})
links = list(set([a['href'] for a  in main.find_all('a')]))

In [4]:
def image_to_alt(text):
    soup = BeautifulSoup(text, 'html.parser')
    images = soup.findAll('img')
    for image in images:
        new_tag = soup.new_tag("p")
        new_tag.string = image['alt']
        image.replace_with(new_tag)
    return str(soup)

In [5]:
all_dfs = []
for link in links:
    if ('Skyrim:' in link) and ('edit' not in link):
        page_url = f'https://en.uesp.net{link}'
        page = requests.get(page_url)
        stripped = image_to_alt(page.text)
        dfs = []
        try:
            dfs = pd.read_html(stripped, header=0, attrs = {'class':'wikitable'})
        except:
            print(f'No tables found on page {link}')
        if len(dfs)>0:
            for df in dfs:
                df['page'] = link
                df.to_csv(f'items/{link.replace("/","_")}')
                if df.shape[0]<1: print(f'Warning: Shape {df.shape} at {page_url}\n')
                all_dfs.append(df)

No tables found on page /wiki/Skyrim:Magic_Items
No tables found on page /wiki/Skyrim:Dragonborn_Items
No tables found on page /wiki/Skyrim:Dawnguard_Items
No tables found on page /wiki/Skyrim:Creation_Club_Items
No tables found on page /wiki/Skyrim:Hearthfire_Items
No tables found on page /wiki/Skyrim:Unique_Items
No tables found on page /wiki/Skyrim:Permanent_Items


In [96]:
combined_df = pd.DataFrame(columns=['Name (ID)', 'Weight', 'Value', 'page'])
for df in all_dfs:
    if ("Name (ID)" in df.columns) and ("Weight" in df.columns) and ("Value" in df.columns) and ("page" in df.columns):
        filt_df = df.filter(['Name (ID)', 'Weight', 'Value', 'page'])
        filt_df['page'] = [p.replace('/wiki/Skyrim:', '').replace('_',' ') for p in filt_df['page']]
        filt_df['Name (ID)'] = [str(n).split('(')[0][:-1].replace('CC','').replace('DB','').replace('DG','') for n in filt_df['Name (ID)']]
        combined_df = pd.concat([filt_df, combined_df])
combined_df = combined_df[combined_df['Name (ID)'] != "Name (ID)"]
combined_df.drop_duplicates(subset=['Name (ID)'])
combined_df = combined_df[combined_df['Name (ID)'].str.contains("Total")==False]
combined_df[['Weight','Value']] = combined_df[['Weight','Value']].apply(pd.to_numeric, errors='coerce')
combined_df.dropna(inplace=True)

In [97]:
combined_df.sort_values(by='Weight', ascending=False)

Unnamed: 0,Name (ID),Weight,Value,page
1,Madness Armor,52.0,4100.0,Madness Ore
14,Madness Armor,52.0,4100.0,Armor
1,Daedric Armor,50.0,3200.0,Daedric
15,Daedric Armor,50.0,3200.0,Armor
13,Daedric Plate Armor,50.0,3200.0,Armor
...,...,...,...,...
9,Glass Arrow,0.0,6.0,Malachite
9,Dragonbone Arrow,0.0,9.0,Dragonplate
9,Elven Arrow,0.0,5.0,Moonstone
8,Iron Arrow,0.0,1.0,Iron


In [98]:
import plotly.express as px
import plotly.graph_objects as go

In [99]:
fig = px.scatter(combined_df, x='Weight', y='Value', color='page', hover_name="Name (ID)", log_x=True, log_y=True)
fig.update_layout(template='plotly_white')
fig.write_html('scatter.html')
fig

In [100]:
import numpy as np

In [101]:
combined_df['Weight_jitter']= combined_df['Weight'].apply(lambda n: n*(np.random.normal(loc=1, scale=.01)))
combined_df['Value_jitter']= combined_df['Value'].apply(lambda n: n*(np.random.normal(loc=1, scale=.01)))

In [105]:
jitter = True

fig = go.Figure()

for multiplier, color in zip([30, 20, 10, 5], ['limegreen', 'yellow', 'orange', 'red']):
    fig.add_trace(go.Scatter(
        x=[.1, max(combined_df['Value']), .1, .1],
        y=[.1/multiplier, max(combined_df['Value'])/multiplier, max(combined_df['Value'])/multiplier, .1/multiplier],
        mode='lines',
        line=dict(width=0, color=color),
        name=f'Value is {multiplier}x Weight',
        hoverinfo='skip',
        fill="toself",
        opacity=1
    )
                 )
    
fig.add_annotation(x=0.3, y=1.7,
            text="<b>Low Value<br>to Weight</b>",
            font=dict(size=16, color="#000"),
            showarrow=False)

fig.add_annotation(x=3, y=-1.2,
            text="<b>High Value<br>to Weight</b>",
            font=dict(size=24, color="limegreen"),
            showarrow=False)

fig.add_annotation(x=1, y=0.01, xref='paper', yref='paper',
            text="Source: <a href='https://en.uesp.net/wiki/Skyrim:Items' target='_blank'>UESP (Unofficial Elder Scrolls Pages)</a><br>Visualization by <a href='http://universalities.com' target='_blank'>Jane Adams</a>",
            font=dict(size=12, color="gray"),
            align='right',
            showarrow=False)
    
fig.add_trace(go.Scatter(y=combined_df[f'Weight{"_jitter" if jitter else ""}'], x=combined_df[f'Value{"_jitter" if jitter else ""}'],
                    mode='markers',
                    name='Item',
                    hovertext=[f'<b>{name}</b><br>Weight: {str(w).replace(".0","")}<br>Value: {int(v)}' for name, w, v in zip(combined_df['Name (ID)'], combined_df['Weight'], combined_df['Value'])],
                    hoverinfo="text",
                    #text=combined_df["Name (ID)"],
              marker=dict(size=5,
                color='black',
                         opacity=0.4)
             ))

fig.update_yaxes(type="log", title='Weight (Log Scale)', range=[-2, 2])
fig.update_xaxes(type="log", title='Value (Log Scale)', range=[-0.5, 4])
fig.update_layout(template='plotly_white', width=800, height=800, title='<b>Skyrim</b><br>Item Weight vs. Value', title_x=1)
fig.show()
fig.write_html('skyrim_items.html')
