# **About**

|Section|Details|
|---|---|
|Script|rs3-request-data|
|Description|rs3-request-data is used to retrieve historical price data for items in Runescape 3. This data will be transformed to predict future item prices.|
|Author|Andrew Yang|

# **Setup**

In [153]:
import requests # use to retrieve data from API
import math
import pandas as pd
import datetime as datetime
import os

In [112]:
# Debug switch: when True, the request helpers print the HTTP status of each call.
show_debug = True

# Weird Gloop price-history API settings.
#   game_base:   "rs" (Runescape 3) or "osrs" (Old School Runescape).
#   data_filter: "all" (all price data), "last90d" (last 90 days), or "sample".
game_base, data_filter = "rs", "all"

# Item category whose items we want to pull.
# Category ids are listed at https://runescape.wiki/w/Application_programming_interface#category
item_category = 13

# **Data Gathering**

##  Retrieve items by category

### Get item categories

In [61]:
# Category ids run from 0 through 43 (e.g. 0 = Miscellaneous, 7 = Costumes, 13 = Herblore materials).
# Full descriptions: https://runescape.wiki/w/Application_programming_interface#category
category_ids = range(44)
category_id_list = list(category_ids)
print(f"All category ids: {category_id_list}")

All category ids: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43]


### Retrieve items in category

In [66]:
# Grabs a list of dictionaries that show how many items are under each "alpha" (i.e. character)
request_category_base = "https://services.runescape.com/m=itemdb_rs/api/catalogue/category.json?"
# The timeout stops the notebook from hanging indefinitely if the service never answers.
r_category = requests.get(request_category_base, params={"category": item_category}, timeout=30)
print(r_category.status_code)

200


In [104]:
# Pull the per-letter item counts (the "alpha" entry) out of the category response.
category_payload = r_category.json()
category_alpha_dict = category_payload["alpha"]

# print(f"Item alpha dict: ")
# display(category_alpha_dict)

Item alpha dict: 


[{'letter': '#', 'items': 0},
 {'letter': 'a', 'items': 6},
 {'letter': 'b', 'items': 7},
 {'letter': 'c', 'items': 32},
 {'letter': 'd', 'items': 5},
 {'letter': 'e', 'items': 1},
 {'letter': 'f', 'items': 2},
 {'letter': 'g', 'items': 29},
 {'letter': 'h', 'items': 2},
 {'letter': 'i', 'items': 2},
 {'letter': 'j', 'items': 3},
 {'letter': 'k', 'items': 2},
 {'letter': 'l', 'items': 6},
 {'letter': 'm', 'items': 13},
 {'letter': 'n', 'items': 1},
 {'letter': 'o', 'items': 0},
 {'letter': 'p', 'items': 6},
 {'letter': 'q', 'items': 0},
 {'letter': 'r', 'items': 5},
 {'letter': 's', 'items': 12},
 {'letter': 't', 'items': 5},
 {'letter': 'u', 'items': 2},
 {'letter': 'v', 'items': 3},
 {'letter': 'w', 'items': 6},
 {'letter': 'x', 'items': 0},
 {'letter': 'y', 'items': 2},
 {'letter': 'z', 'items': 1}]

In [94]:
# Define API endpoint for getting item info.
request_items_base = "https://services.runescape.com/m=itemdb_rs/api/catalogue/items.json?"

def get_item_details(req_category, req_alpha, req_page, timeout=30):
    """Request one page of catalogue items for a category/letter pair.

    Args:
        req_category: Value sent as the ``category`` query parameter.
        req_alpha: Value sent as the ``alpha`` query parameter (a letter or ``#``).
        req_page: Value sent as the ``page`` query parameter.
        timeout: Seconds to wait for the server before giving up, so that a
            stalled connection cannot hang the notebook indefinitely.

    Returns:
        The ``requests.Response`` for the call. The status code is only printed
        (when ``show_debug`` is true), not validated.

    Raises:
        requests.exceptions.Timeout: If the server does not answer within ``timeout``.
    """
    query = {"category": req_category, "alpha": req_alpha, "page": req_page}
    r_items = requests.get(request_items_base, params=query, timeout=timeout)
    if show_debug:
        print(f"Status of item category|alpha|page ({req_category}|{req_alpha}|{req_page}): {r_items.status_code}")
    return r_items

In [99]:
# Module-level lookup table: item id -> item name.
item_dict_list = {}

def get_item_id(item_request):
    """Record every item of a catalogue response in ``item_dict_list``.

    ``item_request`` must expose ``.json()`` returning a mapping with an
    ``"items"`` list whose entries carry ``"id"`` and ``"name"`` keys.
    Existing ids are overwritten; nothing is returned.
    """
    catalogue_items = item_request.json()["items"]
    # The generator feeds pairs one at a time, so entries are inserted in response order.
    item_dict_list.update((entry["id"], entry["name"]) for entry in catalogue_items)

In [101]:
# Walk every letter that has at least one item and fetch all of its result pages.
for alpha_entry in category_alpha_dict:
    item_count = alpha_entry["items"]
    if item_count > 0:
        # Number of requests needed to cover every item (divides by 12, presumably the API page size).
        page_total = math.ceil(item_count / 12)
        for page in range(1, page_total + 1):
            page_response = get_item_details(item_category, alpha_entry["letter"], page)
            get_item_id(page_response)

Status of item category|alpha|page (13|a|1): 200
Status of item category|alpha|page (13|b|1): 200
Status of item category|alpha|page (13|c|1): 200
Status of item category|alpha|page (13|c|2): 200
Status of item category|alpha|page (13|c|3): 200
Status of item category|alpha|page (13|d|1): 200
Status of item category|alpha|page (13|e|1): 200
Status of item category|alpha|page (13|f|1): 200
Status of item category|alpha|page (13|g|1): 200
Status of item category|alpha|page (13|g|2): 200
Status of item category|alpha|page (13|g|3): 200
Status of item category|alpha|page (13|h|1): 200
Status of item category|alpha|page (13|i|1): 200
Status of item category|alpha|page (13|j|1): 200
Status of item category|alpha|page (13|k|1): 200
Status of item category|alpha|page (13|l|1): 200
Status of item category|alpha|page (13|m|1): 200
Status of item category|alpha|page (13|m|2): 200
Status of item category|alpha|page (13|n|1): 200
Status of item category|alpha|page (13|p|1): 200
Status of item categ

In [109]:
# print(f"Item id dictionary ({len(item_dict_list)} entries):")
# display(item_dict_list)

Item id dictionary (153 entries):


{52937: 'Abyssal flesh',
 39067: 'Adrenaline crystal',
 48241: 'Arbuck potion (unf)',
 592: 'Ashes',
 103: 'Avantoe potion (unf)',
 48578: 'Avocado',
 48925: 'Beak snot',
 37973: 'Bloodweed potion (unf)',
 243: 'Blue dragon scale',
 48961: 'Bomb vial',
 48926: 'Bottled dinosaur roar',
 4456: 'Bowl of hot water',
 43993: 'Bull horns',
 6016: 'Cactus spine',
 107: 'Cadantine potion (unf)',
 48586: 'Carambola',
 11326: 'Caviar',
 43973: 'Chinchompa residue',
 48584: 'Ciku',
 48211: 'Clean arbuck',
 261: 'Clean avantoe',
 37953: 'Clean bloodweed',
 265: 'Clean cadantine',
 267: 'Clean dwarf weed',
 21624: 'Clean fellstalk',
 249: 'Clean guam',
 255: 'Clean harralander',
 259: 'Clean irit',
 263: 'Clean kwuarm',
 2481: 'Clean lantadyme',
 251: 'Clean marrentill',
 257: 'Clean ranarr',
 3000: 'Clean snapdragon',
 12172: 'Clean spirit weed',
 253: 'Clean tarromin',
 2998: 'Clean toadflax',
 269: 'Clean torstol',
 14854: 'Clean wergali',
 12109: 'Cockatrice egg',
 5976: 'Coconut',
 5935: 'Coco

##  Retrieving item GE prices from API

In [113]:
# The RS3 Wiki's API is run by Weird Gloop and serves the wiki's stored Grand Exchange data.
# Using it avoids the request limits of Jagex's own Grand Exchange API and gives us all historical data.
request_prices_base = "https://api.weirdgloop.org/exchange/history/{}/{}".format(game_base, data_filter)
print(f"Historical GE prices request endpoint: {request_prices_base}")

Historical GE prices request endpoint: https://api.weirdgloop.org/exchange/history/rs/all


In [118]:
def get_historical_prices(item_filter, timeout=30):
    """Fetch the stored Grand Exchange price history for one item.

    Args:
        item_filter: Item id, sent as the ``id`` query parameter. It must also be a
            key of ``item_dict_list`` when ``show_debug`` is true (used for the log line).
        timeout: Seconds to wait for the server before giving up, so that a
            stalled connection cannot hang the notebook indefinitely.

    Returns:
        The value stored under the item's id (as a string key) in the JSON
        response - presumably a list of price records; confirm against the API.

    Raises:
        requests.exceptions.Timeout: If the server does not answer within ``timeout``.
        KeyError: If the response body has no entry for the requested id.
    """
    r_prices = requests.get(request_prices_base, params={"id": item_filter}, timeout=timeout)
    if show_debug:
        print(f"Status of item {item_filter} ({item_dict_list[item_filter]}):{r_prices.status_code}")
    # The response is keyed by the id rendered as a string.
    return r_prices.json()[str(item_filter)]

In [119]:
# Collect the price history of every known item into one flat list.
ge_prices = []
for item_id_entry in item_dict_list:
    item_history = get_historical_prices(item_id_entry)
    ge_prices += item_history

Status of item 52937 (Abyssal flesh):200
Status of item 39067 (Adrenaline crystal):200
Status of item 48241 (Arbuck potion (unf)):200
Status of item 592 (Ashes):200
Status of item 103 (Avantoe potion (unf)):200
Status of item 48578 (Avocado):200
Status of item 48925 (Beak snot):200
Status of item 37973 (Bloodweed potion (unf)):200
Status of item 243 (Blue dragon scale):200
Status of item 48961 (Bomb vial):200
Status of item 48926 (Bottled dinosaur roar):200
Status of item 4456 (Bowl of hot water):200
Status of item 43993 (Bull horns):200
Status of item 6016 (Cactus spine):200
Status of item 107 (Cadantine potion (unf)):200
Status of item 48586 (Carambola):200
Status of item 11326 (Caviar):200
Status of item 43973 (Chinchompa residue):200
Status of item 48584 (Ciku):200
Status of item 48211 (Clean arbuck):200
Status of item 261 (Clean avantoe):200
Status of item 37953 (Clean bloodweed):200
Status of item 265 (Clean cadantine):200
Status of item 267 (Clean dwarf weed):200
Status of item 

In [None]:
#display(ge_prices)

## Consolidate data into dataframe

In [123]:
# Build a pandas DataFrame from the collected price records (one row per record).
ge_df = pd.DataFrame(data=ge_prices)

In [125]:
#display(ge_df)

Unnamed: 0,id,price,volume,timestamp
0,52937,1500,,1630540800000
1,52937,1500,,1630627200000
2,52937,1500,,1630713600000
3,52937,1500,,1630800000000
4,52937,1500,,1630886400000
...,...,...,...,...
722550,43983,502,14871.0,1721220601000
722551,43983,502,27393.0,1721328723000
722552,43983,483,9321.0,1721425246000
722553,43983,483,6821.0,1721492896000


# Data supplementation

In [128]:
# Fraction of missing values per column - expected to be concentrated in the volume column.
null_counts = ge_df.isna().sum()
null_counts / len(ge_df)

id           0.000000
price        0.000000
volume       0.850994
timestamp    0.000000
dtype: float64

## Improve human readability

In [139]:
# Convert unix timestamp to date.
def unix_to_date(ts):
    """Convert a Unix timestamp in milliseconds to a ``YYYY-MM-DD`` date string in UTC.

    Args:
        ts: Milliseconds since the Unix epoch.

    Returns:
        The UTC calendar date of ``ts`` formatted as ``'%Y-%m-%d'``.
    """
    # datetime.timezone.utc is the same object as datetime.UTC, but the UTC alias
    # only exists on Python 3.11+, so this spelling also runs on older interpreters.
    moment = datetime.datetime.fromtimestamp(ts / 1000, datetime.timezone.utc)
    return moment.strftime('%Y-%m-%d')

In [212]:
# Determine if a unix timestamp represents a weekend or weekday.
def unix_is_weekday(ts):
    """Return 1 when a millisecond Unix timestamp falls on Monday-Friday (UTC), else 0.

    Args:
        ts: Milliseconds since the Unix epoch.

    Returns:
        ``1`` for a weekday, ``0`` for Saturday or Sunday, judged in UTC.
    """
    # datetime.timezone.utc is the same object as datetime.UTC, but the UTC alias
    # only exists on Python 3.11+, so this spelling also runs on older interpreters.
    moment = datetime.datetime.fromtimestamp(ts / 1000, datetime.timezone.utc)
    # weekday() numbers Monday as 0 and Sunday as 6.
    return 1 if moment.weekday() < 5 else 0

In [150]:
# Look up an item's name from its id.
def id_to_name(item_id):
    """Return the name stored in ``item_dict_list`` for ``item_id`` (coerced to ``int``)."""
    lookup_key = int(item_id)
    return item_dict_list[lookup_key]

In [213]:
# Derive readable columns: calendar date, weekday flag, and item name.
# Each entry is (new column, source column, converter applied to every value).
derived_columns = (
    ("date", "timestamp", unix_to_date),
    ("weekday", "timestamp", unix_is_weekday),
    ("name", "id", id_to_name),
)
for target_column, source_column, converter in derived_columns:
    ge_df[target_column] = ge_df[source_column].map(converter)

## Add differenced time series

In [168]:
# Per-item price differences at lags of 1, 7, 14 and 30 rows
# (1 day, 1 week, 2 weeks, ~1 month when each item has one row per day).
for lag in (1, 7, 14, 30):
    ge_df[f"diff_{lag}_day"] = ge_df.groupby("id")["price"].diff(lag)

## Add moving average time series

In [206]:
# Get 1 week, 2 week, and ~1 month moving average (windows are counted in rows per item).
# Grouped rolling results carry a (group key, original row label) MultiIndex ordered by group key.
# Only the group level is dropped, so the original row labels remain and pandas aligns every
# average with the row it was computed for. Resetting the whole index would renumber the values
# in group-sorted order and attach them to the wrong rows whenever ge_df is not sorted that way.
# Grouping uses "id", the same key as the differenced columns.
for window in (7, 14, 30):
    ge_df[f"avg_{window}_day"] = (
        ge_df.groupby("id")["price"]
        .rolling(window)
        .mean()
        .reset_index(level=0, drop=True)
    )

## Add social media sourced information (future update)

In [218]:
# Weird Gloop (the RS3 Wiki's API) endpoint that returns all social media information.
request_socials_base = "https://api.weirdgloop.org/runescape/social"
max_iter = 100  # Failsafe: maximum number of pages to request.
halt = False  # Loop flag - set once the API response reports there are no further pages.

In [222]:
page = 1
# Reset the flag here so that re-running this cell after a completed run fetches the
# pages again instead of skipping the loop and leaving the list empty.
halt = False
social_dict_list = []

# Keep requesting pages until the API reports no more pages or the failsafe limit is hit.
while not halt and page <= max_iter:
    # The timeout stops the loop from hanging indefinitely on a stalled connection.
    r_social = requests.get(request_socials_base, params={"page": page}, timeout=30)

    if show_debug:
        print(f"Status of page {page}: {r_social.status_code}")
    page += 1

    # Parse the body once and reuse it for both the pagination flag and the records.
    social_payload = r_social.json()

    if not social_payload["pagination"]["has_more"]:  # The response tells us there are no more pages.
        halt = True

    social_dict_list.extend(social_payload["data"])  # Add this page's dictionaries to our list.

Status of page 1: 200
Status of page 2: 200
Status of page 3: 200
Status of page 4: 200
Status of page 5: 200
Status of page 6: 200
Status of page 7: 200
Status of page 8: 200
Status of page 9: 200
Status of page 10: 200
Status of page 11: 200
Status of page 12: 200
Status of page 13: 200
Status of page 14: 200
Status of page 15: 200
Status of page 16: 200
Status of page 17: 200
Status of page 18: 200
Status of page 19: 200
Status of page 20: 200
Status of page 21: 200
Status of page 22: 200
Status of page 23: 200
Status of page 24: 200
Status of page 25: 200
Status of page 26: 200
Status of page 27: 200
Status of page 28: 200
Status of page 29: 200
Status of page 30: 200
Status of page 31: 200
Status of page 32: 200
Status of page 33: 200
Status of page 34: 200
Status of page 35: 200
Status of page 36: 200
Status of page 37: 200
Status of page 38: 200
Status of page 39: 200
Status of page 40: 200
Status of page 41: 200
Status of page 42: 200
Status of page 43: 200
Status of page 44: 2

In [224]:
# Turn the collected social media records into a DataFrame (one row per record).
social_df = pd.DataFrame(data=social_dict_list)

In [280]:
# Reduce a date-time value to the first ten characters of its text form.
def datetime_to_string(dt):
    """Return the first 10 characters of ``str(dt)``.

    For text that starts with ``YYYY-MM-DD`` this is the date part; shorter
    text is returned whole.
    """
    as_text = str(dt)
    return as_text[0:10]

In [282]:
# Add a plain date string so social media records can be linked to specific dates.
date_added = social_df["dateAdded"]
social_df["date"] = date_added.map(datetime_to_string)

In [260]:
# Enrich social media history dataframe based on title of media item:
#   Launch usually indicates a new release.
#   Bosses are big drop sources of items, and may affect our items of interest.
#   Quests show info about an upcoming Runescape quest. These quests may unlock new things which require our items of interest.
#   Event usually indicates a new upcoming event.
#   Double XP tells players when the next Double XP is coming up, and is a known market mover.
#   Update is more general, but can include information on changes for any of the above info... or something irrelevant.
# One keyword table replaces six copy-pasted lambdas. Matching is a case-sensitive plain-substring
# test (regex=False); na=False marks missing titles (None or NaN) as False, so a record without
# a title can no longer raise a TypeError.
title_keywords = {
    "launch_update": "Launch",
    "boss_update": "Boss",
    "quest_update": "Quest",
    "event_update": "Event",
    "dxp_update": "Double XP",
    "general_update": "Update",
}
for flag_column, keyword in title_keywords.items():
    social_df[flag_column] = social_df["title"].str.contains(keyword, regex=False, na=False)

In [286]:
# Running totals of each update type, accumulated from the last row up to the first;
# assignment aligns the reversed result back onto the original row labels.
for update_kind in ("launch", "boss", "quest", "event", "dxp", "general"):
    reversed_flags = social_df.loc[::-1, f"{update_kind}_update"]
    social_df[f"{update_kind}_cumul"] = reversed_flags.cumsum()

In [287]:
# Keep only the date, the per-type flags, and the cumulative counts.
social_summary_columns = [
    'date',
    'launch_update', 'quest_update', 'event_update', 'dxp_update', 'general_update', 'boss_update',
    'launch_cumul', 'boss_cumul', 'quest_cumul', 'event_cumul', 'dxp_cumul', 'general_cumul',
]
social_cumul_df = social_df[social_summary_columns]

#display(social_cumul_df)

# View results

In [214]:
# Show the enriched price frame (ids, prices, dates, names, differences, moving averages).
display(ge_df)

Unnamed: 0,id,price,volume,timestamp,date,name,diff_1_day,diff_7_day,diff_14_day,diff_30_day,avg_7_day,avg_14_day,avg_30_day,weekday
0,52937,1500,,1630540800000,2021-09-02,Abyssal flesh,,,,,,,,1
1,52937,1500,,1630627200000,2021-09-03,Abyssal flesh,0.0,,,,,,,1
2,52937,1500,,1630713600000,2021-09-04,Abyssal flesh,0.0,,,,,,,0
3,52937,1500,,1630800000000,2021-09-05,Abyssal flesh,0.0,,,,,,,0
4,52937,1500,,1630886400000,2021-09-06,Abyssal flesh,0.0,,,,,,,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
722550,43983,502,14871.0,1721220601000,2024-07-17,Zygomite fruit,-12.0,-25.0,-20.0,-8.0,512.285714,515.857143,520.200000,1
722551,43983,502,27393.0,1721328723000,2024-07-18,Zygomite fruit,0.0,-12.0,-15.0,-8.0,510.571429,514.785714,519.933333,1
722552,43983,483,9321.0,1721425246000,2024-07-19,Zygomite fruit,-19.0,-31.0,-34.0,-41.0,506.142857,512.357143,518.566667,1
722553,43983,483,6821.0,1721492896000,2024-07-20,Zygomite fruit,0.0,-31.0,-33.0,-41.0,501.714286,510.000000,517.200000,0


# Export data

In [284]:
#social_cumul_df.to_csv("social_info.csv", index = False)

In [208]:
# Build the export location with os.path.join so the separator matches the host OS;
# a hard-coded "\\" only produces a valid path on Windows.
data_dest = os.path.join(os.getcwd(), "data")
print(f"Data destination: {data_dest}")

# Stamp the export with today's local date.
today = datetime.date.today().strftime('%Y-%m-%d')
print(f"Current date: {today}")

filename = f"ge-prices-{today}.csv"
print(f"Final file location: {os.path.join(data_dest, filename)}")

Data destination: c:\Users\yangs\Documents\Coding\rs3-ml\data
Current date: 2024-07-21
Final file location: c:\Users\yangs\Documents\Coding\rs3-ml\data\ge-prices-2024-07-21.csv


In [209]:
# ge_df.to_csv(filename, index=False)  