The goal of this notebook is to understand the original data structure and define basic operations.

The source data explored here was previously downloaded from the Path of Exile public stash tabs API to local .txt files. Each file in the data folder represents a single API call to http://www.pathofexile.com/api/public-stash-tabs?id= made with a given **nextChangeId**: the id returned by one call is applied to the next call, and so on iteratively.

# TODO

In [1]:
import os
import re
import json
import random

import pandas as pd

In [2]:
# show up to 500 columns when a dataframe is displayed
pd.set_option('display.max_columns', 500)

# folder whose files are read by get_stashes_items
DATA_PATH = '../../data/temp/stashes_delirium/'

LEAGUE = 'Delirium'

# (short key, full name) pairs; the two public lists are derived from this
# table so that position i of one always matches position i of the other
_CURRENCIES = (
    ('alt', "Orb of Alteration"),
    ('fuse', "Orb of Fusing"),
    ('alch', "Orb of Alchemy"),
    ('chaos', "Chaos Orb"),
    ('gcp', "Gemcutter's Prism"),
    ('exa', "Exalted Orb"),
    ('chrom', "Chromatic Orb"),
    ('jew', "Jeweller's Orb"),
    ('chance', "Orb of Chance"),
    ('chisel', "Cartographer's Chisel"),
    ('scour', "Orb of Scouring"),
    ('blessed', "Blessed Orb"),
    ('regret', "Orb of Regret"),
    ('regal', "Regal Orb"),
    ('divine', "Divine Orb"),
    ('vaal', "Vaal Orb"),
    ('silver', "Silver Coin"),
)
CURRENCY_KEY = [key for key, _ in _CURRENCIES]
CURRENCY_NAME = [name for _, name in _CURRENCIES]

# frameType code -> label; each code is the position of its label in the tuple
FRAME_TYPES = dict(enumerate((
    'normal', 'magic', 'rare', 'unique', 'gem',
    'currency', 'divination card', 'quest item', 'prophecy', 'relic',
)))


# item categories kept by the filtering cells
ITEMS_CATEGORIES = ['accessories', 'armour', 'jewels', 'weapons', 'currency']



In [31]:
def get_stashes_dict(path):
    """Load one saved API response and return its list of stashes.

    Args:
        path: Location of a file holding the JSON body of one API call.

    Returns:
        The value stored under the ``'stashes'`` key, or an empty list when
        the file content cannot be decoded as JSON or has no such entry
        (the offending path is printed in that case).

    Raises:
        OSError: If the file cannot be opened; this is not swallowed.
    """
    with open(path, 'rb') as file:
        try:
            return json.load(file)['stashes']
        except (ValueError, KeyError, TypeError):
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError;
            # KeyError / TypeError cover a payload that is valid JSON but is
            # not an object with a 'stashes' entry. Anything else (e.g.
            # KeyboardInterrupt) is deliberately left to propagate.
            print('ERROR: ', path)
            return []
        
def get_stashes_items(verbose=False, target_category=None, n_files=100):
    """Collect the items of public, non-empty stashes from a random sample of files.

    Each returned item dict is the one parsed from the file, with two keys
    copied onto it from its stash: ``'stashNote'`` (from ``stash['stash']``)
    and ``'league'`` (from ``stash['league']``).

    Args:
        verbose: Currently unused; kept so existing calls keep working.
        target_category: When not None, only items whose
            ``item['extended']['category']`` is contained in it are returned.
        n_files: Maximum number of files of ``DATA_PATH`` to read. The files
            are drawn at random without replacement; when the folder holds
            fewer files, all of them are read.

    Returns:
        A list of item dicts.
    """
    files = os.listdir(DATA_PATH)

    # select n_files random files if possible, otherwise all of them
    files = random.sample(files, min(n_files, len(files)))

    items = []
    for file_name in files:
        # os.path.join works whether or not DATA_PATH ends with a separator
        for stash in get_stashes_dict(os.path.join(DATA_PATH, file_name)):
            if not stash['items'] or not stash['public']:
                continue
            for item in stash['items']:
                # add stash related data to single items
                item['stashNote'] = stash['stash']
                item['league'] = stash['league']

                if target_category is None or item['extended']['category'] in target_category:
                    items.append(item)
    return items

# Matches notes such as "~price 10 chaos", "~b/o 1.5 exa" or "~price 3/20 alch".
# Groups: the amount, the optional amount after the slash, the currency token.
# The trailing (?!\S) requires the currency token to end at whitespace or at
# the end of the string, so "chaos-orb" is not read as "chaos".
_PRICE_NOTE_RE = re.compile(
    r'(?:~price|~b/o)\s(\d+(?:\.\d+)?)(?:/(\d+(?:\.\d+)?))?\s(\w+)(?!\S)'
)


def get_string_price(string):
    """Parse a price note into its currency, quantity and stack size.

    The fields are taken from the regex groups rather than from splitting on
    a single space, so any whitespace character the pattern accepts between
    the fields is handled the same way.

    Args:
        string: The note text, expected to start with ``~price`` or ``~b/o``.

    Returns:
        A tuple ``(currency, quantity, sellingStackSize)``. ``quantity`` is a
        float rounded to two decimals; ``sellingStackSize`` is the rounded
        float written after the slash, or None when the note has no slash.

    Raises:
        ValueError: If ``string`` is not a str, does not start with a
            well-formed price, or names a currency that is not in
            ``CURRENCY_KEY``.
    """
    match = _PRICE_NOTE_RE.match(string) if isinstance(string, str) else None
    if match is None:
        raise ValueError('not a valid price note: {!r}'.format(string))

    quantity, selling_stack_size, currency = match.groups()

    # check if currency is an allowed value
    if currency not in CURRENCY_KEY:
        raise ValueError('unsupported currency in price note: {!r}'.format(string))

    quantity = round(float(quantity), 2)
    if selling_stack_size is not None:
        selling_stack_size = round(float(selling_stack_size), 2)
    return currency, quantity, selling_stack_size
    
def get_empty_columns(df):
    """Return the labels of the columns of ``df`` that hold only missing values."""
    empty_columns = []
    for label in df.columns:
        missing_flags = df[label].isna()
        if all(missing_flags):
            empty_columns.append(label)
    return empty_columns

# Data processing

## Initial manipulation and filtering

In [43]:
# one dataframe row per item dict returned by get_stashes_items
items = pd.DataFrame(get_stashes_items())

# alphabetically order dataframe columns
items = items[sorted(items.columns)]

# displayed as the cell output: (rows, columns)
items.shape

(264623, 54)

In [44]:
# extract items category and subcategory
# (subcategories are joined with a space; items without any get None)
items['subCategory'] = items.extended.apply(lambda y: ' '.join(y['subcategories'])
                                            if 'subcategories' in y else None)
items['category'] = items.extended.apply(lambda y: y['category'])

# drop unwanted categories and filter out unwanted subcategories in one pass:
# a single boolean mask followed by an explicit copy replaces the in-place
# drop on a filtered slice, so later column assignments act on a frame that
# owns its data
wanted_category = items.category.isin(ITEMS_CATEGORIES)
cluster_jewel = (items.subCategory == 'cluster') & (items.category == 'jewels')
items = items[wanted_category & ~cluster_jewel].copy()

items.shape

(166742, 56)

In [45]:
# extract items price and drop unpriced items
def _parse_item_price(note):
    """Return get_string_price(note), or three Nones when the note holds no valid price."""
    try:
        # item has a valid price
        return get_string_price(note)
    except (ValueError, TypeError, IndexError):
        # item has no valid price: the note is missing (not a string) or is
        # not a well-formed price. Other exceptions, KeyboardInterrupt
        # included, are left to propagate.
        # NOTE: falling back to the stash-level note (stashNote) was tried
        # here and left disabled.
        return (None, None, None)


# own the data before adding columns, in case items is a filtered slice
items = items.copy()

# a frame without any 'note' column simply has no priced item
notes = items['note'] if 'note' in items else [None] * len(items)
price = [_parse_item_price(note) for note in notes]

if price:
    (items['priceCurrency'], items['priceQuantity'], items['sellingQuantity']) = zip(*price)
else:
    # zip(*[]) yields nothing to unpack; still create the three columns
    for price_column in ('priceCurrency', 'priceQuantity', 'sellingQuantity'):
        items[price_column] = None

# filter out unwanted selling currencies
items = items[(items.priceCurrency.isin(CURRENCY_KEY)) & (items.priceQuantity != 0)]

items.shape

(42656, 59)

## Mod based items processing

In [76]:
# independent copy of the priced items in the four categories processed in this section
mitems = items[items.category.isin(['jewels', 'armour', 'weapons', 'accessories'])].copy()
mitems.shape

(40218, 59)

In [77]:
# translate the numeric frameType into its label; Series.map leaves NaN for a
# code that is missing from FRAME_TYPES instead of raising KeyError, and such
# rows are then removed by the rarity filter below
mitems['rarity'] = mitems.frameType.map(FRAME_TYPES)
# keep normal / magic / rare items (the currency clause is kept as in the
# original filter); the explicit copy lets later cells add columns safely
mitems = mitems[(mitems.rarity.isin(['normal', 'magic', 'rare'])) | (mitems.category == 'currency')].copy()
mitems.shape

(30863, 60)

In [78]:
# copy the 'prefixes' / 'suffixes' entries of the extended dict into their own columns (None when absent)
mitems['nPrefixes'] = mitems.extended.apply(lambda extended: extended.get('prefixes'))
mitems['nSuffixes'] = mitems.extended.apply(lambda extended: extended.get('suffixes'))

In [79]:
# explode "influences" column into distinct dataframe columns
# a value that is not a dict is replaced by an empty dict, so that row gets NaN in every influence column
mitems_influences = mitems['influences'].apply(lambda y: y if isinstance(y, dict) else {})
# one column per key found in the dicts, rows aligned on the mitems index
mitems_influences = pd.DataFrame(mitems_influences.to_list(), index=mitems_influences.index)
# prefix every key, e.g. "shaper" -> "influenceShaper"
mitems_influences.columns = ['influence{}'.format(v.capitalize()) for v in mitems_influences.columns]
# index-on-index left merge: every mitems row is kept
mitems = pd.merge(mitems, mitems_influences, left_index=True, right_index=True, how='left')
mitems.shape

(30863, 68)

In [80]:
def mitems_prop_formatting(mitems, target_props):
    """Flatten list-valued property columns into a long table plus a vocabulary.

    For every row of ``mitems`` and every column named in ``target_props``
    whose cell is a list, each property dict with a non-empty ``'values'``
    list yields one record. The first element of the first value is split on
    ``'-'``; each piece, stripped of ``+``, ``-`` and ``%``, is stored as a
    float under ``value0``, ``value1``, ... and parsing stops at the first
    piece that is not a number (the record is still kept).

    Args:
        mitems: Dataframe holding the property columns; its index labels
            become the ``itemId`` of the records.
        target_props: Names of the columns to read.

    Returns:
        A tuple ``(props, vocabulary)`` of dataframes. ``props`` has the
        columns ``itemId``, ``propId`` and the ``value{i}`` columns that
        occurred; ``vocabulary`` has a single ``text`` column whose row
        position is the ``propId`` of that property name.
    """
    # property name -> id; dicts keep insertion order, so ids are assigned in
    # order of first appearance and the lookup is O(1)
    vocabulary = {}
    records = []

    for item_id, row in mitems.iterrows():
        for target_prop in target_props:
            props = row[target_prop]
            if not isinstance(props, list):
                continue
            for prop in props:
                if len(prop['values']) == 0:
                    continue
                record = {
                    'itemId': item_id,
                    'propId': vocabulary.setdefault(prop['name'], len(vocabulary)),
                }
                # "10-20" style ranges become value0 / value1; the loop name
                # must not reuse the row variable, otherwise a second entry
                # of target_props would index a string instead of the row
                for i, piece in enumerate(prop['values'][0][0].split('-')):
                    try:
                        record['value{}'.format(i)] = float(re.sub(r'(\+|-|%)', '', piece))
                    except ValueError:
                        break
                records.append(record)

    mitems_props = pd.DataFrame(records)
    mitems_props_vocabulary = pd.DataFrame(list(vocabulary), columns=['text'])
    return mitems_props, mitems_props_vocabulary

def mitems_mod_formatting(mitems, mod_types):
    """Flatten list-valued mod columns into a long table plus a vocabulary.

    For every row of ``mitems`` and every column named in ``mod_types`` whose
    cell is a list, each mod string yields one record. The generic form of a
    mod is its text with every run of digits replaced by ``#``; the digit
    runs themselves are stored as ints under ``value0``, ``value1``, ...

    Args:
        mitems: Dataframe holding the mod columns; its index labels become
            the ``itemId`` of the records.
        mod_types: Names of the columns to read.

    Returns:
        A tuple ``(mods, vocabulary)`` of dataframes. ``mods`` has the
        columns ``itemId``, ``modId`` and the ``value{i}`` columns that
        occurred; ``vocabulary`` has the columns ``text`` and ``modType``,
        and the row position of a (generic text, mod type) pair is its
        ``modId``.
    """
    # (generic text, mod type) -> id; a dict gives O(1) lookup instead of
    # scanning a list for every mod, and keeps first-appearance order
    vocabulary = {}
    records = []

    for item_id, item in mitems.iterrows():
        for mod_type in mod_types:
            mods = item[mod_type]
            if not isinstance(mods, list):
                continue
            for mod in mods:
                vocabulary_key = (re.sub(r'\d+', '#', mod), mod_type)
                record = {
                    'itemId': item_id,
                    'modId': vocabulary.setdefault(vocabulary_key, len(vocabulary)),
                }
                for i, number in enumerate(re.findall(r'\d+', mod)):
                    record['value{}'.format(i)] = int(number)
                records.append(record)

    mitems_mods = pd.DataFrame(records)
    mitems_mods_vocabulary = pd.DataFrame(list(vocabulary), columns=['text', 'modType'])
    return mitems_mods, mitems_mods_vocabulary

# build the long-format property and mod tables together with their vocabularies
# NOTE(review): mitems was selected from the four non-currency categories, so the category!='currency' filters look like no-ops — confirm
mitems_prop, mitems_prop_voc = mitems_prop_formatting(mitems[mitems.category!='currency'], ['properties'])
mitems_mods, mitems_mods_voc = mitems_mod_formatting(mitems[mitems.category!='currency'], ['craftedMods', 'enchantMods', 'explicitMods', 'implicitMods'])

# jewels only
# mitems_mods, mitems_mods_voc = mitems_mod_formatting(mitems[mitems.category!='currency'], ['explicitMods', 'implicitMods'])

In [81]:
# turn each item's requirement list into a {name: int value} dict (empty dict when the cell is not a list)
requirement_dicts = mitems['requirements'].apply(
    lambda y: {req['name']: int(req['values'][0][0]) for req in y} if isinstance(y, list) else {}
)
# one column per requirement name, rows aligned on the mitems index
mitems_req = pd.DataFrame(requirement_dicts.to_list(), index=requirement_dicts.index)
mitems_req.columns = ['requirement{}'.format(col_name.capitalize()) for col_name in mitems_req.columns]

# fold each long-named column into its short-named counterpart: wherever the
# long column has a value it is written to the short column, which is
# created on first use
for long_name, short_name in (
    ('requirementStrength', 'requirementStr'),
    ('requirementDexterity', 'requirementDex'),
    ('requirementIntelligence', 'requirementInt'),
):
    if long_name in mitems_req:
        has_value = mitems_req[long_name].notna()
        mitems_req.loc[has_value, short_name] = mitems_req.loc[has_value, long_name].values

# the long-named columns are no longer needed; absent ones are ignored
mitems_req = mitems_req.drop(
    columns=['requirementStrength', 'requirementDexterity', 'requirementIntelligence'],
    errors='ignore',
)

mitems = pd.merge(mitems, mitems_req, left_index=True, right_index=True, how='left')

mitems.shape

(30863, 72)

In [82]:
# split mitems socket column into separate dataframe
# each socket becomes one record tagged with the index label of its item;
# fresh dicts are built so the dicts stored inside mitems are left untouched
mitems_sockets = pd.DataFrame([
    dict(socket, itemId=item_id)
    for item_id, item_sockets in mitems['sockets'].items()
    if isinstance(item_sockets, list)
    for socket in item_sockets
])
# errors='ignore' keeps this working when there is no 'attr' column,
# e.g. when no item in the sample has sockets
mitems_sockets.drop(columns=['attr'], inplace=True, errors='ignore')

In [83]:
# number of entries in the veiledMods list, 0 when the cell is not a list
mitems['nVeiledMods'] = mitems.veiledMods.map(
    lambda veiled_mods: len(veiled_mods) if isinstance(veiled_mods, list) else 0
)

In [84]:
# useless variables
# errors='ignore' makes the drop skip any listed column that is not present in the frame
# NOTE(review): 'sellingStackSize' is not a column created in this notebook (the price cell
# names it 'sellingQuantity'), so it is silently skipped here — confirm which one was intended
mitems.drop(columns=[
    'abyssJewel', 'additionalProperties', 'artFilename', 'cosmeticMods', 'craftedMods', 
    'delve', 'descrText', 'elder', 'enchantMods', 'explicitMods', 'extended', 'flavourText', 'fractured', 
    'fracturedMods', 'frameType', 'h', 'hybrid', 'icon', 'id', 'implicitMods', 'incubatedItem', 'influences',
    'inventoryId', 'isRelic', 'maxStackSize', 'name', 'nextLevelRequirements',
    'note', 'properties', 'prophecyText', 'requirements', 'secDescrText', 'shaper',
    'socketedItems', 'sockets', 'stackSize', 'stashNote', 'support', 'typeLine', 'utilityMods',
    'veiledMods', 'veiled', 'verified', 'w', 'x', 'y', 'itemLevel', 'sellingStackSize'
    ], inplace=True, errors='ignore')

In [85]:
# alphabetically order the remaining columns, then show five random rows
mitems = mitems[sorted(mitems.columns)]
mitems.sample(5)

Unnamed: 0,category,corrupted,duplicated,identified,ilvl,influenceCrusader,influenceElder,influenceHunter,influenceRedeemer,influenceShaper,influenceWarlord,league,nPrefixes,nSuffixes,nVeiledMods,priceCurrency,priceQuantity,rarity,requirementDex,requirementInt,requirementLevel,requirementStr,sellingQuantity,subCategory,synthesised,talismanTier
182800,weapons,True,,True,70,True,,,,,,Delirium,3.0,1.0,0,chaos,10,rare,58.0,123.0,53.0,,,runedagger dagger,,
210456,jewels,,,True,61,,,,,,,Delirium,2.0,1.0,0,chaos,20,rare,,,,,,,,
227380,jewels,,,True,74,,,,,,,Delirium,2.0,2.0,0,chaos,2,rare,,,,,,,,
141806,accessories,,,True,47,,,,,,,Delirium,1.0,3.0,0,vaal,1,rare,,,29.0,,,ring,,
110874,armour,,,True,72,,,,,,,Delirium,2.0,3.0,0,chaos,5,rare,48.0,,51.0,48.0,,boots,,


In [86]:
mitems_sockets.sample(5)

Unnamed: 0,group,sColour,itemId
11053,1,B,89487
18718,0,G,144850
33091,0,B,242871
28648,0,B,213005
5819,0,R,46759


In [87]:
mitems_mods_voc.sample(5)

Unnamed: 0,text,modType
1166,#% increased Kinetic Blast Area of Effect,enchantMods
345,#% chance to Dodge Attack Hits if you have Blo...,explicitMods
1179,Skills Cost no Mana while Focussed,craftedMods
940,Socketed Gems are Supported by Level # Spell C...,explicitMods
426,Kinetic Blast has a #% chance for an additiona...,enchantMods


In [88]:
mitems_mods.sample(5)

Unnamed: 0,itemId,modId,value0,value1,value2
33053,58026,49,2.0,4.0,
100386,167038,156,37.0,,
116453,191767,33,31.0,,
150530,242833,179,3.0,33.0,
162781,263020,1,108.0,,


In [93]:
display(mitems_prop.sample(5))
display(mitems_prop_voc.sample(5))

Unnamed: 0,itemId,propId,value0,value1
10415,95195,9,22.0,
29622,244622,1,396.0,
28368,233757,13,14.0,29.0
13157,118207,1,106.0,
2361,19111,8,19.0,


Unnamed: 0,text
14,Quality (Life and Mana Modifiers)
6,Weapon Range
15,Quality (Defence Modifiers)
7,Armour
12,Quality (Attribute Modifiers)


In [72]:
temp = mitems.sample(1)

In [74]:
# show the sampled row as a {column: value} dict; positional access works for
# whichever row was drawn, unlike a hard-coded index label
temp.iloc[0].to_dict()

{'category': 'armour',
 'corrupted': nan,
 'duplicated': nan,
 'identified': True,
 'ilvl': 86,
 'influenceCrusader': nan,
 'influenceElder': nan,
 'influenceHunter': nan,
 'influenceRedeemer': True,
 'influenceShaper': nan,
 'influenceWarlord': nan,
 'league': 'Delirium',
 'nPrefixes': 1.0,
 'nSuffixes': 3.0,
 'nVeiledMods': 0,
 'priceCurrency': 'chaos',
 'priceQuantity': 10.0,
 'rarity': 'rare',
 'requirementDex': nan,
 'requirementInt': nan,
 'requirementLevel': 60.0,
 'requirementStr': 101.0,
 'sellingQuantity': None,
 'subCategory': 'helmet',
 'synthesised': nan,
 'talismanTier': nan}

### Jewels

In [60]:
# independent copy of the jewel rows of mitems
jewel = mitems[mitems.category=='jewels'].copy()
jewel.shape

(7361, 26)

In [74]:
# True where the subcategory is exactly 'abyss'
jewel['isAbyss'] = jewel.subCategory == 'abyss'

In [78]:
# remove the columns not kept for jewels; errors='ignore' skips listed columns that are not present
# NOTE(review): 'itemLevel' and 'veiled' are also in the mitems drop list above, so they are probably already gone here — confirm
jewel.drop(columns=['category', 'influenceCrusader', 'influenceElder', 'influenceHunter', 'influenceRedeemer', 
                    'influenceShaper', 'influenceWarlord', 'itemLevel', 'nVeiledMods', 'requirementDex', 
                    'requirementInt', 'requirementStr', 'subCategory', 'synthesised',
                    'talismanTier', 'veiled'],
          inplace=True, errors='ignore')

In [80]:
jewel.sample()

Unnamed: 0,corrupted,duplicated,identified,ilvl,league,nPrefixes,nSuffixes,priceCurrency,priceQuantity,rarity,requirementLevel,isAbyss
160356,,,True,74,Delirium,2.0,2.0,chaos,5,rare,49.0,True


In [None]:
# NOTE(review): bare set literal in a cell with no execution count; it looks like a scratch
# list of snake_case column names and is not assigned to anything — confirm before relying on it
{'rarity', 'n_suffixes', 'league', 'ilvl', 'identified', 'corrupted',
'sub_category', 'price_currency', 'duplicated', 'price_quantity',
'requirement_level', 'n_prefixes', 'category'}

### Accessories

In [118]:
# independent copy of the accessory rows of mitems
accessory = mitems[mitems.category=='accessories'].copy()
accessory.shape

(9689, 26)

In [119]:
# remove the columns not kept for accessories; errors='ignore' skips listed columns that are not present
accessory.drop(columns=['category', 'itemLevel', 'requirementDex', 'requirementInt', 
                        'requirementStr'],
              inplace=True, errors='ignore')

In [120]:
accessory.columns

Index(['corrupted', 'duplicated', 'identified', 'ilvl', 'influenceCrusader',
       'influenceElder', 'influenceHunter', 'influenceRedeemer',
       'influenceShaper', 'influenceWarlord', 'league', 'nPrefixes',
       'nSuffixes', 'nVeiledMods', 'priceCurrency', 'priceQuantity', 'rarity',
       'requirementLevel', 'subCategory', 'synthesised', 'talismanTier',
       'veiled'],
      dtype='object')

### Weapons

In [144]:
# independent copy of the weapon rows of mitems
weapon = mitems[mitems.category=='weapons'].copy()
weapon.shape

(3725, 25)

In [145]:
# talismanTier is the only column removed for weapons; errors='ignore' skips it when absent
weapon.drop(columns=['talismanTier'],
              inplace=True, errors='ignore')

In [146]:
weapon.columns

Index(['category', 'corrupted', 'duplicated', 'identified', 'ilvl',
       'influenceCrusader', 'influenceElder', 'influenceHunter',
       'influenceRedeemer', 'influenceShaper', 'influenceWarlord', 'league',
       'nPrefixes', 'nSuffixes', 'nVeiledMods', 'priceCurrency',
       'priceQuantity', 'rarity', 'requirementDex', 'requirementInt',
       'requirementLevel', 'requirementStr', 'subCategory', 'synthesised',
       'veiled'],
      dtype='object')

### Armour

In [163]:
# independent copy of the armour rows of mitems
armour = mitems[mitems.category=='armour'].copy()
armour.shape

(9548, 25)

In [164]:
# talismanTier is the only column removed for armour; errors='ignore' skips it when absent
armour.drop(columns=['talismanTier'],
              inplace=True, errors='ignore')

In [165]:
armour.columns

Index(['category', 'corrupted', 'duplicated', 'identified', 'ilvl',
       'influenceCrusader', 'influenceElder', 'influenceHunter',
       'influenceRedeemer', 'influenceShaper', 'influenceWarlord', 'league',
       'nPrefixes', 'nSuffixes', 'nVeiledMods', 'priceCurrency',
       'priceQuantity', 'rarity', 'requirementDex', 'requirementInt',
       'requirementLevel', 'requirementStr', 'subCategory', 'synthesised',
       'veiled'],
      dtype='object')

## Currencies

In [12]:
# independent copy of the priced currency rows, taken from items rather than mitems
currency = items[items.category=='currency'].copy()
currency.shape

(390, 33)

In [13]:
# full currency name -> short key, built once from the two parallel lists
currency_key_by_name = dict(zip(CURRENCY_NAME, CURRENCY_KEY))

# keep only the currencies that have a known name; the explicit copy makes
# sure the new column below is not written onto a slice of the previous frame
currency = currency[currency.extended.apply(lambda y: y['baseType'] in CURRENCY_NAME)].copy()
currency['sell_currency'] = currency.extended.apply(lambda y: currency_key_by_name[y['baseType']])
currency.shape

(44, 34)

In [14]:
# a missing selling quantity is treated as 1; the result is assigned back
# because an in-place fillna reached through attribute access may act on a
# temporary object and leave the frame unchanged
currency['sellingQuantity'] = currency['sellingQuantity'].fillna(1)

In [15]:
# remove every column not kept for currencies; errors='ignore' skips listed columns that are not present
currency.drop(columns=['abyssJewel', 'additionalProperties', 'artFilename', 'corrupted', 'cosmeticMods', 
    'craftedMods', 'delve', 'descrText', 'duplicated', 'elder', 'enchantMods', 'explicitMods',
    'extended', 'flavourText', 'fractured', 'fracturedMods', 'frameType', 'h', 'hybrid', 
    'icon', 'id', 'identified', 'ilvl', 'implicitMods', 'incubatedItem', 'influences',
    'inventoryId', 'isRelic', 'itemLevel', 'name', 'nextLevelRequirements', 'note', 
    'properties', 'prophecyText', 'requirements', 'secDescrText', 'shaper', 'socketedItems', 'sockets',
    'stashNote', 'support', 'synthesised', 'talismanTier', 'typeLine', 'utilityMods', 'veiled', 
    'verified', 'veiledMods',  'w', 'x', 'y', 'subCategory', 'category', 'maxStackSize', 'stackSize'],
    inplace=True, errors='ignore')