In [1]:
# Activity identifiers. The functions in this notebook compare them against the
# "activityID" column of the industry tables to split rows by activity.
MANUFACTURING       = 1
RESEARCH_TECHNOLOGY = 2 # Deprecated
RESEARCH_TIME       = 3
RESEARCH_MATERIAL   = 4
COPYING             = 5
DUPLICATING         = 6 # Deprecated
REVERSE_ENGINEERING = 7 # Deprecated
INVENTION           = 8
REACTION            = 11

In [2]:
import os

import numpy as np
import pandas as pd

In [3]:
# Raw input tables, read unmodified from CSV files in the local "resources" directory.
# Item catalogue: later cells read its typeID, typeName, volume and published columns.
raw_items = pd.read_csv("resources/invTypes.csv")

# Industry tables. Each one is later restricted to usable blueprints through its
# "typeID" column (see filter_blueprints).
raw_times = pd.read_csv("resources/industryActivity.csv")
raw_materials = pd.read_csv("resources/industryActivityMaterials.csv")
raw_invention = pd.read_csv("resources/industryActivityProbabilities.csv")
raw_products = pd.read_csv("resources/industryActivityProducts.csv")
raw_runs = pd.read_csv("resources/industryBlueprints.csv")

In [4]:
def collect_usable_blueprints(raw_items, raw_products):
    """Return the IDs of published item types that have at least one product row.

    Parameters
    ----------
    raw_items : pandas.DataFrame
        Item table with "typeID" and "published" columns.
    raw_products : pandas.DataFrame
        Product table with a "typeID" column.

    Returns
    -------
    numpy.ndarray
        Distinct "typeID" values present in both tables whose "published" flag is 1.
    """
    return (
        raw_items
        .join(raw_products.set_index("typeID"), how = "inner", on = "typeID")
        # The filter runs on the joined frame, exactly where the flag is looked up.
        .loc[lambda joined: joined["published"] == 1, "typeID"]
        .unique()
    )

In [5]:
def collect_usable_items(items, materials, products):
    """Return the IDs of catalogued items that are a material or a non-invention product.

    Parameters
    ----------
    items : pandas.DataFrame
        Item table with a "typeID" column.
    materials : pandas.DataFrame
        Table with a "materialTypeID" column.
    products : pandas.DataFrame
        Table with "activityID" and "productTypeID" columns.

    Returns
    -------
    numpy.ndarray
        Sorted, distinct "typeID" values from ``items`` that appear as a material
        or as the product of any activity other than INVENTION.
    """
    material_ids = materials["materialTypeID"]
    # Products of the INVENTION activity are excluded.
    product_ids = products.loc[products["activityID"] != INVENTION, "productTypeID"]

    # Both ID sets are checked against the item table. The previous code filtered
    # the product IDs too, but then concatenated the unfiltered array because of a
    # products_ids / product_ids name mix-up, letting uncatalogued IDs through.
    known_ids = items["typeID"]
    is_usable = known_ids.isin(material_ids) | known_ids.isin(product_ids)

    return np.unique(known_ids[is_usable].to_numpy())

In [6]:
def filter_blueprints(usable_blueprints, raw_times, raw_materials, raw_invention, raw_products, raw_runs):
    """Keep only the rows of each industry table that belong to a usable blueprint.

    Parameters
    ----------
    usable_blueprints : array-like
        Blueprint IDs to keep.
    raw_times, raw_materials, raw_invention, raw_products, raw_runs : pandas.DataFrame
        Tables that each carry a "typeID" column.

    Returns
    -------
    tuple of pandas.DataFrame
        The five tables in argument order, each inner-joined against the
        blueprint IDs on "typeID".
    """
    blueprint_index = pd.DataFrame(usable_blueprints, columns = ["typeID"]).set_index("typeID")

    def keep_usable(table):
        # The right-hand frame has no columns, so the join only filters rows.
        return table.join(blueprint_index, how = "inner", on = "typeID")

    return tuple(
        keep_usable(table)
        for table in (raw_times, raw_materials, raw_invention, raw_products, raw_runs)
    )

In [7]:
# IDs of published types that have product rows; reported as a count only.
usable_blueprints = collect_usable_blueprints(raw_items, raw_products)
print(f"Found {usable_blueprints.shape[0]} usable in-game blueprints.")

Found 3789 usable in-game blueprints.


In [8]:
# Restrict every industry table to the usable blueprint IDs.
# `invention` is not read again by the cells shown in this notebook.
times, materials, invention, products, runs = filter_blueprints(
    usable_blueprints, raw_times, raw_materials, raw_invention, raw_products, raw_runs)

In [9]:
# Uses the already-filtered materials/products, so only usable blueprints contribute items.
usable_items = collect_usable_items(raw_items, materials, products)
print(f"Found {usable_items.shape[0]} usable in-game items.")

Found 4161 usable in-game items.


In [10]:
def append_source_blueprints(usable_items, products):
    """Attach to each item the ID of the blueprint that yields it.

    Parameters
    ----------
    usable_items : pandas.DataFrame
        Frame with a "typeID" column of item IDs.
    products : pandas.DataFrame
        Frame with "typeID" (the blueprint) and "productTypeID" (what it yields).

    Returns
    -------
    pandas.DataFrame
        ``usable_items`` with an added "blueprintTypeID" column holding the
        blueprint ID as a string, or a missing value when no row of ``products``
        yields the item. An item yielded by several rows appears once per row.
    """
    # The builtin ``str`` replaces ``np.str``, an alias NumPy deprecated in 1.20
    # and removed in 1.24; the conversion itself is the same.
    blueprint_by_product = (
        products[["typeID", "productTypeID"]]
        .rename(columns = {"typeID": "blueprintTypeID"})
        .astype({"blueprintTypeID": str})
        .set_index("productTypeID")
    )

    # Left join: items without a source blueprint are kept.
    return usable_items.join(blueprint_by_product, how = "left", on = "typeID")


def prepare_items(usable_items, all_items, products):
    """Assemble the exported item table: ID, source blueprint, volume and name.

    Parameters
    ----------
    usable_items : array-like
        Item IDs to include.
    all_items : pandas.DataFrame
        Item table with "typeID", "volume" and "typeName" columns.
    products : pandas.DataFrame
        Product table handed to ``append_source_blueprints``.

    Returns
    -------
    pandas.DataFrame
        Columns "ID", "blueprintID", "volume" and "name"; names are stripped of
        surrounding whitespace. IDs missing from ``all_items`` are dropped.
    """
    item_details = all_items[["typeID", "volume", "typeName"]].set_index("typeID")

    id_frame = pd.DataFrame(usable_items, columns = ["typeID"])
    with_blueprints = append_source_blueprints(id_frame, products)

    # Inner join: only IDs that have a catalogue entry survive.
    described = with_blueprints.join(item_details, how = "inner", on = "typeID")
    described["typeName"] = described["typeName"].str.strip()

    exported_names = {
        "typeID": "ID",
        "blueprintTypeID": "blueprintID",
        "volume": "volume",
        "typeName": "name"
    }
    return described.rename(columns = exported_names)

In [11]:
# Item export table built from the usable item IDs, the item catalogue and the filtered products.
prepared_items = prepare_items(usable_items, raw_items, products)

In [12]:
# Display the prepared item table for inspection.
prepared_items

Unnamed: 0,ID,blueprintID,volume,name
0,34,,0.01,Tritanium
1,35,,0.01,Pyerite
2,36,,0.01,Mexallon
3,37,,0.01,Isogen
4,38,,0.01,Nocxium
...,...,...,...,...
4156,62044,,0.01,Strange Matter Component Z-167
4157,62045,,0.01,Strange Matter Component X-71
4158,62055,62060,0.10,Expired Mysterious Warp Matrix Filament
4159,62056,62063,0.10,Expired Dangerous Warp Matrix Filament


In [13]:
def prepare_blueprints(raw_times, materials, products, runs, items = None):
    """Build the exported blueprint table: materials, durations, run limit, product and name.

    Parameters
    ----------
    raw_times : pandas.DataFrame
        Activity durations with "typeID", "activityID" and "time" columns.
    materials : pandas.DataFrame
        Inputs with "typeID", "activityID", "materialTypeID" and "quantity".
    products : pandas.DataFrame
        Outputs with "typeID", "activityID", "productTypeID" and "quantity".
    runs : pandas.DataFrame
        Per-blueprint table with a "typeID" column; its remaining columns are
        joined in as-is ("maxProductionLimit" is renamed to "maxCopyRuns").
    items : pandas.DataFrame, optional
        Item table with "typeID" and "typeName". When omitted, the notebook-level
        ``raw_items`` table is used.

    Returns
    -------
    pandas.DataFrame
        One row per (blueprint, non-invention product) with columns "ID",
        "materialsID", "quantities", "manufactureTime", "timeResearchTime",
        "materialResearchTime", "copyTime", "maxCopyRuns", "productID",
        "productQuantity", "materialsCount" and "name".
    """
    if items is None:
        # Fall back to the notebook-level table so four-argument calls keep working.
        items = raw_items

    def times_for(*activity_ids):
        # Durations come from the ``raw_times`` argument; the body previously read
        # the notebook global ``times`` and silently ignored this parameter.
        rows = raw_times[raw_times["activityID"].isin(activity_ids)]
        return rows[["typeID", "time"]].set_index("typeID")

    product_rows = products[products["activityID"] != INVENTION]
    product_rows = product_rows[["typeID", "productTypeID", "quantity"]].set_index("typeID")
    material_rows = materials[materials["activityID"] != INVENTION]
    material_rows = material_rows[["typeID", "materialTypeID", "quantity"]]

    # Collapse the materials of each blueprint into parallel lists of IDs and quantities.
    blueprint_data = material_rows.groupby(["typeID"], as_index = False).agg(
        {"materialTypeID": list, "quantity": list}
    )

    # Join order matters: the first "time" column stays unsuffixed, every later
    # one is disambiguated by its rsuffix and renamed at the end.
    # NOTE(review): all joins are inner, so a typeID lacking research or copy
    # times is dropped entirely - confirm this is intended for REACTION rows.
    blueprint_data = blueprint_data.join(
        times_for(MANUFACTURING, REACTION), how = "inner", on = "typeID")
    for activity_id, suffix in (
        (RESEARCH_TIME, "TimeEfficiency"),
        (RESEARCH_MATERIAL, "MaterialEfficiency"),
        (COPYING, "Copy"),
    ):
        blueprint_data = blueprint_data.join(
            times_for(activity_id), how = "inner", on = "typeID", rsuffix = suffix)

    blueprint_data = blueprint_data.join(
        runs.set_index("typeID"), how = "inner", on = "typeID")
    blueprint_data = blueprint_data.join(
        product_rows, how = "inner", on = "typeID", rsuffix = "Product")
    blueprint_data["materialsCount"] = blueprint_data["quantity"].str.len()
    blueprint_data = blueprint_data.join(
        items[["typeID", "typeName"]].set_index("typeID"), how = "inner", on = "typeID")
    blueprint_data["typeName"] = blueprint_data["typeName"].str.strip()

    return blueprint_data.rename(columns = {
        "typeID": "ID",
        "materialTypeID": "materialsID",
        "quantity": "quantities",
        "time": "manufactureTime",
        "timeTimeEfficiency": "timeResearchTime",
        "timeMaterialEfficiency": "materialResearchTime",
        "timeCopy": "copyTime",
        "maxProductionLimit": "maxCopyRuns",
        "productTypeID": "productID",
        "quantityProduct": "productQuantity",
        "materialsCount": "materialsCount",
        "typeName": "name"
    })

In [14]:
# Blueprint export table built from the filtered industry tables.
prepared_blueprints = prepare_blueprints(times, materials, products, runs)

In [15]:
# Display the prepared blueprint table for inspection.
prepared_blueprints

Unnamed: 0,ID,materialsID,quantities,manufactureTime,timeResearchTime,materialResearchTime,copyTime,maxCopyRuns,productID,productQuantity,materialsCount,name
0,683,"[34, 35, 36, 37]","[24000, 4500, 1875, 375]",6000,2100,2100,4800,30,582,1,4,Bantam Blueprint
1,684,"[34, 35, 36, 37]","[32000, 6000, 2500, 500]",6000,2100,2100,4800,30,583,1,4,Condor Blueprint
2,685,"[34, 35, 36, 37]","[24000, 4500, 1875, 375]",6000,2100,2100,4800,30,584,1,4,Griffin Blueprint
3,686,"[34, 35, 36, 37, 38, 39, 40]","[405000, 135000, 27000, 7500, 1125, 263, 105]",12000,4200,4200,9600,10,620,1,7,Osprey Blueprint
4,687,"[34, 35, 36, 37, 38, 39, 40]","[540000, 180000, 36000, 10000, 1500, 350, 140]",12000,4200,4200,9600,10,621,1,7,Caracal Blueprint
...,...,...,...,...,...,...,...,...,...,...,...,...
3752,61226,[38],[752],300,105,105,240,600,61206,1,1,Uncommon Moon Mining Crystal Type C I Blueprint
3755,61229,[38],[661],300,105,105,240,600,61209,1,1,Rare Moon Mining Crystal Type B I Blueprint
3756,61230,[38],[794],300,105,105,240,600,61210,1,1,Rare Moon Mining Crystal Type C I Blueprint
3759,61233,[38],[696],300,105,105,240,600,61213,1,1,Exceptional Moon Mining Crystal Type B I Bluep...


In [16]:
def prepare_invention(times, materials, products):
    """Build the exported invention table from the rows of the INVENTION activity.

    Parameters
    ----------
    times : pandas.DataFrame
        Activity durations with "typeID", "activityID" and "time" columns.
    materials : pandas.DataFrame
        Inputs with "typeID", "activityID", "materialTypeID" and "quantity".
    products : pandas.DataFrame
        Outputs with "typeID", "activityID", "productTypeID" and "quantity".

    Returns
    -------
    pandas.DataFrame
        One row per (origin, invented product) with columns "originID",
        "materialsID", "quantities", "inventionTime", "inventedID",
        "blueprintRuns" and "materialsCount".
    """
    def invention_rows(table, columns):
        # Select the requested columns of the rows tagged with the INVENTION activity.
        return table.loc[table["activityID"] == INVENTION, columns]

    durations = invention_rows(times, ["typeID", "time"]).set_index("typeID")
    outcomes = invention_rows(products, ["typeID", "productTypeID", "quantity"]).set_index("typeID")
    inputs = invention_rows(materials, ["typeID", "materialTypeID", "quantity"])

    # Collapse the inputs of each origin into parallel lists of IDs and quantities.
    recipe = inputs.groupby(["typeID"], as_index = False).agg(
        {"materialTypeID": list, "quantity": list}
    )
    recipe = recipe.join(durations, how = "inner", on = "typeID")
    # The product "quantity" clashes with the material list column, hence the suffix.
    recipe = recipe.join(outcomes, how = "inner", on = "typeID", rsuffix = "Product")
    recipe["materialsCount"] = recipe["quantity"].str.len()

    exported_names = {
        "typeID": "originID",
        "materialTypeID": "materialsID",
        "quantity": "quantities",
        "time": "inventionTime",
        "productTypeID": "inventedID",
        "quantityProduct": "blueprintRuns",
        "materialsCount": "materialsCount"
    }
    return recipe.rename(columns = exported_names)

In [17]:
# Invention export table built from the filtered industry tables.
prepared_invention = prepare_invention(times, materials, products)

In [18]:
# Display the prepared invention table for inspection.
prepared_invention

Unnamed: 0,originID,materialsID,quantities,inventionTime,inventedID,blueprintRuns,materialsCount
0,683,"[20416, 25887]","[2, 2]",63900,39581,1,2
1,684,"[20411, 25887]","[2, 2]",63900,11177,1,2
1,684,"[20411, 25887]","[2, 2]",63900,11179,1,2
2,685,"[20418, 25887]","[2, 2]",63900,11195,1,2
3,686,"[20416, 25887]","[8, 8]",128100,11986,1,2
...,...,...,...,...,...,...,...
997,61226,"[20411, 20413]","[12, 12]",3300,61228,10,2
998,61229,"[20411, 20413]","[8, 8]",3300,61231,10,2
999,61230,"[20411, 20413]","[16, 16]",3300,61232,10,2
1000,61233,"[20411, 20413]","[8, 8]",3300,61235,10,2


In [19]:
# Write the three prepared tables as CSV files under "prepared/", creating the directory if needed.
os.makedirs("prepared", exist_ok = True)

# Missing values are written as "0" and the row index is omitted.
# NOTE(review): encoding = "ascii" raises on any non-ASCII character in the data -
# confirm every exported name is plain ASCII.
prepared_items.to_csv("prepared/item_data.csv", na_rep = "0", index = False, encoding = "ascii")
prepared_blueprints.to_csv("prepared/blueprint_data.csv", na_rep = "0", index = False, encoding = "ascii")
prepared_invention.to_csv("prepared/invention_data.csv", na_rep = "0", index = False, encoding = "ascii")