# Crawl TRIZ Effects Database

This example shows how to use HTTP POST requests to retrieve data from a website.

http://wbam2244.dns-systems.net//EDB_Welcome.php

In [1]:
import requests
from itertools import product
from collections import defaultdict
from bs4 import BeautifulSoup

In [2]:
# Endpoint that is POSTed to in order to display the results for a
# selected task/target combination.
base_url = 'http://wbam2244.dns-systems.net//EDB_display_results.php'

# Kinds of effect the database can be queried by; each one is sent as the
# `mode_button` form field when requesting the task/target selection page.
modes = [
    'Function',
    'Parameter',
    'Transform',
]

In [10]:
def get_mode_items(html, verbose=False):
    """Parse a task/target selection page into (name, value) pairs.

    Parameters
    ----------
    html : str
        Markup of the selection page.
    verbose : bool, optional
        When True, print every task-name / target-name combination.
        Off by default so that calling the function in a loop does not
        flood the notebook output.

    Returns
    -------
    tuple of (list, list)
        ``(task_items, target_items)``. Each list holds ``(name, value)``
        tuples, where ``name`` is a non-empty text line found in the
        ``td.task_item`` / ``td.target_item`` cells and ``value`` is the
        integer ``value`` attribute of the ``input`` elements inside those
        cells. Names and values are paired by position.

    Raises
    ------
    KeyError
        If a matched ``input`` element has no ``value`` attribute.
    ValueError
        If a ``value`` attribute is not an integer literal.
    """
    soup = BeautifulSoup(html, 'lxml')

    def get_items(paramtype):
        """Collect the (name, value) pairs for one kind of cell ('task' or 'target')."""
        css_sel = 'td.{0}_item'.format(paramtype)
        # Skip blank lines: an empty name would otherwise shift every
        # following name against its input value when the two are zipped.
        names = [
            line.strip()
            for td in soup.select(css_sel)
            for line in td.text.splitlines()
            if line.strip()
        ]
        values = [int(node.attrs['value']) for node in soup.select(css_sel + ' input')]
        return list(zip(names, values))

    task_items = get_items('task')
    target_items = get_items('target')

    if verbose:
        for (task_name, _), (target_name, _) in product(task_items, target_items):
            print(task_name, target_name)

    return task_items, target_items

Test get_mode_items function

In [14]:
# Request the selection page for the 'Function' mode.
res = requests.post(
    'http://wbam2244.dns-systems.net//EDB_select_task_target.php',
    data={'mode_button': 'Function'},
    timeout=30,  # requests waits forever by default; fail instead of hanging the kernel
)
# Stop on an HTTP error rather than parsing an error page as if it were data.
res.raise_for_status()
task_items, target_items = get_mode_items(res.text)
task_items, target_items

([('Absorb', 11),
  ('Accumulate', 12),
  ('Bend', 14),
  ('Break Down', 16),
  ('Change Phase', 17),
  ('Clean', 18),
  ('Compress', 19),
  ('Concentrate', 20),
  ('Condense', 21),
  ('Constrain', 22),
  ('Cool', 23),
  ('Deposit', 24),
  ('Destroy', 25),
  ('Detect', 26),
  ('Dilute', 27),
  ('Dry', 29),
  ('Evaporate', 30),
  ('Expand', 31),
  ('Extract', 32),
  ('Freeze', 33),
  ('Heat', 34),
  ('Hold', 35),
  ('Join', 36),
  ('Melt', 37),
  ('Mix', 38),
  ('Move', 39),
  ('Orient', 41),
  ('Produce', 43),
  ('Protect', 44),
  ('Purify', 45),
  ('Remove', 46),
  ('Resist', 47),
  ('Rotate', 48),
  ('Separate', 49),
  ('Vibrate', 51)],
 [('Divided Solid', 3), ('Field', 6), ('Gas', 5), ('Liquid', 4), ('Solid', 2)])

In [18]:
# Collect the task and target items of every mode:
# info[mode] = {'task': [(name, value), ...], 'target': [(name, value), ...]}
info = {}
for mode in modes:
    res = requests.post(
        'http://wbam2244.dns-systems.net//EDB_select_task_target.php',
        data={'mode_button': mode},
        timeout=30,  # requests waits forever by default; fail instead of hanging the kernel
    )
    # Stop on an HTTP error rather than parsing an error page as if it were data.
    res.raise_for_status()
    task_items, target_items = get_mode_items(res.text)
    info[mode] = {
        'task': task_items,
        'target': target_items
    }
info

{'Function': {'target': [('Divided Solid', 3),
   ('Field', 6),
   ('Gas', 5),
   ('Liquid', 4),
   ('Solid', 2)],
  'task': [('Absorb', 11),
   ('Accumulate', 12),
   ('Bend', 14),
   ('Break Down', 16),
   ('Change Phase', 17),
   ('Clean', 18),
   ('Compress', 19),
   ('Concentrate', 20),
   ('Condense', 21),
   ('Constrain', 22),
   ('Cool', 23),
   ('Deposit', 24),
   ('Destroy', 25),
   ('Detect', 26),
   ('Dilute', 27),
   ('Dry', 29),
   ('Evaporate', 30),
   ('Expand', 31),
   ('Extract', 32),
   ('Freeze', 33),
   ('Heat', 34),
   ('Hold', 35),
   ('Join', 36),
   ('Melt', 37),
   ('Mix', 38),
   ('Move', 39),
   ('Orient', 41),
   ('Produce', 43),
   ('Protect', 44),
   ('Purify', 45),
   ('Remove', 46),
   ('Resist', 47),
   ('Rotate', 48),
   ('Separate', 49),
   ('Vibrate', 51)]},
 'Parameter': {'target': [('Brightness', 7),
   ('Colour', 8),
   ('Concentration', 36),
   ('Density', 9),
   ('Drag', 10),
   ('Electrical Conductivity', 11),
   ('Energy', 12),
   ('Fluid Flo

Development example

In [19]:
# Form fields for a single query: one task value and one target value
# (as returned by get_mode_items) together with the mode they belong to.
data = {
    'selected_task': 11,
    'selected_target': 3,
    'filter': 'both',
    'Preserved_Mode_Name': 'Function'
}
# requests waits forever by default; fail instead of hanging the kernel.
res = requests.post(base_url, data=data, timeout=30)
# Stop on an HTTP error rather than parsing an error page as if it were data.
res.raise_for_status()

In [20]:
def get_suggestions(html):
    """Extract the rows of the last table directly under ``div#main``.

    Parameters
    ----------
    html : str
        Markup of a results page.

    Returns
    -------
    list of tuple
        The stripped text of the table's ``td`` cells, grouped three by
        three in document order. Trailing cells that do not fill a complete
        group of three are dropped. An empty list is returned when the page
        contains no matching table.
    """
    soup = BeautifulSoup(html, 'lxml')
    tables = soup.select('div#main > table')
    if not tables:
        # Nothing to extract; indexing [-1] here would raise IndexError.
        return []
    cells = [node.text.strip() for node in tables[-1].select('td')]
    # Passing the *same* iterator three times makes zip pull three
    # consecutive cells for each output tuple.
    cell_iter = iter(cells)
    return list(zip(cell_iter, cell_iter, cell_iter))

Extract all effects

In [21]:
# effects[mode][(task_name, target_name)] -> list of suggestion rows
effects = defaultdict(dict)
for mode, mode_info in info.items():
    for (task_name, task_value), (target_name, target_value) in product(mode_info['task'], mode_info['target']):
        # Build a fresh payload for every request instead of mutating the
        # shared `data` dict, so re-running cells never sees leftover values.
        payload = dict(
            data,
            Preserved_Mode_Name=mode,
            selected_task=task_value,
            selected_target=target_value,
        )
        # requests waits forever by default; fail instead of hanging the kernel.
        res = requests.post(base_url, data=payload, timeout=30)
        # Stop on an HTTP error rather than parsing an error page as if it were data.
        res.raise_for_status()
        effects[mode][task_name, target_name] = get_suggestions(res.text)

### Dump the result to a JSON file

UltraJSON is an ultra fast JSON encoder and decoder written in pure C with bindings for Python 2.5+ and 3.

For a more painless day-to-day C/C++ JSON decoder experience, please check out ujson4c, which is based on UltraJSON.

In [22]:
# Prefer the faster ujson when it is installed; otherwise fall back to the
# standard library. Either way the module is bound to the name `json`.
try:
    import ujson as json
except ImportError:
    # Only a missing package should trigger the fallback; a bare `except`
    # would also swallow KeyboardInterrupt and unrelated errors.
    import json

In [None]:
# JSON object keys must be strings, but `effects` is keyed by
# (task_name, target_name) tuples, so join each pair into one string key.
effects_serializable = {
    mode: {' | '.join(pair): suggestions for pair, suggestions in by_pair.items()}
    for mode, by_pair in effects.items()
}
# The module was imported under the name `json` (ujson if available), so the
# name `ujson` is not defined here. Write through a context manager so the
# file is flushed and closed, and pin the encoding because ensure_ascii=False
# emits non-ASCII characters verbatim.
with open('effects.json', 'w', encoding='utf-8') as fh:
    json.dump(effects_serializable, fh, ensure_ascii=False, indent=2)