# Init

In [1]:
# Standard imports
from bokeh.io import output_notebook, show
output_notebook()  # route Bokeh output to the notebook

# Report the versions of the libraries this notebook relies on.
from IPython import __version__ as ipython_version
from pandas import __version__ as pandas_version
from bokeh import __version__ as bokeh_version

for line_template, lib_version in (
    ("IPython\t- %s", ipython_version),
    ("Pandas\t- %s", pandas_version),
    ("Bokeh\t- %s", bokeh_version),
):
    print(line_template % lib_version)


IPython	- 7.1.1
Pandas	- 0.24.0
Bokeh	- 1.0.4


# Get current data

In [2]:
import requests

# Source URL of the medicines JSON payload (an object with 'version' and 'data').
url = 'http://188.226.197.213:3000/cache/medicines.json'

# An explicit timeout keeps the cell from hanging forever on an unresponsive
# host, and raise_for_status() turns an HTTP error into a clear exception
# instead of a confusing JSON decoding error or KeyError further down.
http_response = requests.get(url, timeout=30)
http_response.raise_for_status()

response = http_response.json()
version = response['version']
data = response['data']
print("Version\t- %s " % version)
print("Count\t- %s " % len(data))

Version	- 1549843205 
Count	- 7608 


In [3]:
import re
from functools import reduce
from math import floor

def _parse_float(text):
    """Convert a numeric string to float, returning 0.0 when it is not a number.

    A comma is accepted as the decimal separator and parentheses are dropped
    before conversion.
    """
    try:
        return float(text.replace(',', '.').replace('(', '').replace(')', ''))
    except ValueError:
        return 0.0


# Splits a raw size into a leading "numeric-looking" part and the remaining text.
_SIZE_FIELD_PATTERN = re.compile(r'^([0-9 x,\.\-_\(\)\?]+)(.*)$', re.IGNORECASE)

# Ordered (builder, pattern) pairs; the first pattern that matches wins.
# Each builder receives the match groups and returns
# (class_name, numeric_value, display_string).
_SIZE_VALUE_CLASSIFIER = (
    (
        # Two numbers joined by a separator: the value is their floored product.
        lambda groups: ('n*m', floor(_parse_float(groups[0]) * _parse_float(groups[1])), '' + groups[0] + 'x' + groups[1]),
        re.compile(r'^([0-9,\.\(\)]+)[ px\-\_\(\)]+([0-9,\.\(\)]+)$', re.IGNORECASE),
    ),
    (
        lambda groups: ('int', int(groups[0].replace(' ', '')), '' + groups[0].replace(' ', '')),
        re.compile(r'^([0-9]+)$', re.IGNORECASE),
    ),
    (
        lambda groups: ('float', _parse_float(groups[0]), '' + groups[0].replace(',', '.')),
        re.compile(r'^([0-9,\.]+)$', re.IGNORECASE),
    ),
    (
        lambda groups: ('no_data', 0, '-'),
        re.compile(r'^\?$', re.IGNORECASE),
    ),
    (
        lambda groups: ('not_parsed', 0, '' + groups[0]),
        re.compile(r'^(.*)$', re.IGNORECASE),
    ),
)

# Ordered (unit_name, pattern) pairs; the first pattern that matches wins.
_SIZE_TYPE_CLASSIFIER = (
    ('ampoule(s)',    re.compile(r'^[, _\.]*tubuler.*', re.IGNORECASE)),
    ('dose(s)',       re.compile(r'^[, _\.]*doser.*', re.IGNORECASE)),
    ('dose(s)',       re.compile(r'^[, _\.]*dosis.*', re.IGNORECASE)),
    ('piece(s)',      re.compile(r'.+blister.*', re.IGNORECASE)),
    ('piece(s)',      re.compile(r'^[, _\.]*stk.*', re.IGNORECASE)),
    ('milliliter(s)', re.compile(r'^[, _\.]*ml.*', re.IGNORECASE)),
    ('gram(s)',       re.compile(r'^[, _\.]*g.*', re.IGNORECASE)),
    ('htgl',          re.compile(r'^[, _\.]*htgl.*', re.IGNORECASE)),
    ('no_data',       re.compile(r'^no.*data', re.IGNORECASE)),
    ('-',             re.compile(r'^.*', re.IGNORECASE)),
)

# Numeric index assigned to every unit name.
_SIZE_TYPES_INDEX = {
    '-':             0,
    'ampoule(s)':    1,
    'dose(s)':       2,
    'piece(s)':      3,
    'milliliter(s)': 4,
    'gram(s)':       5,
    'htgl':          6,
    '????':          7,
    'no_data':       8,
}


def _classify_value(text):
    """Return (class_name, value, display) from the first matching value pattern, else None."""
    for build, pattern in _SIZE_VALUE_CLASSIFIER:
        found = pattern.match(text)
        if found is not None:
            return build(found.groups())
    return None


def _classify_type(text):
    """Return the unit name of the first matching unit pattern, else None."""
    for unit_name, pattern in _SIZE_TYPE_CLASSIFIER:
        if pattern.match(text) is not None:
            return unit_name
    return None


def update_package_size(item_input_main):
    """Return a copy of a medicine record enriched with parsed package-size fields.

    The raw size is taken from 'package_size_raw' when that key is present,
    otherwise from 'package_size'. A missing key, None, or the string 'None'
    is treated as "no data" (raw text '? no_data').

    When the raw text starts with a numeric-looking part, these keys are added:
      - package_size_value:      parsed number (0 when unknown or unparsed)
      - package_size_unit:       unit name, e.g. 'piece(s)' or 'milliliter(s)'
      - package_size_unit_index: integer index of the unit
      - package_size_unit_hex:   2 ** index as '0x' plus six zero-padded hex digits
      - package_size_string:     '<display value> <unit>'
      - package_size_raw:        the raw text that was parsed
      - __package_size_debug__class_type: name of the value pattern that matched

    When the raw text does not start with a numeric-looking part, the copy is
    returned without any of these keys. The input mapping is never modified.
    """
    # .get() keeps a record without any size key from aborting a whole batch
    # with KeyError; it is handled like an explicit None instead.
    package_size_raw = item_input_main.get(
        'package_size_raw', item_input_main.get('package_size')
    )
    if package_size_raw is None or package_size_raw == 'None':
        package_size_raw = '? no_data'

    result = dict(item_input_main)

    found = _SIZE_FIELD_PATTERN.match(package_size_raw)
    if found is None:
        return result

    value_text, unit_text = found.groups()

    cvalue = _classify_value(value_text.strip())
    if cvalue is None:
        cvalue = ('no_data', 0, '-')

    ctype = _classify_type(unit_text.strip())
    if ctype is None:
        ctype = 'no_data'

    cidx = int(_SIZE_TYPES_INDEX[ctype])
    chex = '0x' + format(2 ** cidx, '06x')

    result.update({
        'package_size_value': cvalue[1],
        'package_size_unit_hex': chex,
        'package_size_unit_index': cidx,
        'package_size_unit': ctype,
        'package_size_string': '' + cvalue[2] + ' ' + ctype,
        'package_size_raw': package_size_raw,
        '__package_size_debug__class_type': cvalue[0],
    })
    return result

In [4]:
# Enrich every downloaded record with its parsed package-size fields.
data_ps = [update_package_size(medicine) for medicine in data]

In [5]:
# Raw string: the backslash escapes belong to the regex, and a plain string
# literal with '\.', '\-', '\(' ... triggers invalid-escape warnings.
mk_pattern = re.compile(r'^([0-9 x,\.\-_\(\)\?]+)(.*)$', re.IGNORECASE)


# Distinct values over all records; None stands for records lacking the key.
data_set_ps = {record.get('package_size_unit') for record in data_ps}
data_set_mk = {record.get('medicine_kind') for record in data_ps}

data_set_mk
# print(data_ps_size_unit_unq.index('0x10'))
#     if item in data_ps_size_unit_unq:
#         pass
#     data_ps_size_unit_unq.append(item)

# med_psuh = {medicine2['package_size'] for medicine2 in data_ps}
# med_mk = {medicine['medicine_kind'] for medicine in data_ps}

# (len(med_mk), len(med_psuh))
# data_ps_size_unit_unq[:2]


{None,
 'brusetabletter',
 'creme',
 'dentalsuspension',
 'dep.inj.vsk.pul.susp',
 'depotgra.oralsus.end',
 'depotgran.,endos.beh',
 'depotgranulat',
 'depotinj.vsk., susp.',
 'depotkapsler',
 'depotkapsler, hårde',
 'depotplastre',
 'depottabletter',
 'depotøjendråber',
 'dispergible tabl.',
 'endotra.pulm.inst.su',
 'enteralgel',
 'enterodepotgran.brv.',
 'enterodepottabletter',
 'enterogran. oral sus',
 'enterogranulat',
 'enterokapsler',
 'enterokapsler, hårde',
 'enterotabletter',
 'filmovertrukne tabl.',
 'frysetørret tablet',
 'gel',
 'gel, brev',
 'gra. oral sus.,endos',
 'gran. til drikkevand',
 'gran. til oral opl.',
 'gran. til oral susp.',
 'granulat',
 'halsbånd',
 'hæmodiafiltr.væske',
 'hæmofiltr./-dial.vsk',
 'hæmofiltreringsvæske',
 'implantat',
 'implantat, inj.spr.',
 'imprægneret gaze',
 'inf.vsk opl/oral opl',
 'inf.væske, emulsion',
 'inf.væske, isotonisk',
 'inf.væske, opløsning',
 'inf.v�ske, opl�sning',
 'infusionsvæske',
 'inh.pulver i kapsler',
 'inh.vsk.,neb

In [6]:
# Google Drive API
# Credentials are read from the environment so they are never stored in the
# notebook. Both names are None when the corresponding variable is unset.
# NOTE(review): the client secret that used to be embedded in this cell has
# been exposed and should be rotated.
import os

client_id = os.environ.get("GOOGLE_DRIVE_CLIENT_ID")
client_secret = os.environ.get("GOOGLE_DRIVE_CLIENT_SECRET")


In [7]:
# from pandas import DataFrame
# from bokeh.models import ColumnDataSource, HoverTool
# from bokeh.plotting import figure
# from bokeh.transform import factor_cmap

# df = DataFrame(data_ps)
# df.medicine_kind = df.medicine_kind.astype(str)
# df.package_size_unit = df.package_size_unit.astype(str)

# kind_factors = sorted(df.medicine_kind.unique())
# colors = ['#2b83ba', '#abdda4', '#ffffbf', '#fdae61', '#d7191c']

# kind_pallete = list()
# for i in range(len(kind_factors)):
#     kind_pallete.append(colors[i % 5])

# index_cmap = factor_cmap('medicine_kind_package_size_unit', 
#                          palette=kind_pallete, 
#                          factors=kind_factors, end=1)

# group = df.groupby(by=['medicine_kind', 'package_size_unit'])
# source = ColumnDataSource(group)

# p = figure(plot_width=800, plot_height=300, title="Package Size by Kind and Unit", 
#            x_range=group, toolbar_location=None, tools="")

# p.xgrid.grid_line_color = None
# p.xaxis.axis_label = "Unit grouped by Kind"
# p.xaxis.major_label_orientation = 1.2

# p.vbar(x='medicine_kind_package_size_unit', top='package_size_value_std', width=1, 
#        source=source, line_color="white", fill_color=index_cmap, 
#        hover_line_color="darkgrey", hover_fill_color=index_cmap)

# p.add_tools(HoverTool(tooltips=[("Package size", "@package_size_value_std"), ("Kind, Unit", "@kind_unit")]))
# show(p)

In [8]:

# result = build_sizes_plot(data_ps[:10])
# print(result)
