In [1]:
import re

In [2]:
%run constants.ipynb
%run functions.ipynb
%run dictionnaries.ipynb

In [3]:
def extract_tags(request):
    sent = [list(r.keys())[0] for r in request]
    tags = [list(r.values())[0] for r in request]
    
    coding = [1 if t.endswith(CT_SUF_B)
              else -1 if t == CT_TAG_O 
              else 0 for t in tags]

    prev = -1
    chunk = {}
    res = []
    for ic,c in enumerate(coding):
        new_tag = False
        sam_tag = False
        if c == 1: new_tag = True
        if c == 0: sam_tag = True

        if new_tag:
            if len(chunk)>0: 
                chunk[tag] = " ".join(chunk[tag])
                res.append(chunk)
            tag = tags[ic].split(CT_SEP)[0]
            chunk = {}
            chunk[tag] = [sent[ic]]

        if sam_tag:
            chunk[tag] += [sent[ic]]
    
    if len(chunk)>0: 
        chunk[tag] = " ".join(chunk[tag])
        res.append(chunk)
    
    return res

def tag_to_filters(tags_values):
    compact = {}
    t2f = tag_to_filter()
    for d in tags_values:
        t,v = list(d.keys())[0], list(d.values())[0]
        if t in t2f:
            if t not in compact: 
                compact[t] = [v]
            else:
                f = t2f[t][CT_filt]
                is_list = t2f[t][CT_is_list]
                if is_list:
                    compact[t] += [v]
                else:
                    compact[t] = [compact[t][0] + ' ' + v]

    filters = init_filters()
    for t,v in compact.items():
        f = t2f[t][CT_filt]
        filters[f] += v

    return filters
    
def trans_date(date:str) :
    trans_month = {'01':['January','Jan.','Jan'], 
             '02':['February','Feb.','Feb'],
             '03':['March','Mar.','Mar'],
             '04':['April','Apr.','Apr'],
             '05':['May'],
             '06':['June', 'Jun.', 'Jun'],
             '07':['July','Jul.','Jul'],
             '08':['August','Aug.','Aug'],
             '09':['September','Sep.','Sep'],
             '10':['October','Oct.','Oct'],
             '11':['November','Nov.','Nov'],
             '12':['December','Dec.','Dec']}
    
    if re.match('\d{2}/\d{2}/\d{4}', date) != None:
        date_r = date.split('/')
        date_r = ''.join(date_r)
        return date_r 
    
    if re.match('\d{4}', date) != None:
        date_r= ['0101'+date,'1231'+date]
        return date_r
    
    if re.match('\d{2} \w+ \d{4}', date) != None:
        month = re.findall('[a-zA-Z_]+', date)
        for m in trans_month:
            if month[0].capitalize() in trans_month[m]:
                month = m
        day = re.findall('\d{2} ', date)
        day = day[0][0:2]
        year = re.findall('\d{4}', date)
        return month+day+year[0]
    
    if re.match('\w+ \d{4}', date) != None:
        year = re.findall('\d{4}', date)
        month = re.findall('[a-zA-Z_]+', date)
        for m in trans_month:
            if month[0].capitalize() in trans_month[m]:
                return ('O1'+m+year[0], '31'+m+year[0])
        if month[0].lower() == "winter": 
            return ['O1'+'12'+year[0], '28'+'02'+year[0]]
        if month[0].lower() == "spring": 
            return ['O1'+'03'+year[0], '31'+'05'+year[0]]
        if month[0].lower() == "summer": 
            return ['O1'+'06'+year[0], '31'+'08'+year[0]]
        if month[0].lower() == "autumn": 
            return ['O1'+'09'+year[0], '30'+'11'+year[0]]

        
def apply_date(filtre):
    
    if len(filtre['date']) == 1:
        filtre['date'] = trans_date(filtre['date'][0])
    elif len(filtre['date']) == 2:
        date1 = filtre['date'][0]
        date2 = filtre['date'][1]
        date1 = trans_date(date1)[0]
        date2 = trans_date(date2)[1]
        filtre['date'] = [date1, date2]
    return(filtre)

In [4]:
request = [{'in': 'O'},
 {'Sweden': 'COUN_B'},
 {'France': 'COUN_B'},
 {'from': 'O'},
 {'SPRING': 'DATE1_B'},
 {'1902': 'DATE1_B'},
 {'to': 'O'},
 {'AUTUMN': 'DATE2_B'},
 {'2017': 'DATE2_E'},
 {'cloud': 'STAT_B'},
 {'of': 'STAT_I'},
 {'words': 'STAT_E'},
 {'for': 'O'},
 {'Pilatus': 'MANU_B'},
{'Pilatus': 'O'}]

In [5]:
request

[{'in': 'O'},
 {'Sweden': 'COUN_B'},
 {'France': 'COUN_B'},
 {'from': 'O'},
 {'SPRING': 'DATE1_B'},
 {'1902': 'DATE1_B'},
 {'to': 'O'},
 {'AUTUMN': 'DATE2_B'},
 {'2017': 'DATE2_E'},
 {'cloud': 'STAT_B'},
 {'of': 'STAT_I'},
 {'words': 'STAT_E'},
 {'for': 'O'},
 {'Pilatus': 'MANU_B'},
 {'Pilatus': 'O'}]

In [6]:
tags_values = extract_tags(request)
filters = tag_to_filters(tags_values)
filters

{'manufacturer': ['Pilatus'],
 'aircraft': [],
 'company': [],
 'country': ['Sweden', 'France'],
 'category': [],
 'date': ['SPRING 1902', 'AUTUMN 2017']}

In [7]:
apply_date(filters)

{'manufacturer': ['Pilatus'],
 'aircraft': [],
 'company': [],
 'country': ['Sweden', 'France'],
 'category': [],
 'date': ['O1031902', '30112017']}

In [8]:
event = {
    CT_tabs : CT_tabs_default,
    CT_filt : filters,
}
event

{'tab': 'default',
 'filters': {'manufacturer': ['Pilatus'],
  'aircraft': [],
  'company': [],
  'country': ['Sweden', 'France'],
  'category': [],
  'date': ['O1031902', '30112017']}}