## RBN descriptive statistics
Run **python main.py** before using this notebook: the cells below load `output/rbn.p`, which must exist first.

In [1]:
import pickle
import utils

In [2]:
# Load the pickled objects that every later cell in this notebook analyses.
input_path = 'output/rbn.p'
try:
    with open(input_path, 'rb') as infile:
        # SECURITY: unpickling can execute arbitrary code. Only load a file
        # you generated yourself, never a pickle from an untrusted source.
        le_objs = pickle.load(infile)
except FileNotFoundError as error:
    # Replace the bare "No such file" error with the documented prerequisite.
    raise FileNotFoundError(
        "{!r} not found: run 'python main.py' before using this notebook.".format(input_path)
    ) from error

## Number of senses

In [3]:
# Number of entries loaded from the pickle; this section reports it as the number of senses.
len(le_objs)

127208

## Lemma statistics

In [4]:
# Frequency statistics over the 'lemma' attribute of every entry in le_objs.
# NOTE(review): the "chosen attributes" line in the output presumably comes from verbose=1.
lemma_stats = utils.compute_stats_about(
    le_objs,
    attributes={'lemma'},
    verbose=1,
)
# Print each summary value next to the key it is stored under.
for label in ('count', '# of unique observations'):
    print(label, lemma_stats[label])
# Last expression of the cell: displays the first rows of the frequency table.
lemma_stats['freq_dist_df'].head()

chosen attributes: {'lemma'}
count 127208
# of unique observations 100002


Unnamed: 0,lemma,frequency
69997,"(slag,)",18
26273,"(gaan,)",17
7076,"(afzetten,)",17
34050,"(houden,)",17
79681,"(uitlopen,)",16


## Part of speech statistics

In [9]:
# Frequency statistics over the 'pos' (part of speech) attribute of every entry in le_objs.
# NOTE(review): the "chosen attributes" line in the output presumably comes from verbose=1.
pos_stats = utils.compute_stats_about(
    le_objs,
    attributes={'pos'},
    verbose=1,
)
# Print each summary value next to the key it is stored under.
for label in ('count', '# of unique observations'):
    print(label, pos_stats[label])
# Last expression of the cell: displays the first rows of the frequency table.
pos_stats['freq_dist_df'].head()

chosen attributes: {'pos'}
count 127208
# of unique observations 6


Unnamed: 0,pos,frequency
0,"(n,)",85360
3,"(v,)",16504
1,"(a,)",15345
5,"(None,)",9396
2,"(r,)",376


## Lemma and part of speech statistics

In [10]:
# Frequency statistics over the combination of 'lemma' and 'pos' for every entry in le_objs.
# NOTE(review): the "chosen attributes" line in the output presumably comes from verbose=1.
lemma_pos_stats = utils.compute_stats_about(
    le_objs,
    attributes={'lemma', 'pos'},
    verbose=1,
)
# Print each summary value next to the key it is stored under.
for label in ('count', '# of unique observations'):
    print(label, lemma_pos_stats[label])
# Last expression of the cell: displays the first rows of the frequency table.
lemma_pos_stats['freq_dist_df'].head()

chosen attributes: {'pos', 'lemma'}
count 127208
# of unique observations 101287


Unnamed: 0,pos-lemma,frequency
71029,"(n, slag)",18
34636,"(v, houden)",17
7294,"(v, afzetten)",17
26760,"(v, gaan)",17
79590,"(v, trekken)",16


## Single-words and multi-words
Note: multi-words are typically idioms.

In [11]:
# Frequency statistics over the 'mw' (multi-word) flag of every entry in le_objs.
# NOTE(review): the "chosen attributes" line in the output presumably comes from verbose=1.
mw_stats = utils.compute_stats_about(
    le_objs,
    attributes={'mw'},
    verbose=1,
)
# Print each summary value next to the key it is stored under.
for label in ('count', '# of unique observations'):
    print(label, mw_stats[label])
# Last expression of the cell: displays the first rows of the frequency table.
mw_stats['freq_dist_df'].head()

chosen attributes: {'mw'}
count 127208
# of unique observations 2


Unnamed: 0,mw,frequency
0,"(False,)",117812
1,"(True,)",9396


## RBN type

In [12]:
# Frequency statistics over the 'rbn_type' attribute of every entry in le_objs.
# NOTE(review): the "chosen attributes" line in the output presumably comes from verbose=1.
type_stats = utils.compute_stats_about(
    le_objs,
    attributes={'rbn_type'},
    verbose=1,
)
# Print each summary value next to the key it is stored under.
for label in ('count', '# of unique observations'):
    print(label, type_stats[label])
# Last expression of the cell: displays only the first rows of the frequency table.
type_stats['freq_dist_df'].head()

chosen attributes: {'rbn_type'}
count 127208
# of unique observations 7


Unnamed: 0,rbn_type,frequency
0,"(None,)",116540
1,"(action,)",7787
2,"(process,)",1726
3,"(state,)",1077
4,"(,)",66


## RBN feature set

In [14]:
# Frequency statistics over the 'rbn_feature_set' attribute of every entry in le_objs.
# NOTE(review): the "chosen attributes" line in the output presumably comes from verbose=1.
feature_set_stats = utils.compute_stats_about(
    le_objs,
    attributes={'rbn_feature_set'},
    verbose=1,
)
# Print each summary value next to the key it is stored under.
for label in ('count', '# of unique observations'):
    print(label, feature_set_stats[label])
# Last expression of the cell: displays the whole frequency table (no .head() here).
feature_set_stats['freq_dist_df']

chosen attributes: {'rbn_feature_set'}
count 127208
# of unique observations 24


Unnamed: 0,rbn_feature_set,frequency
0,"(None,)",116540
1,"(,)",5312
2,"(action2,)",1667
3,"(cognt2,)",1083
5,"(process1,)",520
14,"(action1,)",470
13,"(state2,)",278
11,"(cognt3,)",201
22,"(cognt1,)",190
10,"(state1,)",188


## Separable verbs

In [15]:
# Frequency statistics over the 'separable' attribute of every entry in le_objs.
# NOTE(review): the "chosen attributes" line in the output presumably comes from verbose=1.
separable_stats = utils.compute_stats_about(
    le_objs,
    attributes={'separable'},
    verbose=1,
)
# Print each summary value next to the key it is stored under.
for label in ('count', '# of unique observations'):
    print(label, separable_stats[label])
# Last expression of the cell: displays the whole frequency table (no .head() here).
separable_stats['freq_dist_df']

chosen attributes: {'separable'}
count 127208
# of unique observations 3


Unnamed: 0,separable,frequency
0,"(None,)",110704
1,"(False,)",12376
2,"(True,)",4128
