# Statistics Explained  articles and matches with OECD's Glossary

## Objective: to build a common vocabulary and construct "profiles" of the terms of this vocabulary


In [11]:
import re
import pandas as pd
import spacy
import sys
from collections import Counter

## Run to install the language library, then comment-out
#!{sys.executable} -m spacy download en_core_web_lg

nlp = spacy.load('en_core_web_lg')
nlp.max_length = 1500000
print('Finished loading.')


Finished loading.


In [12]:
from datetime import datetime

def file_name(pre,ext):
    current_time = datetime.now() 
    return pre + '_'+ str(current_time.month)+ '_' + str(current_time.day) + \
                 '_' + str(current_time.hour)+ '_' + str(current_time.minute)  +'.'+ext

In [13]:
import pyodbc
c = pyodbc.connect('DSN=Virtuoso All;DBA=ESTAT;UID=kimon;PWD=RkhvQYZ442e2JVXLHdtW')
cursor = c.cursor()

In [14]:
import re
#import unicodedata as ud

def clean(x, quotes=True):
    if pd.isnull(x): return x  
    x = x.strip()
    
    ## make letter-question mark-letter -> letter-quote-space-letter !!! but NOT in the lists of URLs!!!
    if quotes:
        x = re.sub(r'([A-Za-z])\?([A-Za-z])','\\1\' \\2',x) 
    
    ## make letter-question mark-space lower case letter letter-quote-space letter
    x = re.sub(r'([A-Za-z])\? ([a-z])','\\1\' \\2',x) 

    ## delete ,000 commas in numbers    
    x = re.sub(r'\b(\d+),(\d+)\b','\\1\\2',x) ## CORRECTED
    
    ## delete  000 spaces in numbers
    x = re.sub(r'\b(\d+) (\d+)\b','\\1\\2',x) ## CORRECTED
    
    ## remove more than one spaces
    x = re.sub(r' +', ' ',x)
    
    ## remove start and end spaces
    x = re.sub(r'^ +| +$', '',x,flags=re.MULTILINE) 
    
    ## space-comma -> comma
    x = re.sub(r' \,',',',x)
    
    ## space-dot -> dot
    x = re.sub(r' \.','.',x)
    
    x = re.sub(r'â.{2}',"'",x) ### !!! NEW: single quotes are read as: âXX
    
    #x = x.encode('latin1').decode('utf-8') ## â\x80\x99
    #x = ud.normalize('NFKD',x).encode('ascii', 'ignore').decode()
    
    return x

### Statistics explained articles

* IDs, titles from dat_link_info, with resource_information_id=1, i.e. Eurostat (see ESTAT.V1.mod_resource_information) and matching IDs from dat_article.
* Carry out data cleansing on titles.


In [15]:
SQLCommand = """SELECT id, title 
                FROM ESTAT.V1.dat_link_info 
                WHERE resource_information_id=1 AND id IN (SELECT id FROM ESTAT.V1.dat_article) """

SE_df = pd.read_sql(SQLCommand,c)

SE_df['title'] = SE_df['title'].apply(clean)
SE_df.head(5)


Unnamed: 0,id,title
0,7,Accidents at work statistics
1,13,National accounts and GDP
2,16,Railway safety statistics in the EU
3,17,Railway freight transport statistics
4,18,Railway passenger transport statistics - quart...


### Add paragraphs titles and contents

* From dat_article_paragraph with abstract=0 (i.e. "no").
* Match article_id from dat_article_paragraph with id from dat_article.
* Carry out data cleansing on titles and paragraph contents.

In [16]:
SQLCommand = """SELECT article_id, title, content 
                FROM ESTAT.V1.dat_article_paragraph
                WHERE abstract=0 AND article_id IN (SELECT id FROM ESTAT.V1.dat_article) """

add_content = pd.read_sql(SQLCommand,c)
add_content['title'] = add_content['title'].apply(clean)
add_content['content'] = add_content['content'].apply(clean)
add_content

Unnamed: 0,article_id,title,content
0,2905,Absences from work sharply increase in first h...,Absences from work recorded unprecedented high...
1,2905,Absences: 9.5 % of employment in Q4 2019 and 1...,The article's next figure (Figure 4) compares ...
2,2905,Higher share of absences from work among women...,"Considering all four quarters of 2020, the sha..."
3,2905,Absences from work due to own illness or disab...,"From Q4 2019 to Q4 2020, the number of people ..."
4,2905,Absences from work due to holidays,"Expressed as a share of employed people, absen..."
...,...,...,...
3854,10539,General presentation and definition,Scope of asylum statistics and Dublin statisti...
3855,10539,Methodological aspects in asylum statistics,Annual aggregate of the number of asylum appli...
3856,10539,Methodological aspects in Dublin statistics,Asymmetries For most of the collected Dublin s...
3857,10539,What questions can or cannot be answered with ...,How many asylum seekers are entering EU Member...


### Aggregate above paragraph titles and contents  from SE articles paragraphs by article id

* Create a column _raw content_ which gathers all paragraph titles and contents in one text per article.

In [17]:
add_content_grouped = add_content.groupby(['article_id'])[['title','content']].aggregate(lambda x: list(x))
add_content_grouped.reset_index(drop=False, inplace=True)
for i in range(len(add_content_grouped)):
    add_content_grouped.loc[i,'raw content'] = ''
    for (a,b) in zip(add_content_grouped.loc[i,'title'],add_content_grouped.loc[i,'content']):
        add_content_grouped.loc[i,'raw content'] += ' '+a + ' ' + b
add_content_grouped = add_content_grouped[['article_id','raw content']]    

add_content_grouped

Unnamed: 0,article_id,raw content
0,7,"Number of accidents In 2018, there were 3.1 m..."
1,13,Developments for GDP in the EU-27: growth sin...
2,16,Fall in the number of railway accidents 9 % f...
3,17,Downturn for EU transport performance in 2019...
4,18,Rail passenger transport performance continue...
...,...,...
860,10456,Problem After successfully identifying and jo...
861,10470,"Problem In France, there was significant room..."
862,10506,General overview Nine PEEIs concern short-ter...
863,10531,What are administrative sources? The term 'ad...


### Merge raw content of SE articles with main file

* Also, add title to definition.

In [18]:
SE_df = pd.merge(SE_df,add_content_grouped,left_on='id',right_on='article_id',how='inner')
SE_df.drop(['article_id'],axis=1,inplace=True)

SE_df['raw content'] = SE_df['title'] +'. '+SE_df['raw content']

SE_df.head(5)

Unnamed: 0,id,title,raw content
0,7,Accidents at work statistics,Accidents at work statistics. Number of accid...
1,13,National accounts and GDP,National accounts and GDP. Developments for G...
2,16,Railway safety statistics in the EU,Railway safety statistics in the EU. Fall in ...
3,17,Railway freight transport statistics,Railway freight transport statistics. Downtur...
4,18,Railway passenger transport statistics - quart...,Railway passenger transport statistics - quart...


### Lemmatize 'raw content'

* NLTK seems to be better than Spacy in lemmatization. Convert to lower-case before.

In [19]:
import nltk

w_tokenizer = nltk.tokenize.WhitespaceTokenizer()
lemmatizer = nltk.stem.WordNetLemmatizer()

def lemmatize_text(text):
    return [lemmatizer.lemmatize(w) for w in w_tokenizer.tokenize(text)]

SE_df['raw content'] = SE_df['raw content'].apply(lambda x: x.lower())
SE_df['raw content']= SE_df['raw content'].apply(lemmatize_text)
SE_df['raw content']= [' '.join(map(str, l)) for l in SE_df['raw content']]
SE_df['raw content'] = SE_df['raw content'].apply(lambda x: x.upper())
SE_df


Unnamed: 0,id,title,raw content
0,7,Accidents at work statistics,ACCIDENT AT WORK STATISTICS. NUMBER OF ACCIDEN...
1,13,National accounts and GDP,NATIONAL ACCOUNT AND GDP. DEVELOPMENT FOR GDP ...
2,16,Railway safety statistics in the EU,RAILWAY SAFETY STATISTIC IN THE EU. FALL IN TH...
3,17,Railway freight transport statistics,RAILWAY FREIGHT TRANSPORT STATISTICS. DOWNTURN...
4,18,Railway passenger transport statistics - quart...,RAILWAY PASSENGER TRANSPORT STATISTIC - QUARTE...
...,...,...,...
860,10456,"Merging statistics and geospatial information,...","MERGING STATISTIC AND GEOSPATIAL INFORMATION, ..."
861,10470,"Merging statistics and geospatial information,...","MERGING STATISTIC AND GEOSPATIAL INFORMATION, ..."
862,10506,Methods for compiling PEEIs in short-term busi...,METHOD FOR COMPILING PEEIS IN SHORT-TERM BUSIN...
863,10531,Building the System of National Accounts - adm...,BUILDING THE SYSTEM OF NATIONAL ACCOUNT - ADMI...


### OECD - Glossary of Statistical Terms
https://stats.oecd.org/glossary/alpha.asp

* Scrape terms and lemmatize.

In [20]:
from urllib.request import urlopen
from bs4 import BeautifulSoup
import re

url = "https://stats.oecd.org/glossary/alpha.asp"
html = urlopen(url)
soup = BeautifulSoup(html, 'lxml')
text = soup.get_text()
    
rows = soup.find_all('tr')
str_cells = str(rows)
cleantext = BeautifulSoup(str_cells, "lxml").get_text()
#print(cleantext)

list_rows = []
for row in rows:
    cells = row.find_all('a')
    str_cells = str(cells)
    clean = re.compile('<.*?>')
    clean2 = (re.sub(clean, '',str_cells))
    list_rows.append(clean2)
#print(clean2)
#type(clean2)

df = pd.DataFrame(list_rows)
df.head(10)
df[0]=df[0].apply(lambda x: re.sub(r'\[',' ',x))
df[0]=df[0].apply(lambda x: re.sub(r'\]',' ',x))
df1 = df[0].str.split(',', expand=True)
df_t = df1.T
df_t=df_t[[22]]
df_t = df_t.rename(columns={22: 'term'})
nan_value = float("NaN")

df_t.replace(" ", nan_value, inplace=True)

df_t.dropna(subset = ["term"], inplace=True)
df_t.replace(" ", nan_value, inplace=True)
df_t.insert(0, 'id', range(len(df_t)))
df_t.reset_index(inplace=True)
df_t.drop(columns=['index'],inplace=True)
df_t.head()

df_t['lemmatized_term']= df_t['term'].apply(lambda x: x.lower())
df_t['lemmatized_term']= df_t['lemmatized_term'].apply(lemmatize_text)
df_t['lemmatized_term']= [' '.join(map(str, l)) for l in df_t['lemmatized_term']]
df_t['lemmatized_term']= df_t['lemmatized_term'].apply(lambda x: x.upper())
df_t

Unnamed: 0,id,term,lemmatized_term
0,0,A posteriori audit,A POSTERIORI AUDIT
1,1,A priori audit,A PRIORI AUDIT
2,2,A programme language (APL),A PROGRAMME LANGUAGE (APL)
3,3,Abatement,ABATEMENT
4,4,Abatement cost,ABATEMENT COST
...,...,...,...
7074,7074,Zero-coupon / deep discount bond,ZERO-COUPON / DEEP DISCOUNT BOND
7075,7075,Zero-coupon bonds,ZERO-COUPON BOND
7076,7076,Zones,ZONE
7077,7077,Zoning,ZONING


### Prepare Spacy's PhraseMatcher by building a custom vocabulary from OECD's Glossary (lemmatized_term)

In [21]:
from spacy.matcher import PhraseMatcher

matcher = PhraseMatcher(nlp.vocab)
terms = df_t['lemmatized_term'].values.tolist()
# Only run nlp.make_doc to speed things up
patterns = [nlp.make_doc(text) for text in terms]
matcher.add("TerminologyList", patterns)

### Apply PhraseMatcher

* Collect results per SE article ('doc_id') in a dataframe res. Ignore matches with 2 words or less.
* Depending on length of match: columns '3-Phrases', '4-Phrases', '5-and-above-Phrases'. These will contain dictionaries with the matched lemmatized terms and their counts, in descending order of counts.
* Column 'Terms' has a dictionary with the corresponding **original terms** in OECD's Glossary and their counts in the matches.

In [22]:
res = pd.DataFrame(index=range(len(SE_df)))
res['3-Phrases']=[[] for i in range(len(SE_df))]
res['4-Phrases']=[[] for i in range(len(SE_df))]
res['5-and-above-Phrases']=[[] for i in range(len(SE_df))]
res['Terms']=[dict() for i in range(len(SE_df))]
docs=nlp.pipe(SE_df['raw content'])
for (i,doc) in enumerate(docs):
    print(i)
    for sent in doc.sents:
        matches = matcher(sent)
        for match_id, start, end in matches:
            span = doc[start:end]
            n_words = len(span.text.split(' '))
            if n_words >= 3:
                doc_id = SE_df.loc[i,'id']
                idx = df_t.index[df_t['lemmatized_term'].str.contains(span.text,regex=False)].tolist()
                print(i,SE_df.loc[i,'title'],len(sent.text),'>',n_words,span.text,idx)
                res.loc[i,'doc_id']=doc_id
                for elem in df_t.loc[idx,'term'].values.tolist():
                    if elem in res.loc[i,'Terms'].keys():
                        res.loc[i,'Terms'][elem] +=1
                    else:
                        res.loc[i,'Terms'][elem] =1
                #res.loc[i,'Terms'].append(concepts_df.loc[idx,'term'].values.tolist())
                if n_words == 3:
                    res.loc[i,'3-Phrases'].append(span.text)
                elif n_words == 4:
                    res.loc[i,'4-Phrases'].append(span.text)
                else:
                    res.loc[i,'5-and-above-Phrases'].append(span.text)
                    
                    


0
0 Accidents at work statistics 324 > 4 HEALTH AND SOCIAL WORK [1988, 3114]
1
1 National accounts and GDP 190 > 3 RATE OF CHANGE [247, 5346, 5937, 6640]
1 National accounts and GDP 142 > 3 RATE OF CHANGE [247, 5346, 5937, 6640]
1 National accounts and GDP 254 > 3 RATE OF CHANGE [247, 5346, 5937, 6640]
1 National accounts and GDP 206 > 3 RATE OF CHANGE [247, 5346, 5937, 6640]
1 National accounts and GDP 143 > 3 RATE OF CHANGE [247, 5346, 5937, 6640]
1 National accounts and GDP 231 > 3 RATE OF CHANGE [247, 5346, 5937, 6640]
1 National accounts and GDP 206 > 3 ANNUAL GROWTH RATE [242, 248]
1 National accounts and GDP 206 > 3 RATE OF CHANGE [247, 5346, 5937, 6640]
1 National accounts and GDP 314 > 3 RATE OF CHANGE [247, 5346, 5937, 6640]
1 National accounts and GDP 173 > 3 RATE OF CHANGE [247, 5346, 5937, 6640]
1 National accounts and GDP 162 > 3 RATE OF CHANGE [247, 5346, 5937, 6640]
1 National accounts and GDP 162 > 3 PURCHASING POWER STANDARD [5219]
1 National accounts and GDP 175 > 3 

33 International trade in goods 169 > 3 ANNUAL GROWTH RATE [242, 248]
34
34 Material flow accounts and resource productivity 128 > 4 GROSS DOMESTIC PRODUCT (GDP) [2812, 2813, 2814, 2815, 2816, 2817, 2818, 2819, 4298, 4957]
34 Material flow accounts and resource productivity 128 > 4 DOMESTIC MATERIAL CONSUMPTION (DMC) [1713]
34 Material flow accounts and resource productivity 219 > 3 GROSS CAPITAL FORMATION [2806]
35
35 Digital economy and society statistics - enterprises 259 > 4 ELECTRONIC DATA INTERCHANGE (EDI) [1932]
36
37
37 Healthy life years statistics 248 > 3 QUALITY OF LIFE [5263]
38
39
40
40 Children at risk of poverty or social exclusion 539 > 3 TYPE OF HOUSEHOLD [6662]
40 Children at risk of poverty or social exclusion 539 > 3 LEVEL OF EDUCATION [3628, 4261, 4262, 4956, 5409]
40 Children at risk of poverty or social exclusion 539 > 3 LEVEL OF EDUCATION [3628, 4261, 4262, 4956, 5409]
40 Children at risk of poverty or social exclusion 472 > 3 LEVEL OF EDUCATION [3628, 4261, 426

59 Healthcare resource statistics - beds 123 > 5 LONG-TERM CARE BED IN HOSPITAL [3734]
59 Healthcare resource statistics - beds 300 > 5 LONG-TERM CARE BED IN HOSPITAL [3734]
59 Healthcare resource statistics - beds 115 > 3 PSYCHIATRIC CARE BED [5185]
60
61
62
62 Fishery statistics 118 > 4 COMMON FISHERY POLICY (CFP) [968]
63
64
64 Balance of payments statistics 30 > 3 BALANCE OF PAYMENT [382, 384, 385, 386, 388, 390, 392, 394, 396, 398, 400, 402, 404, 406, 959, 1317, 6403, 6407]
64 Balance of payments statistics 147 > 4 GROSS DOMESTIC PRODUCT (GDP) [2812, 2813, 2814, 2815, 2816, 2817, 2818, 2819, 4298, 4957]
64 Balance of payments statistics 131 > 3 OFFSHORE FINANCIAL CENTRE [4497]
65
65 Health statistics at regional level 307 > 3 CAUSE OF DEATH [743, 784, 1165, 6693]
65 Health statistics at regional level 307 > 3 CAUSE OF DEATH [743, 784, 1165, 6693]
65 Health statistics at regional level 307 > 3 CAUSE OF DEATH [743, 784, 1165, 6693]
65 Health statistics at regional level 307 > 3 CAUS

99 Statistics on young people neither in employment nor in education or training 255 > 3 LEVEL OF EDUCATION [3628, 4261, 4262, 4956, 5409]
99 Statistics on young people neither in employment nor in education or training 255 > 3 LEVEL OF EDUCATION [3628, 4261, 4262, 4956, 5409]
99 Statistics on young people neither in employment nor in education or training 76 > 3 LEVEL OF EDUCATION [3628, 4261, 4262, 4956, 5409]
99 Statistics on young people neither in employment nor in education or training 202 > 3 LEVEL OF EDUCATION [3628, 4261, 4262, 4956, 5409]
99 Statistics on young people neither in employment nor in education or training 160 > 3 LEVEL OF EDUCATION [3628, 4261, 4262, 4956, 5409]
99 Statistics on young people neither in employment nor in education or training 114 > 3 LEVEL OF EDUCATION [3628, 4261, 4262, 4956, 5409]
99 Statistics on young people neither in employment nor in education or training 241 > 3 LEVEL OF EDUCATION [3628, 4261, 4262, 4956, 5409]
99 Statistics on young peopl

109
110
110 Tourism industries - economic analysis 192 > 5 VALUE ADDED AT FACTOR COST [2841, 6828]
110 Tourism industries - economic analysis 232 > 5 VALUE ADDED AT FACTOR COST [2841, 6828]
110 Tourism industries - economic analysis 194 > 5 VALUE ADDED AT FACTOR COST [2841, 6828]
111
111 Healthcare expenditure statistics 187 > 4 GROSS DOMESTIC PRODUCT (GDP) [2812, 2813, 2814, 2815, 2816, 2817, 2818, 2819, 4298, 4957]
111 Healthcare expenditure statistics 180 > 3 PURCHASING POWER STANDARD [5219]
111 Healthcare expenditure statistics 156 > 3 RATE OF CHANGE [247, 5346, 5937, 6640]
112
112 Earnings statistics 243 > 3 PURCHASING POWER STANDARD [5219]
112 Earnings statistics 329 > 3 LEVEL OF EDUCATION [3628, 4261, 4262, 4956, 5409]
112 Earnings statistics 329 > 3 LEVEL OF EDUCATION [3628, 4261, 4262, 4956, 5409]
112 Earnings statistics 185 > 3 LEVEL OF EDUCATION [3628, 4261, 4262, 4956, 5409]
113
113 Causes of death statistics - people over 65 42 > 3 CAUSE OF DEATH [743, 784, 1165, 6693]
113

141
142
143
144
144 Social protection statistics - pension expenditure and pension beneficiaries 346 > 3 SOCIAL SECURITY BENEFIT [4609, 5457, 5972, 5973]
144 Social protection statistics - pension expenditure and pension beneficiaries 475 > 3 NORMAL RETIREMENT AGE [4399]
144 Social protection statistics - pension expenditure and pension beneficiaries 196 > 3 PURCHASING POWER STANDARD [5219]
145
145 Building the System of National Accounts - non-observed sector 480 > 4 BASIC DATA COLLECTION PROGRAMME [448]
145 Building the System of National Accounts - non-observed sector 480 > 3 DATA COLLECTION PROGRAMME [448, 1375]
145 Building the System of National Accounts - non-observed sector 250 > 4 BASIC DATA COLLECTION PROGRAMME [448]
145 Building the System of National Accounts - non-observed sector 250 > 3 DATA COLLECTION PROGRAMME [448, 1375]
145 Building the System of National Accounts - non-observed sector 435 > 4 HOUSEHOLD FINAL CONSUMPTION EXPENDITURE [2982]
146
146 Non-financial corpor

169 Building the System of National Accounts - basic concepts 166 > 5 PRODUCER FOR OWN FINAL USE [5108]
169 Building the System of National Accounts - basic concepts 602 > 5 TAX ON PRODUCTION AND IMPORT [6375]
169 Building the System of National Accounts - basic concepts 324 > 4 CONSUMPTION OF FIXED CAPITAL [1133]
169 Building the System of National Accounts - basic concepts 130 > 4 CENTRAL PRODUCT CLASSIFICATION (CPC) [772]
169 Building the System of National Accounts - basic concepts 221 > 5 STANDARD INTERNATIONAL TRADE CLASSIFICATION (SITC) [6066]
169 Building the System of National Accounts - basic concepts 99 > 4 CONSUMPTION OF FIXED CAPITAL [1133]
169 Building the System of National Accounts - basic concepts 236 > 4 CONSUMPTION OF FIXED CAPITAL [1133]
169 Building the System of National Accounts - basic concepts 161 > 3 OTHER ACCUMULATION ENTRY [4575]
169 Building the System of National Accounts - basic concepts 648 > 7 CLASSIFICATION OF INDIVIDUAL CONSUMPTION BY PURPOSE (COICOP)

180 Monitoring GNI for own resource purposes 122 > 4 NATIONAL STATISTICAL INSTITUTE (NSI) [4160]
180 Monitoring GNI for own resource purposes 242 > 6 FINANCIAL INTERMEDIATION SERVICE INDIRECTLY MEASURED (FISIM) [2394]
180 Monitoring GNI for own resource purposes 236 > 6 FINANCIAL INTERMEDIATION SERVICE INDIRECTLY MEASURED (FISIM) [2394]
180 Monitoring GNI for own resource purposes 236 > 4 GROSS NATIONAL INCOME (GNI) [2825]
181
181 Electricity generation statistics ' first results 345 > 3 NUCLEAR POWER PLANT [4421]
181 Electricity generation statistics ' first results 104 > 3 NUCLEAR POWER PLANT [4421]
181 Electricity generation statistics ' first results 164 > 3 NUCLEAR POWER PLANT [4421]
182
182 European system of national and regional accounts - ESA 2010 61 > 4 RESEARCH AND DEVELOPMENT EXPENDITURE [5542]
182 European system of national and regional accounts - ESA 2010 70 > 4 GROSS DOMESTIC PRODUCT (GDP) [2812, 2813, 2814, 2815, 2816, 2817, 2818, 2819, 4298, 4957]
182 European system 

224 Causes of death statistics 100 > 3 CAUSE OF DEATH [743, 784, 1165, 6693]
224 Causes of death statistics 218 > 3 CAUSE OF DEATH [743, 784, 1165, 6693]
224 Causes of death statistics 335 > 4 UNDERLYING CAUSE OF DEATH [6693]
224 Causes of death statistics 335 > 3 CAUSE OF DEATH [743, 784, 1165, 6693]
224 Causes of death statistics 169 > 4 UNDERLYING CAUSE OF DEATH [6693]
224 Causes of death statistics 169 > 3 CAUSE OF DEATH [743, 784, 1165, 6693]
224 Causes of death statistics 124 > 3 CAUSE OF DEATH [743, 784, 1165, 6693]
224 Causes of death statistics 157 > 3 CAUSE OF DEATH [743, 784, 1165, 6693]
224 Causes of death statistics 155 > 3 CAUSE OF DEATH [743, 784, 1165, 6693]
224 Causes of death statistics 19 > 3 CAUSE OF DEATH [743, 784, 1165, 6693]
224 Causes of death statistics 107 > 3 CAUSE OF DEATH [743, 784, 1165, 6693]
224 Causes of death statistics 94 > 3 CRUDE DEATH RATE [1290]
224 Causes of death statistics 94 > 3 CRUDE DEATH RATE [1290]
224 Causes of death statistics 54 > 3 CR

249 Main users of national accounts 192 > 3 EUROPEAN CENTRAL BANK [2162]
249 Main users of national accounts 53 > 4 STABILITY AND GROWTH PACT [6049]
249 Main users of national accounts 58 > 4 STABILITY AND GROWTH PACT [6049]
249 Main users of national accounts 60 > 3 EXCESSIVE DEFICIT PROCEDURE [2200]
249 Main users of national accounts 213 > 4 OFFICIAL DEVELOPMENT ASSISTANCE (ODA) [4486, 4487]
249 Main users of national accounts 99 > 4 GROSS NATIONAL INCOME (GNI) [2825]
249 Main users of national accounts 145 > 3 TRADITIONAL OWN RESOURCE [6556]
250
251
251 Asia-Europe Meeting (ASEM) - a statistical portrait - labour market 180 > 3 ECONOMICALLY ACTIVE PERSON [1850]
251 Asia-Europe Meeting (ASEM) - a statistical portrait - labour market 125 > 3 CONTRIBUTING FAMILY WORKER [1160]
251 Asia-Europe Meeting (ASEM) - a statistical portrait - labour market 231 > 3 MARKET EXCHANGE RATE [3840]
251 Asia-Europe Meeting (ASEM) - a statistical portrait - labour market 190 > 3 TOTAL LABOUR FORCE [6506

269 Construction of buildings statistics - NACE Rev. 2 258 > 3 APPARENT LABOUR PRODUCTIVITY [267]
269 Construction of buildings statistics - NACE Rev. 2 93 > 3 APPARENT LABOUR PRODUCTIVITY [267]
269 Construction of buildings statistics - NACE Rev. 2 121 > 3 APPARENT LABOUR PRODUCTIVITY [267]
269 Construction of buildings statistics - NACE Rev. 2 239 > 3 APPARENT LABOUR PRODUCTIVITY [267]
270
270 HICP at constant tax rates 117 > 3 TAX ON PRODUCT [4614, 6375, 6376]
270 HICP at constant tax rates 173 > 3 TAX ON PRODUCT [4614, 6375, 6376]
270 HICP at constant tax rates 139 > 3 RATE OF CHANGE [247, 5346, 5937, 6640]
270 HICP at constant tax rates 55 > 3 TAX ON PRODUCT [4614, 6375, 6376]
270 HICP at constant tax rates 148 > 3 RATE OF CHANGE [247, 5346, 5937, 6640]
270 HICP at constant tax rates 136 > 3 TAX ON PRODUCT [4614, 6375, 6376]
270 HICP at constant tax rates 66 > 3 PRICE REFERENCE PERIOD [5023]
270 HICP at constant tax rates 203 > 3 TAX ON PRODUCT [4614, 6375, 6376]
270 HICP at const

272 Building the System of National Accounts - supply and use tables 33 > 3 SUBSIDY ON PRODUCT [4611, 4612, 6216, 6218, 6219]
272 Building the System of National Accounts - supply and use tables 229 > 4 OTHER TAX ON PRODUCTION [4614]
272 Building the System of National Accounts - supply and use tables 229 > 4 OTHER SUBSIDY ON PRODUCTION [4611]
272 Building the System of National Accounts - supply and use tables 229 > 3 SUBSIDY ON PRODUCTION [4611, 6216]
272 Building the System of National Accounts - supply and use tables 205 > 5 TAX AND DUTY ON IMPORT [6364]
272 Building the System of National Accounts - supply and use tables 205 > 3 TAX ON PRODUCT [4614, 6375, 6376]
272 Building the System of National Accounts - supply and use tables 154 > 5 TAX AND DUTY ON IMPORT [6364]
272 Building the System of National Accounts - supply and use tables 90 > 3 TAX ON PRODUCT [4614, 6375, 6376]
272 Building the System of National Accounts - supply and use tables 226 > 3 SUBSIDY ON PRODUCT [4611, 4612

273 Supply and use tables - input-output analysis 312 > 4 SUPPLY AND USE TABLE [3005, 4860, 6254, 6255, 6256, 6257]
273 Supply and use tables - input-output analysis 197 > 3 CARBON DIOXIDE EMISSION [699]
274
275
276
277
278
279
280
281
282
283
284
284 Culture statistics - international trade in cultural goods 222 > 3 RATE OF CHANGE [247, 5346, 5937, 6640]
285
286
286 Distributive trade statistics - NACE Rev. 2 198 > 3 APPARENT LABOUR PRODUCTIVITY [267]
286 Distributive trade statistics - NACE Rev. 2 213 > 3 APPARENT LABOUR PRODUCTIVITY [267]
286 Distributive trade statistics - NACE Rev. 2 353 > 3 APPARENT LABOUR PRODUCTIVITY [267]
286 Distributive trade statistics - NACE Rev. 2 353 > 3 APPARENT LABOUR PRODUCTIVITY [267]
286 Distributive trade statistics - NACE Rev. 2 255 > 3 APPARENT LABOUR PRODUCTIVITY [267]
286 Distributive trade statistics - NACE Rev. 2 278 > 3 APPARENT LABOUR PRODUCTIVITY [267]
286 Distributive trade statistics - NACE Rev. 2 188 > 3 APPARENT LABOUR PRODUCTIVITY [26

326 Environmental economy ' statistics on employment and growth 247 > 3 GROSS VALUE ADDED [2839, 2840, 2841, 2842, 2843]
327
327 Environmental tax statistics - detailed analysis 206 > 6 GROSS DOMESTIC PRODUCT ( GDP ) []
327 Environmental tax statistics - detailed analysis 90 > 3 ANNUAL GROWTH RATE [242, 248]
327 Environmental tax statistics - detailed analysis 108 > 3 TAX ON POLLUTION [6374]
327 Environmental tax statistics - detailed analysis 85 > 5 EUROPEAN FREE TRADE ASSOCIATION (EFTA) [2170]
327 Environmental tax statistics - detailed analysis 210 > 4 OTHER TAX ON PRODUCTION [4614]
327 Environmental tax statistics - detailed analysis 65 > 5 TONNE OF OIL EQUIVALENT (TOE) [6476]
327 Environmental tax statistics - detailed analysis 204 > 3 TAX ON POLLUTION [6374]
328
328 EU imports of energy products - recent developments 134 > 3 TERM OF TRADE [6431]
329
330
330 EU citizens living in another Member State - statistical_overview 239 > 3 LEVEL OF EDUCATION [3628, 4261, 4262, 4956, 5409]


345 European Neighbourhood Policy - East - labour market statistics 168 > 3 ECONOMICALLY ACTIVE POPULATION [1851]
345 European Neighbourhood Policy - East - labour market statistics 110 > 3 ECONOMICALLY ACTIVE POPULATION [1851]
345 European Neighbourhood Policy - East - labour market statistics 154 > 3 ECONOMICALLY ACTIVE POPULATION [1851]
345 European Neighbourhood Policy - East - labour market statistics 238 > 4 INTERNATIONAL LABOUR ORGANISATION (ILO) [3351]
345 European Neighbourhood Policy - East - labour market statistics 64 > 3 TOTAL HOUR WORKED [6505]
346
347
347 European Neighbourhood Policy - South - agriculture statistics 297 > 3 GROSS VALUE ADDED [2839, 2840, 2841, 2842, 2843]
347 European Neighbourhood Policy - South - agriculture statistics 297 > 3 GROSS VALUE ADDED [2839, 2840, 2841, 2842, 2843]
347 European Neighbourhood Policy - South - agriculture statistics 275 > 3 GROSS VALUE ADDED [2839, 2840, 2841, 2842, 2843]
348
348 European Neighbourhood Policy - South - economi

361 International Trade in Services statistics - background 173 > 4 INTERNATIONAL TRADE IN SERVICE [3380, 4016]
361 International Trade in Services statistics - background 175 > 5 REST OF THE WORLD ACCOUNT [5578]
361 International Trade in Services statistics - background 270 > 4 INTERNATIONAL TRADE IN SERVICE [3380, 4016]
361 International Trade in Services statistics - background 270 > 3 BALANCE OF PAYMENT [382, 384, 385, 386, 388, 390, 392, 394, 396, 398, 400, 402, 404, 406, 959, 1317, 6403, 6407]
361 International Trade in Services statistics - background 89 > 3 BALANCE OF PAYMENT [382, 384, 385, 386, 388, 390, 392, 394, 396, 398, 400, 402, 404, 406, 959, 1317, 6403, 6407]
361 International Trade in Services statistics - background 85 > 7 GENERAL AGREEMENT ON TRADE IN SERVICE (GATS) [2641]
362
362 European Neighbourhood Policy - South - energy and environment 146 > 3 OTHER WOODED LAND [4616]
362 European Neighbourhood Policy - South - energy and environment 132 > 3 OTHER WOODED LAN

384 Government expenditure on housing and community amenities 333 > 3 COMPENSATION OF EMPLOYEE [988]
385
385 Government expenditure on education 290 > 8 SOCIAL BENEFIT OTHER THAN SOCIAL TRANSFER IN KIND [5958]
385 Government expenditure on education 290 > 4 SOCIAL TRANSFER IN KIND [5958, 5980]
385 Government expenditure on education 290 > 3 TRANSFER IN KIND [685, 5958, 5980, 6580]
385 Government expenditure on education 290 > 4 SOCIAL TRANSFER IN KIND [5958, 5980]
385 Government expenditure on education 290 > 3 TRANSFER IN KIND [685, 5958, 5980, 6580]
386
386 Integrated government finance statistics presentation 133 > 3 EXCESSIVE DEFICIT PROCEDURE [2200]
386 Integrated government finance statistics presentation 195 > 3 ANNUAL GROWTH RATE [242, 248]
386 Integrated government finance statistics presentation 136 > 3 GROSS CAPITAL FORMATION [2806]
386 Integrated government finance statistics presentation 227 > 5 TAX ON PRODUCTION AND IMPORT [6375]
386 Integrated government finance statisti

412 ICT education - a statistical overview 165 > 3 LEVEL OF EDUCATION [3628, 4261, 4262, 4956, 5409]
412 ICT education - a statistical overview 236 > 3 LEVEL OF EDUCATION [3628, 4261, 4262, 4956, 5409]
412 ICT education - a statistical overview 143 > 3 RATE OF CHANGE [247, 5346, 5937, 6640]
412 ICT education - a statistical overview 315 > 3 ANNUAL GROWTH RATE [242, 248]
413
413 Impact of Covid-19 crisis on services 71 > 3 RATE OF CHANGE [247, 5346, 5937, 6640]
414
415
415 Impact of Covid-19 crisis on industrial production 178 > 3 RATE OF CHANGE [247, 5346, 5937, 6640]
415 Impact of Covid-19 crisis on industrial production 166 > 3 RATE OF CHANGE [247, 5346, 5937, 6640]
415 Impact of Covid-19 crisis on industrial production 124 > 3 RATE OF CHANGE [247, 5346, 5937, 6640]
415 Impact of Covid-19 crisis on industrial production 79 > 3 RATE OF CHANGE [247, 5346, 5937, 6640]
416
416 Impact of Covid-19 crisis on construction 187 > 3 RATE OF CHANGE [247, 5346, 5937, 6640]
417
418
418 Constructio

436 Living conditions statistics at regional level 602 > 3 QUALITY OF LIFE [5263]
436 Living conditions statistics at regional level 119 > 3 QUALITY OF LIFE [5263]
437
437 Long term developments in industrial production - results from short-term statistics 168 > 3 ANNUAL GROWTH RATE [242, 248]
437 Long term developments in industrial production - results from short-term statistics 141 > 3 RATE OF CHANGE [247, 5346, 5937, 6640]
437 Long term developments in industrial production - results from short-term statistics 150 > 3 INDUSTRIAL PRODUCTION INDEX [3170]
437 Long term developments in industrial production - results from short-term statistics 111 > 3 RATE OF CHANGE [247, 5346, 5937, 6640]
437 Long term developments in industrial production - results from short-term statistics 287 > 3 ANNUAL GROWTH RATE [242, 248]
437 Long term developments in industrial production - results from short-term statistics 287 > 3 RATE OF CHANGE [247, 5346, 5937, 6640]
437 Long term developments in industri

457 Material deprivation statistics ' financial stress and lack of durables 206 > 3 LEVEL OF EDUCATION [3628, 4261, 4262, 4956, 5409]
458
458 Migrant integration statistics - housing 251 > 3 PURCHASING POWER STANDARD [5219]
459
459 Migrant integration statistics - education 408 > 3 LEVEL OF EDUCATION [3628, 4261, 4262, 4956, 5409]
459 Migrant integration statistics - education 195 > 3 LEVEL OF EDUCATION [3628, 4261, 4262, 4956, 5409]
459 Migrant integration statistics - education 198 > 3 LEVEL OF EDUCATION [3628, 4261, 4262, 4956, 5409]
459 Migrant integration statistics - education 367 > 3 LEVEL OF EDUCATION [3628, 4261, 4262, 4956, 5409]
459 Migrant integration statistics - education 277 > 3 LEVEL OF EDUCATION [3628, 4261, 4262, 4956, 5409]
459 Migrant integration statistics - education 277 > 3 LEVEL OF EDUCATION [3628, 4261, 4262, 4956, 5409]
459 Migrant integration statistics - education 296 > 3 LEVEL OF EDUCATION [3628, 4261, 4262, 4956, 5409]
459 Migrant integration statistics - 

474 Pensions in national accounts - statistics 434 > 3 SOCIAL SECURITY SCHEME [5977]
474 Pensions in national accounts - statistics 80 > 3 SOCIAL SECURITY SCHEME [5977]
474 Pensions in national accounts - statistics 161 > 3 SOCIAL INSURANCE SCHEME [5968]
475
476
476 Preventable and treatable mortality statistics 63 > 3 CAUSE OF DEATH [743, 784, 1165, 6693]
476 Preventable and treatable mortality statistics 154 > 3 CAUSE OF DEATH [743, 784, 1165, 6693]
476 Preventable and treatable mortality statistics 246 > 3 CAUSE OF DEATH [743, 784, 1165, 6693]
476 Preventable and treatable mortality statistics 255 > 3 CAUSE OF DEATH [743, 784, 1165, 6693]
476 Preventable and treatable mortality statistics 246 > 3 CAUSE OF DEATH [743, 784, 1165, 6693]
476 Preventable and treatable mortality statistics 97 > 3 CAUSE OF DEATH [743, 784, 1165, 6693]
476 Preventable and treatable mortality statistics 75 > 3 CAUSE OF DEATH [743, 784, 1165, 6693]
476 Preventable and treatable mortality statistics 243 > 3 CA

501 International Standard Classification of Education (ISCED) 121 > 3 ADMINISTRATIVE DATA COLLECTION [116]
501 International Standard Classification of Education (ISCED) 151 > 3 LEVEL OF EDUCATION [3628, 4261, 4262, 4956, 5409]
501 International Standard Classification of Education (ISCED) 151 > 3 LEVEL OF EDUCATION [3628, 4261, 4262, 4956, 5409]
501 International Standard Classification of Education (ISCED) 74 > 3 LEVEL OF EDUCATION [3628, 4261, 4262, 4956, 5409]
501 International Standard Classification of Education (ISCED) 196 > 3 ADMINISTRATIVE DATA COLLECTION [116]
502
502 Self-reported work-related health problems and risk factors - key statistics 186 > 3 ADMINISTRATIVE DATA COLLECTION [116]
502 Self-reported work-related health problems and risk factors - key statistics 130 > 3 LEVEL OF EDUCATION [3628, 4261, 4262, 4956, 5409]
502 Self-reported work-related health problems and risk factors - key statistics 109 > 3 LEVEL OF EDUCATION [3628, 4261, 4262, 4956, 5409]
502 Self-repor

545 World trade in services 297 > 3 BALANCE OF PAYMENT [382, 384, 385, 386, 388, 390, 392, 394, 396, 398, 400, 402, 404, 406, 959, 1317, 6403, 6407]
545 World trade in services 297 > 3 INTERNATIONAL INVESTMENT POSITION [3350, 4224]
545 World trade in services 151 > 4 COMPUTER AND INFORMATION SERVICE [1033]
545 World trade in services 279 > 4 COMPUTER AND INFORMATION SERVICE [1033]
546
546 Young people on the labour market - statistics 182 > 3 PLACE OF RESIDENCE [4896]
546 Young people on the labour market - statistics 133 > 3 PLACE OF RESIDENCE [4896]
546 Young people on the labour market - statistics 147 > 3 LEVEL OF EDUCATION [3628, 4261, 4262, 4956, 5409]
546 Young people on the labour market - statistics 72 > 3 LEVEL OF EDUCATION [3628, 4261, 4262, 4956, 5409]
547
547 EU Labour Force Survey 120 > 3 LEVEL OF EDUCATION [3628, 4261, 4262, 4956, 5409]
548
548 Young people - social inclusion 296 > 3 POPULATION AT RISK [4929]
549
549 Young people - housing conditions 79 > 3 POPULATION AT

587 SDG 10 - Reduced inequalities 119 > 3 PURCHASING POWER STANDARD [5219]
587 SDG 10 - Reduced inequalities 135 > 3 COEFFICIENT OF VARIATION [909]
588
588 SDG 11 - Sustainable cities and communities 231 > 3 QUALITY OF LIFE [5263]
588 SDG 11 - Sustainable cities and communities 177 > 3 QUALITY OF LIFE [5263]
588 SDG 11 - Sustainable cities and communities 502 > 3 QUALITY OF LIFE [5263]
588 SDG 11 - Sustainable cities and communities 31 > 3 QUALITY OF LIFE [5263]
588 SDG 11 - Sustainable cities and communities 150 > 3 QUALITY OF LIFE [5263]
588 SDG 11 - Sustainable cities and communities 300 > 3 QUALITY OF LIFE [5263]
588 SDG 11 - Sustainable cities and communities 163 > 3 PLACE OF WORK [2449, 4285, 4901]
588 SDG 11 - Sustainable cities and communities 163 > 3 QUALITY OF LIFE [5263]
588 SDG 11 - Sustainable cities and communities 179 > 3 QUALITY OF LIFE [5263]
589
589 SDG 12 - Responsible consumption and production (statistical annex) 173 > 4 DOMESTIC MATERIAL CONSUMPTION (DMC) [1713]
5

611 Learning mobility statistics 122 > 3 LEVEL OF EDUCATION [3628, 4261, 4262, 4956, 5409]
612
612 Ageing Europe ? statistics on housing and living conditions 372 > 3 TYPE OF HOUSEHOLD [6662]
612 Ageing Europe ? statistics on housing and living conditions 174 > 3 REPAIR AND MAINTENANCE [1331, 5500]
612 Ageing Europe ? statistics on housing and living conditions 190 > 3 TYPE OF HOUSEHOLD [6662]
612 Ageing Europe ? statistics on housing and living conditions 54 > 3 TYPE OF HOUSEHOLD [6662]
612 Ageing Europe ? statistics on housing and living conditions 171 > 3 QUALITY OF LIFE [5263]
612 Ageing Europe ? statistics on housing and living conditions 62 > 3 TYPE OF HOUSEHOLD [6662]
613
614
614 Concepts for household income ? comparison between micro and macro approach 154 > 4 SOCIAL TRANSFER IN KIND [5958, 5980]
614 Concepts for household income ? comparison between micro and macro approach 154 > 3 TRANSFER IN KIND [685, 5958, 5980, 6580]
614 Concepts for household income ? comparison between

629 Impact of COVID-19 on main GDP aggregates including employment 73 > 3 GROSS VALUE ADDED [2839, 2840, 2841, 2842, 2843]
629 Impact of COVID-19 on main GDP aggregates including employment 105 > 3 GROSS VALUE ADDED [2839, 2840, 2841, 2842, 2843]
629 Impact of COVID-19 on main GDP aggregates including employment 125 > 3 COMPENSATION OF EMPLOYEE [988]
630
630 Impact of Covid-19 crisis on retail trade 74 > 3 RATE OF CHANGE [247, 5346, 5937, 6640]
631
632
633
634
635
636
636 ICT sector - value added, employment and R&D 234 > 4 GROSS DOMESTIC PRODUCT (GDP) [2812, 2813, 2814, 2815, 2816, 2817, 2818, 2819, 4298, 4957]
636 ICT sector - value added, employment and R&D 199 > 3 ANNUAL GROWTH RATE [242, 248]
636 ICT sector - value added, employment and R&D 197 > 3 APPARENT LABOUR PRODUCTIVITY [267]
636 ICT sector - value added, employment and R&D 197 > 3 APPARENT LABOUR PRODUCTIVITY [267]
636 ICT sector - value added, employment and R&D 210 > 3 APPARENT LABOUR PRODUCTIVITY [267]
636 ICT sector - 

659 Extra-EU trade in agricultural goods 89 > 3 ANNUAL GROWTH RATE [242, 248]
660
660 European Neighbourhood Policy - South - transport statistics 122 > 3 ROAD MOTOR VEHICLE [2754, 4735, 5638, 6663]
661
662
662 Extra-EU trade by invoicing currency 152 > 5 STANDARD INTERNATIONAL TRADE CLASSIFICATION (SITC) [6066]
663
663 European Neighbourhood Policy - East - population statistics 231 > 3 CRUDE BIRTH RATE [1289]
663 European Neighbourhood Policy - East - population statistics 231 > 3 CRUDE DEATH RATE [1290]
663 European Neighbourhood Policy - East - population statistics 243 > 3 CRUDE BIRTH RATE [1289]
663 European Neighbourhood Policy - East - population statistics 243 > 3 CRUDE DEATH RATE [1290]
663 European Neighbourhood Policy - East - population statistics 174 > 3 CRUDE BIRTH RATE [1289]
663 European Neighbourhood Policy - East - population statistics 174 > 3 CRUDE DEATH RATE [1290]
663 European Neighbourhood Policy - East - population statistics 86 > 3 CRUDE BIRTH RATE [1289]
663 

675 Enlargement countries - finance statistics 110 > 3 BALANCE OF PAYMENT [382, 384, 385, 386, 388, 390, 392, 394, 396, 398, 400, 402, 404, 406, 959, 1317, 6403, 6407]
675 Enlargement countries - finance statistics 41 > 3 BALANCE OF PAYMENT [382, 384, 385, 386, 388, 390, 392, 394, 396, 398, 400, 402, 404, 406, 959, 1317, 6403, 6407]
675 Enlargement countries - finance statistics 52 > 3 BALANCE OF PAYMENT [382, 384, 385, 386, 388, 390, 392, 394, 396, 398, 400, 402, 404, 406, 959, 1317, 6403, 6407]
675 Enlargement countries - finance statistics 245 > 3 FOREIGN DIRECT INVESTMENT [2506, 2507]
675 Enlargement countries - finance statistics 245 > 3 FOREIGN DIRECT INVESTMENT [2506, 2507]
675 Enlargement countries - finance statistics 165 > 3 FOREIGN DIRECT INVESTMENT [2506, 2507]
675 Enlargement countries - finance statistics 134 > 3 FOREIGN DIRECT INVESTMENT [2506, 2507]
675 Enlargement countries - finance statistics 89 > 3 FOREIGN DIRECT INVESTMENT [2506, 2507]
675 Enlargement countries - f

691 Balance of payment statistics - background 188 > 3 BALANCE OF PAYMENT [382, 384, 385, 386, 388, 390, 392, 394, 396, 398, 400, 402, 404, 406, 959, 1317, 6403, 6407]
691 Balance of payment statistics - background 188 > 3 INTERNATIONAL INVESTMENT POSITION [3350, 4224]
691 Balance of payment statistics - background 124 > 3 BALANCE OF PAYMENT [382, 384, 385, 386, 388, 390, 392, 394, 396, 398, 400, 402, 404, 406, 959, 1317, 6403, 6407]
691 Balance of payment statistics - background 124 > 3 INTERNATIONAL INVESTMENT POSITION [3350, 4224]
691 Balance of payment statistics - background 245 > 3 EUROPEAN CENTRAL BANK [2162]
691 Balance of payment statistics - background 136 > 3 BALANCE OF PAYMENT [382, 384, 385, 386, 388, 390, 392, 394, 396, 398, 400, 402, 404, 406, 959, 1317, 6403, 6407]
691 Balance of payment statistics - background 136 > 3 INTERNATIONAL INVESTMENT POSITION [3350, 4224]
691 Balance of payment statistics - background 169 > 3 EUROPEAN CENTRAL BANK [2162]
691 Balance of payment

715 EU-ASEAN cooperation - key economy and finance statistics 149 > 3 GROSS CAPITAL FORMATION [2806]
715 EU-ASEAN cooperation - key economy and finance statistics 81 > 3 GOVERNMENT GROSS DEBT [2646, 2651, 2771]
716
717
717 Accessing European statistics 358 > 3 QUALITY OF LIFE [5263]
717 Accessing European statistics 162 > 6 STATISTICAL DATA AND METADATA EXCHANGE (SDMX) [6087]
717 Accessing European statistics 237 > 3 SUSTAINABLE DEVELOPMENT INDICATOR [6288]
718
718 Ageing Europe - introduction 217 > 3 TYPE OF HOUSEHOLD [6662]
718 Ageing Europe - introduction 217 > 3 QUALITY OF LIFE [5263]
719
719 Agri-environmental indicator - High Nature Value farmland 326 > 4 COMMON AGRICULTURAL POLICY (CAP) [965, 966]
720
720 Being young in Europe today - executive summary 85 > 3 INFANT MORTALITY RATE [3182]
720 Being young in Europe today - executive summary 103 > 3 PERCEIVED HEALTH STATUS [4795]
720 Being young in Europe today - executive summary 76 > 3 LEVEL OF EDUCATION [3628, 4261, 4262, 4956, 

733 Building the System of National Accounts - volume measures 103 > 3 PAASCHE PRICE INDEX [2693, 3716, 4682]
733 Building the System of National Accounts - volume measures 103 > 3 LASPEYRES VOLUME INDEX [3595]
733 Building the System of National Accounts - volume measures 244 > 3 LASPEYRES VOLUME INDEX [3595]
733 Building the System of National Accounts - volume measures 116 > 3 LASPEYRES PRICE INDEX [2690, 3593, 3715]
733 Building the System of National Accounts - volume measures 116 > 3 PAASCHE VOLUME INDEX [4684]
733 Building the System of National Accounts - volume measures 79 > 3 PAASCHE PRICE INDEX [2693, 3716, 4682]
733 Building the System of National Accounts - volume measures 192 > 3 LASPEYRES VOLUME INDEX [3595]
733 Building the System of National Accounts - volume measures 192 > 3 PAASCHE PRICE INDEX [2693, 3716, 4682]
733 Building the System of National Accounts - volume measures 169 > 3 GROSS CAPITAL FORMATION [2806]
733 Building the System of National Accounts - volume m

768
768 Merging statistics and geospatial information, 2014 projects - Poland 98 > 3 LAND AND BUILDING [1336, 3563]
769
769 Merging statistics and geospatial information, 2015 projects - Latvia 118 > 3 PLACE OF RESIDENCE [4896]
770
771
772
772 Merging statistics and geospatial information, 2015 projects - Slovenia 293 > 3 PERCEIVED HEALTH STATUS [4795]
772 Merging statistics and geospatial information, 2015 projects - Slovenia 162 > 3 DISCLOSURE CONTROL METHOD [1656, 4837, 5583]
772 Merging statistics and geospatial information, 2015 projects - Slovenia 226 > 3 DISCLOSURE CONTROL METHOD [1656, 4837, 5583]
773
773 National accounts background 208 > 3 EXCESSIVE DEFICIT PROCEDURE [2200]
774
775
775 Quality of life indicators - measuring quality of life 54 > 3 QUALITY OF LIFE [5263]
775 Quality of life indicators - measuring quality of life 54 > 3 QUALITY OF LIFE [5263]
775 Quality of life indicators - measuring quality of life 296 > 3 QUALITY OF LIFE [5263]
775 Quality of life indicators 

783 Short-term business statistics - seasonal adjustment methods 129 > 3 INDUSTRIAL PRODUCTION INDEX [3170]
783 Short-term business statistics - seasonal adjustment methods 129 > 3 NOT SEASONALLY ADJUSTED [4410]
784
784 Short-term business statistics and (annual) chain linking 142 > 3 RATE OF CHANGE [247, 5346, 5937, 6640]
784 Short-term business statistics and (annual) chain linking 142 > 3 RATE OF CHANGE [247, 5346, 5937, 6640]
785
786
787
788
788 Merging statistics and geospatial information, 2015 projects - Austria 208 > 3 PLACE OF RESIDENCE [4896]
789
789 Merging statistics and geospatial information, 2014 projects - Portugal 197 > 3 CONSUMER PRICE INDEX [1124, 1125, 2873]
790
790 Statistics in development cooperation - coordination 222 > 3 MILLENNIUM DEVELOPMENT GOAL [3973, 3974]
790 Statistics in development cooperation - coordination 222 > 3 MILLENNIUM DEVELOPMENT GOAL [3973, 3974]
790 Statistics in development cooperation - coordination 434 > 4 INTERNATIONAL DEVELOPMENT ASSOCI

807 Statistics in development cooperation - advocacy 385 > 3 MILLENNIUM DEVELOPMENT GOAL [3973, 3974]
808
808 Statistics in development cooperation - improving statistical capacity 294 > 3 MILLENNIUM DEVELOPMENT GOAL [3973, 3974]
808 Statistics in development cooperation - improving statistical capacity 421 > 4 NATIONAL STATISTICAL SYSTEM (NSS) [4162]
809
809 Statistics in development cooperation - EU support to partner countries 95 > 4 NATIONAL STATISTICAL SYSTEM (NSS) [4162]
809 Statistics in development cooperation - EU support to partner countries 252 > 3 SECTOR WIDE APPROACH [5802]
810
811
811 Statistics in development cooperation - development indicators 155 > 4 NATIONAL STATISTICAL SYSTEM (NSS) [4162]
812
813
813 Merging statistics and geospatial information, 2013 projects - Italy 266 > 3 PLACE OF RESIDENCE [4896]
813 Merging statistics and geospatial information, 2013 projects - Italy 84 > 3 LOSS OF INFORMATION [3745]
814
815
816
816 Merging statistics and geospatial informatio

849 Energy balance - old methodology 265 > 3 STATISTICAL DATA COLLECTION [6088]
850
850 Energy balance - new methodology 1489 > 3 COKE OVEN COKE [919]
850 Energy balance - new methodology 1489 > 3 GAS WORK GAS [2620]
850 Energy balance - new methodology 1489 > 3 COKE OVEN GAS [920]
850 Energy balance - new methodology 1489 > 3 BLAST FURNACE GAS [550]
850 Energy balance - new methodology 138 > 3 STATISTICAL DATA COLLECTION [6088]
851
852
852 Differences between balance of payments and foreign trade statistics 67 > 3 BALANCE OF PAYMENT [382, 384, 385, 386, 388, 390, 392, 394, 396, 398, 400, 402, 404, 406, 959, 1317, 6403, 6407]
852 Differences between balance of payments and foreign trade statistics 117 > 4 CENTRE OF ECONOMIC INTEREST [777]
853
853 Disability statistics background - Labour force survey - proxy analysis 204 > 3 LEVEL OF EDUCATION [3628, 4261, 4262, 4956, 5409]
854
854 Disability statistics background - Labour force survey - non-response analysis 189 > 3 LEVEL OF EDUCATION

In [23]:
res['3-Phrases']= res['3-Phrases'].apply(lambda x: dict(Counter(x).most_common()))
res['4-Phrases']= res['4-Phrases'].apply(lambda x: dict(Counter(x).most_common()))
res['5-and-above-Phrases']= res['5-and-above-Phrases'].apply(lambda x: dict(Counter(x).most_common()))



In [24]:
res=pd.merge(SE_df[['id','title']],res,left_on='id',right_on='doc_id',how='left')
res.drop(columns=['doc_id'],inplace=True)
res

Unnamed: 0,id,title,3-Phrases,4-Phrases,5-and-above-Phrases,Terms
0,7,Accidents at work statistics,{},{'HEALTH AND SOCIAL WORK': 1},{},"{' health and social work': 1, ' Incomes of he..."
1,13,National accounts and GDP,"{'RATE OF CHANGE': 15, 'GROSS VALUE ADDED': 6,...",{'HEALTH AND SOCIAL WORK': 3},"{'TAX ON PRODUCTION AND IMPORT': 2, 'EXTERNAL ...",{' Annualised growth rate (annualised rate of ...
2,16,Railway safety statistics in the EU,,,,
3,17,Railway freight transport statistics,,,,
4,18,Railway passenger transport statistics - quart...,,,,
...,...,...,...,...,...,...
860,10456,"Merging statistics and geospatial information,...",,,,
861,10470,"Merging statistics and geospatial information,...",,,,
862,10506,Methods for compiling PEEIs in short-term busi...,"{'PRODUCER PRICE INDEX': 3, 'CONSUMER PRICE IN...",{},{},"{' Input producer price indices': 3, ' Output ..."
863,10531,Building the System of National Accounts - adm...,"{'GROSS VALUE ADDED': 4, 'BALANCE OF PAYMENT':...","{'CONSUMPTION OF FIXED CAPITAL': 2, 'VALUE ADD...",{'FINANCIAL INTERMEDIATION SERVICE INDIRECTLY ...,"{' Deductible value added tax (VAT)': 1, ' Inv..."


In [25]:
outfile = file_name('Phrase_Matcher_SE_OECD','xlsx')
res.to_excel(outfile)
