# Hypothese 2

In [687]:
import gc
from collections import Counter

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import pyarrow.parquet as pq
import scipy.stats as stats
from sklearn.linear_model import LinearRegression

## Daten einlesen

In [688]:
# Load the Eurostat investment table (one row per country and year).
Eurostat_Investitionen = pd.read_csv(
    'data/input/Eurostat_Investitionen.csv'
)
# Preview the first five rows.
Eurostat_Investitionen.head()

Unnamed: 0,Land,Jahr,Betrag in Mio. Euro
0,Österreich,2000,4028.67
1,Österreich,2001,4393.09
2,Österreich,2002,4684.313
3,Österreich,2003,5041.98
4,Österreich,2004,5249.546


In [689]:
# Sum the invested amount per country over all rows of the Eurostat table.
Investitionssummen_Länder = (
    Eurostat_Investitionen
    .groupby('Land')['Betrag in Mio. Euro']
    .sum()
    .reset_index()
)
Investitionssummen_Länder.head(27)

Unnamed: 0,Land,Betrag in Mio. Euro
0,Belgien,233201.336
1,Bulgarien,7031.177
2,Deutschland,1931488.613
3,Dänemark,174858.568
4,Estland,6807.736
5,Finnland,151550.979
6,Frankreich,1079692.751
7,Griechenland,38278.2
8,Irland,76188.24
9,Italien,489078.712


In [690]:
# Open the OpenAlex parquet file lazily; only metadata is read here.
# (The original path contained a stray double slash: "data/input//...".)
parquet_file = "data/input/Data_OpenAlex.parquet"
parquet_dataset = pq.ParquetFile(parquet_file)

# Report the size of the file from its metadata without loading any rows.
print(f"Anzahl Zeilen: {parquet_dataset.metadata.num_rows:,}")
print(f"Anzahl Spalten: {parquet_dataset.metadata.num_columns}")
print(f"Anzahl Row Groups: {parquet_dataset.num_row_groups}")

Anzahl Zeilen: 16,751,942
Anzahl Spalten: 9
Anzahl Row Groups: 16


In [691]:
# List every column of the parquet schema together with its Arrow type.
print("\nSpalten in der Datei:")
schema = parquet_dataset.schema_arrow
for position, field in enumerate(schema, start=1):
    print(f"  {position:2d}. {field.name:20s} ({field.type})")


Spalten in der Datei:
   1. paper_id             (string)
   2. doi                  (string)
   3. fwci                 (float)
   4. title                (string)
   5. publication_year     (int16)
   6. cited_by_count       (int32)
   7. institution_count    (int16)
   8. country_codes        (string)
   9. institutions         (string)


In [692]:
print("Lade kleine Stichprobe (1000 Zeilen)")
# Read a single 1000-row batch instead of materialising the whole first row
# group and throwing most of it away again.
first_batch = next(
    parquet_dataset.iter_batches(batch_size=1000, columns=['country_codes'])
).to_pandas()

print(f"Stichprobe geladen: {len(first_batch)} Zeilen")
print("Beispiel country_codes:")

Lade kleine Stichprobe (1000 Zeilen)
Stichprobe geladen: 1000 Zeilen
Beispiel country_codes:


In [693]:
# Show the first ten non-missing country_codes values of the sample.
non_missing_codes = first_batch['country_codes'].dropna()
sample_codes = non_missing_codes.iloc[:10]
for number, code in enumerate(sample_codes, start=1):
    print(f"  {number:2d}. {code}")

# Keep the sample available under a second name.
df_openalex_sample = first_batch

   1. FR,US
   2. AU,CA,CH,GB,LB,NO,US,ZA
   3. GB,US
   4. DE,NZ,US
   5. CH,DE,DK,ES,US
   6. CH
   7. AU,BE,CH,CZ,DE,ES,FI,FR,GB,IT,LU,NL,SE,TR
   8. CH
   9. AR,AU,BE,BR,CA,CH,DE,ES,FI,FR,GB,HU,IR,IT,MT,NL,NZ,PL,PT,SE,SI,TW,US
  10. AU,BE,CH,DE,ES,FR,GB,IT,NO,PL,SE


## Länder zählen

In [694]:
def _split_country_codes(country_codes_str):
    """Split one raw ``country_codes`` string into upper-cased codes.

    The first of ``,``, ``;``, ``|`` and space that occurs in the string is
    used as the separator; a string containing none of them is treated as a
    single code. Empty fragments are dropped.
    """
    for separator in (',', ';', '|', ' '):
        if separator in country_codes_str:
            return [c.strip().upper() for c in country_codes_str.split(separator) if c.strip()]
    return [country_codes_str.strip().upper()]


def count_countries_efficient(parquet_file, chunk_size=100000):
    """
    Count how often each country code occurs in the ``country_codes`` column,
    reading the data in batches to keep memory usage low.

    Args:
        parquet_file: Path of the parquet file, or an already opened object
            that provides ``iter_batches`` and ``num_row_groups`` (such as a
            ``pyarrow.parquet.ParquetFile``).
        chunk_size: Number of rows requested per batch.

    Returns:
        Tuple ``(country_counter, total_papers)``: a ``Counter`` mapping each
        code to its number of occurrences, and the number of rows processed.
        A row listing several countries contributes to each of them.
    """
    # Use the argument instead of relying on a notebook-global dataset handle.
    if hasattr(parquet_file, 'iter_batches'):
        dataset = parquet_file
    else:
        dataset = pq.ParquetFile(parquet_file)

    country_counter = Counter()
    total_papers = 0

    print(f"Anzahl Row Groups: {dataset.num_row_groups}")
    print(f"Chunk-Größe: {chunk_size:,} Zeilen")

    # Process the file batch by batch, loading only the one column needed.
    batches = dataset.iter_batches(batch_size=chunk_size, columns=['country_codes'])
    for batch_idx, batch in enumerate(batches):
        df_chunk = batch.to_pandas()

        for country_codes_str in df_chunk['country_codes']:
            # Skip missing values, non-string entries and empty strings.
            if not isinstance(country_codes_str, str) or not country_codes_str:
                continue
            for country in _split_country_codes(country_codes_str):
                # Anything longer than three characters is not a country code.
                if country and len(country) <= 3:
                    country_counter[country] += 1

        total_papers += len(df_chunk)

        if (batch_idx + 1) % 10 == 0:
            print(f"Verarbeitet: {total_papers:,} Papers, {len(country_counter)} verschiedene Länder gefunden")

        # Release the chunk before the next batch is loaded.
        del df_chunk
        gc.collect()

    print(f"Total verarbeitete Papers: {total_papers:,}")
    print(f"Verschiedene Länder gefunden: {len(country_counter)}")

    return country_counter, total_papers

In [695]:
# Run the batched country count over the parquet file.
print("Starte Länderzählung...")
country_counts, total_papers = count_countries_efficient(parquet_file=parquet_file)

Starte Länderzählung...
Anzahl Row Groups: 16
Chunk-Größe: 100,000 Zeilen
Verarbeitet: 1,000,000 Papers, 222 verschiedene Länder gefunden
Verarbeitet: 2,000,000 Papers, 223 verschiedene Länder gefunden
Verarbeitet: 3,000,000 Papers, 223 verschiedene Länder gefunden
Verarbeitet: 4,000,000 Papers, 223 verschiedene Länder gefunden
Verarbeitet: 5,000,000 Papers, 224 verschiedene Länder gefunden
Verarbeitet: 6,000,000 Papers, 224 verschiedene Länder gefunden
Verarbeitet: 7,000,000 Papers, 224 verschiedene Länder gefunden
Verarbeitet: 8,000,000 Papers, 224 verschiedene Länder gefunden
Verarbeitet: 9,000,000 Papers, 224 verschiedene Länder gefunden
Verarbeitet: 10,000,000 Papers, 225 verschiedene Länder gefunden
Verarbeitet: 11,000,000 Papers, 225 verschiedene Länder gefunden
Verarbeitet: 12,000,000 Papers, 226 verschiedene Länder gefunden
Verarbeitet: 13,000,000 Papers, 226 verschiedene Länder gefunden
Verarbeitet: 14,000,000 Papers, 226 verschiedene Länder gefunden
Verarbeitet: 15,000,000 P

In [696]:
# Twenty most frequent country codes. Shares are relative to the number of
# processed rows; a paper listing several countries is counted for each of
# them, so the shares are not expected to add up to 100 %.
print("Top 20 Länder nach Anzahl Papers")
top_countries = country_counts.most_common(20)
for rank, (country, count) in enumerate(top_countries, start=1):
    share = (count / total_papers) * 100
    print(f"{rank:2d}. {country:3s}: {count:8,} Papers ({share:.2f}%)")

Top 20 Länder nach Anzahl Papers
 1. GB : 3,057,191 Papers (18.25%)
 2. DE : 2,684,903 Papers (16.03%)
 3. FR : 2,340,240 Papers (13.97%)
 4. US : 1,715,128 Papers (10.24%)
 5. IT : 1,628,325 Papers (9.72%)
 6. ES : 1,490,117 Papers (8.90%)
 7. RU : 1,454,541 Papers (8.68%)
 8. NL :  883,788 Papers (5.28%)
 9. PL :  841,244 Papers (5.02%)
10. CH :  700,694 Papers (4.18%)
11. CN :  588,488 Papers (3.51%)
12. SE :  565,233 Papers (3.37%)
13. BE :  533,431 Papers (3.18%)
14. PT :  429,884 Papers (2.57%)
15. DK :  405,820 Papers (2.42%)
16. AT :  383,322 Papers (2.29%)
17. CA :  368,065 Papers (2.20%)
18. UA :  367,879 Papers (2.20%)
19. AU :  356,857 Papers (2.13%)
20. NO :  333,833 Papers (1.99%)


In [697]:
# Turn the counter into a two-column frame, largest paper counts first.
country_records = [
    {'Country_Code': code, 'Paper_Count': n_papers}
    for code, n_papers in country_counts.items()
]
countries_df = pd.DataFrame(country_records)
countries_df = countries_df.sort_values('Paper_Count', ascending=False)
countries_df = countries_df.reset_index(drop=True)

print(f"Länder DataFrame erstellt mit {len(countries_df)} Ländern")
countries_df.head(10)

Länder DataFrame erstellt mit 226 Ländern


Unnamed: 0,Country_Code,Paper_Count
0,GB,3057191
1,DE,2684903
2,FR,2340240
3,US,1715128
4,IT,1628325
5,ES,1490117
6,RU,1454541
7,NL,883788
8,PL,841244
9,CH,700694


## ISO Codes Mapping

In [698]:
# Data preparation for the Europe map:
# lookup tables from ISO-2 codes to ISO-3 codes and from ISO-3 codes to names.
iso2_to_iso3_mapping = {
    'AT': 'AUT', 'BE': 'BEL', 'BG': 'BGR', 'HR': 'HRV', 'CY': 'CYP',
    'CZ': 'CZE', 'DK': 'DNK', 'EE': 'EST', 'FI': 'FIN', 'FR': 'FRA',
    'DE': 'DEU', 'GR': 'GRC', 'HU': 'HUN', 'IE': 'IRL', 'IT': 'ITA',
    'LV': 'LVA', 'LT': 'LTU', 'LU': 'LUX', 'MT': 'MLT', 'NL': 'NLD',
    'PL': 'POL', 'PT': 'PRT', 'RO': 'ROU', 'SK': 'SVK', 'SI': 'SVN',
    'ES': 'ESP', 'SE': 'SWE',
    # Non-EU countries ('GB' and 'UK' are both mapped to 'GBR')
    'GB': 'GBR', 'UK': 'GBR', 'NO': 'NOR', 'CH': 'CHE', 'IS': 'ISL',
    'UA': 'UKR', 'TR': 'TUR', 'RS': 'SRB', 'BA': 'BIH', 'AL': 'ALB',
    'ME': 'MNE', 'MK': 'MKD', 'MD': 'MDA', 'BY': 'BLR', 'RU': 'RUS'
}

iso3_to_country_name = {
    'AUT': 'Austria', 'BEL': 'Belgium', 'BGR': 'Bulgaria', 'HRV': 'Croatia',
    'CYP': 'Cyprus', 'CZE': 'Czech Republic', 'DNK': 'Denmark', 'EST': 'Estonia',
    'FIN': 'Finland', 'FRA': 'France', 'DEU': 'Germany', 'GRC': 'Greece',
    'HUN': 'Hungary', 'IRL': 'Ireland', 'ITA': 'Italy', 'LVA': 'Latvia',
    'LTU': 'Lithuania', 'LUX': 'Luxembourg', 'MLT': 'Malta', 'NLD': 'Netherlands',
    'POL': 'Poland', 'PRT': 'Portugal', 'ROU': 'Romania', 'SVK': 'Slovakia',
    'SVN': 'Slovenia', 'ESP': 'Spain', 'SWE': 'Sweden',
    # Non-EU countries
    'GBR': 'United Kingdom', 'NOR': 'Norway', 'CHE': 'Switzerland', 'ISL': 'Iceland',
    'UKR': 'Ukraine', 'TUR': 'Turkey', 'SRB': 'Serbia', 'BIH': 'Bosnia and Herzegovina',
    'ALB': 'Albania', 'MNE': 'Montenegro', 'MKD': 'North Macedonia', 'MDA': 'Moldova',
    'BLR': 'Belarus', 'RUS': 'Russia'
}

def prepare_europe_data(country_counts):
    """
    Aggregate paper counts per European country name for the Europe map.

    Args:
        country_counts: Mapping of country code (ISO-2 or ISO-3) to paper count.

    Returns:
        Dict mapping the English country name to its paper count. Codes that
        appear in neither lookup table are ignored. Counts of several codes
        that resolve to the same country (e.g. 'GB' and 'UK', or 'FR' and
        'FRA') are added up instead of overwriting each other.
    """
    europe_data = {}

    for country_code, count in country_counts.items():
        # Resolve the code to ISO-3: either it already is a known ISO-3 code,
        # or it is a known ISO-2 code that can be translated.
        if country_code in iso3_to_country_name:
            iso3_code = country_code
        else:
            iso3_code = iso2_to_iso3_mapping.get(country_code)

        country_name = iso3_to_country_name.get(iso3_code)
        if country_name is None:
            continue

        europe_data[country_name] = europe_data.get(country_name, 0) + count

    return europe_data


In [699]:
# Build the country-name -> paper-count mapping used for the Europe map.
europe_research_data = prepare_europe_data(country_counts=country_counts)
print(f"Europäische Länder gefunden: {len(europe_research_data)}")

Europäische Länder gefunden: 41


In [700]:
# English names of the EU member states, used to filter the European data.
eu_countries = [
    "Austria", "Belgium", "Bulgaria", "Croatia", "Cyprus", "Czech Republic",
    "Denmark", "Estonia", "Finland", "France", "Germany", "Greece",
    "Hungary", "Ireland", "Italy", "Latvia", "Lithuania", "Luxembourg",
    "Malta", "Netherlands", "Poland", "Portugal", "Romania", "Slovakia",
    "Slovenia", "Spain", "Sweden",
]

# Keep only the EU members from the European paper counts.
eu_research_data = {}
for country, count in europe_research_data.items():
    if country in eu_countries:
        eu_research_data[country] = count
print(f"\nDaten für {len(eu_research_data)} EU-Länder verfügbar")


Daten für 27 EU-Länder verfügbar


In [701]:
# Display the paper counts per EU member state.
eu_research_data

{'France': 2340240,
 'Germany': 2684903,
 'Denmark': 405820,
 'Spain': 1490117,
 'Belgium': 533431,
 'Czech Republic': 329260,
 'Finland': 297821,
 'Italy': 1628325,
 'Luxembourg': 43109,
 'Netherlands': 883788,
 'Sweden': 565233,
 'Hungary': 196164,
 'Malta': 14755,
 'Poland': 841244,
 'Portugal': 429884,
 'Slovenia': 95454,
 'Austria': 383322,
 'Greece': 285386,
 'Ireland': 225952,
 'Estonia': 47562,
 'Romania': 235189,
 'Bulgaria': 89259,
 'Croatia': 120725,
 'Lithuania': 63132,
 'Latvia': 43808,
 'Slovakia': 112700,
 'Cyprus': 23566}

In [702]:
# Print the paper count of every EU country in alphabetical order.
print("EU-Länder Forschungsoutput")
for country, count in sorted(eu_research_data.items()):
    print(f"{country:20s}: {count:8,} Papers")

EU-Länder Forschungsoutput
Austria             :  383,322 Papers
Belgium             :  533,431 Papers
Bulgaria            :   89,259 Papers
Croatia             :  120,725 Papers
Cyprus              :   23,566 Papers
Czech Republic      :  329,260 Papers
Denmark             :  405,820 Papers
Estonia             :   47,562 Papers
Finland             :  297,821 Papers
France              : 2,340,240 Papers
Germany             : 2,684,903 Papers
Greece              :  285,386 Papers
Hungary             :  196,164 Papers
Ireland             :  225,952 Papers
Italy               : 1,628,325 Papers
Latvia              :   43,808 Papers
Lithuania           :   63,132 Papers
Luxembourg          :   43,109 Papers
Malta               :   14,755 Papers
Netherlands         :  883,788 Papers
Poland              :  841,244 Papers
Portugal            :  429,884 Papers
Romania             :  235,189 Papers
Slovakia            :  112,700 Papers
Slovenia            :   95,454 Papers
Spain               

In [703]:
if eu_research_data:
    paper_counts = list(eu_research_data.values())
    max_papers, min_papers = max(paper_counts), min(paper_counts)

    # Min-max scale the counts to 0-100; if all counts are equal, every
    # country gets the value 50.
    eu_research_normalized = {}
    for country, count in eu_research_data.items():
        if max_papers > min_papers:
            normalized_value = ((count - min_papers) / (max_papers - min_papers)) * 100
        else:
            normalized_value = 50
        eu_research_normalized[country] = round(normalized_value, 1)

    print("Normalisierte Werte für Visualisierung")
    sorted_normalized = sorted(
        eu_research_normalized.items(), key=lambda item: item[1], reverse=True
    )
    # Only the ten largest values are printed.
    top_ten = sorted_normalized[:10]
    for country, norm_value in top_ten:
        print(f"{country:20s}: {norm_value:5.1f} ({eu_research_data[country]:,} Papers)")

Normalisierte Werte für Visualisierung
Germany             : 100.0 (2,684,903 Papers)
France              :  87.1 (2,340,240 Papers)
Italy               :  60.4 (1,628,325 Papers)
Spain               :  55.3 (1,490,117 Papers)
Netherlands         :  32.5 (883,788 Papers)
Poland              :  31.0 (841,244 Papers)
Sweden              :  20.6 (565,233 Papers)
Belgium             :  19.4 (533,431 Papers)
Portugal            :  15.5 (429,884 Papers)
Denmark             :  14.6 (405,820 Papers)


## Investitions- und OpenAlex-Daten joinen

In [704]:
# Show the first rows of the per-country investment totals as plain text.
investment_preview = Investitionssummen_Länder.head()
print("Investitionsdaten", investment_preview, sep="\n")

Investitionsdaten
          Land  Betrag in Mio. Euro
0      Belgien           233201.336
1    Bulgarien             7031.177
2  Deutschland          1931488.613
3     Dänemark           174858.568
4      Estland             6807.736


In [705]:
# German country names used in the Eurostat table -> English names used as
# keys of the paper-count dictionaries.
eurostat_to_openalex_mapping = {
    "Deutschland": "Germany", "Frankreich": "France", "Italien": "Italy",
    "Spanien": "Spain", "Niederlande": "Netherlands", "Polen": "Poland",
    "Belgien": "Belgium", "Österreich": "Austria", "Schweden": "Sweden",
    "Tschechien": "Czech Republic", "Dänemark": "Denmark", "Finnland": "Finland",
    "Portugal": "Portugal", "Rumänien": "Romania", "Ungarn": "Hungary",
    "Griechenland": "Greece", "Bulgarien": "Bulgaria", "Slowakei": "Slovakia",
    "Kroatien": "Croatia", "Irland": "Ireland", "Litauen": "Lithuania",
    "Slowenien": "Slovenia", "Lettland": "Latvia", "Estland": "Estonia",
    "Zypern": "Cyprus", "Luxemburg": "Luxembourg", "Malta": "Malta",
}

In [706]:
# Print the paper count of every EU country in alphabetical order.
print("EU-Länder Forschungsoutput")
for country, count in sorted(eu_research_data.items()):
    print(f"{country:20s}: {count:8,} Papers")

n_eu_countries = len(eu_research_data)
print(f"\nDaten für {n_eu_countries} EU-Länder verfügbar")

EU-Länder Forschungsoutput
Austria             :  383,322 Papers
Belgium             :  533,431 Papers
Bulgaria            :   89,259 Papers
Croatia             :  120,725 Papers
Cyprus              :   23,566 Papers
Czech Republic      :  329,260 Papers
Denmark             :  405,820 Papers
Estonia             :   47,562 Papers
Finland             :  297,821 Papers
France              : 2,340,240 Papers
Germany             : 2,684,903 Papers
Greece              :  285,386 Papers
Hungary             :  196,164 Papers
Ireland             :  225,952 Papers
Italy               : 1,628,325 Papers
Latvia              :   43,808 Papers
Lithuania           :   63,132 Papers
Luxembourg          :   43,109 Papers
Malta               :   14,755 Papers
Netherlands         :  883,788 Papers
Poland              :  841,244 Papers
Portugal            :  429,884 Papers
Romania             :  235,189 Papers
Slovakia            :  112,700 Papers
Slovenia            :   95,454 Papers
Spain               

In [707]:
# Min-max scale the paper counts to a 0-100 range for visualisation.
# If all counts are equal, every country is assigned 50.
max_papers = max(eu_research_data.values())
min_papers = min(eu_research_data.values())

eu_research_normalized = {
    country: round(
        ((count - min_papers) / (max_papers - min_papers)) * 100
        if max_papers > min_papers
        else 50,
        1,
    )
    for country, count in eu_research_data.items()
}

print("Normalisierte Werte für Visualisierung")
sorted_normalized = sorted(
    eu_research_normalized.items(), key=lambda item: item[1], reverse=True
)
# Only the ten largest values are printed.
for country, norm_value in sorted_normalized[:10]:
    print(f"{country:20s}: {norm_value:5.1f} ({eu_research_data[country]:,} Papers)")

Normalisierte Werte für Visualisierung
Germany             : 100.0 (2,684,903 Papers)
France              :  87.1 (2,340,240 Papers)
Italy               :  60.4 (1,628,325 Papers)
Spain               :  55.3 (1,490,117 Papers)
Netherlands         :  32.5 (883,788 Papers)
Poland              :  31.0 (841,244 Papers)
Sweden              :  20.6 (565,233 Papers)
Belgium             :  19.4 (533,431 Papers)
Portugal            :  15.5 (429,884 Papers)
Denmark             :  14.6 (405,820 Papers)


In [708]:
def calculate_research_efficiency(research_data, investment_data, country_mapping):
    """
    Compute research efficiency as papers per million euros invested.

    Args:
        research_data: Mapping of English country name to paper count.
        investment_data: DataFrame with the columns 'Land' (German country
            name) and 'Betrag in Mio. Euro'.
        country_mapping: Mapping of German country name to English name.

    Returns:
        Tuple ``(efficiency_data, investment_dict)``. ``efficiency_data`` maps
        each country present in both inputs with a positive investment to
        ``papers / investment``; ``investment_dict`` maps English country
        names to the invested amount. A summary of the ten largest
        investments is printed as a side effect.
    """
    # Translate the investment table to English names; unmapped rows are dropped.
    investment_dict = {
        country_mapping[land]: amount
        for land, amount in zip(
            investment_data['Land'], investment_data['Betrag in Mio. Euro']
        )
        if land in country_mapping
    }

    print(f"Investitionsdaten für {len(investment_dict)} Länder gefunden:")
    largest_first = sorted(investment_dict.items(), key=lambda item: item[1], reverse=True)
    for name, amount in largest_first[:10]:
        print(f"  {name:20s}: {amount:8,.1f} Mio. €")

    # Papers per million euros, only where an investment figure exists and is positive.
    efficiency_data = {}
    for name, papers in research_data.items():
        if name not in investment_dict:
            continue
        investment_mio = investment_dict[name]
        if investment_mio > 0:
            efficiency_data[name] = papers / investment_mio

    return efficiency_data, investment_dict

In [709]:
# Papers per million euros for every EU country with both data sources.
efficiency_data, investment_dict = calculate_research_efficiency(
    research_data=eu_research_data,
    investment_data=Investitionssummen_Länder,
    country_mapping=eurostat_to_openalex_mapping,
)

Investitionsdaten für 27 Länder gefunden:
  Germany             : 1,931,488.6 Mio. €
  France              : 1,079,692.8 Mio. €
  Italy               : 489,078.7 Mio. €
  Netherlands         : 323,865.3 Mio. €
  Spain               : 314,667.1 Mio. €
  Sweden              : 306,923.5 Mio. €
  Belgium             : 233,201.3 Mio. €
  Austria             : 215,647.3 Mio. €
  Denmark             : 174,858.6 Mio. €
  Finland             : 151,551.0 Mio. €


In [710]:
# Rank the countries by efficiency, highest first.
efficiency_sorted = sorted(
    efficiency_data.items(), key=lambda item: item[1], reverse=True
)

print("Top 10 effizienteste Länder:")
for rank, (country, efficiency) in enumerate(efficiency_sorted[:10], start=1):
    papers = eu_research_data[country]
    investment = investment_dict[country]
    details = f"({papers:,} Papers, {investment:,.0f} Mio.€)"
    print(f"{rank:2d}. {country:15s}: {efficiency:6.1f} Papers/Mio.€ " + details)

Top 10 effizienteste Länder:
 1. Romania        :   14.2 Papers/Mio.€ (235,189 Papers, 16,528 Mio.€)
 2. Latvia         :   13.3 Papers/Mio.€ (43,808 Papers, 3,305 Mio.€)
 3. Bulgaria       :   12.7 Papers/Mio.€ (89,259 Papers, 7,031 Mio.€)
 4. Croatia        :   12.0 Papers/Mio.€ (120,725 Papers, 10,065 Mio.€)
 5. Malta          :   11.9 Papers/Mio.€ (14,755 Papers, 1,240 Mio.€)
 6. Cyprus         :   10.0 Papers/Mio.€ (23,566 Papers, 2,353 Mio.€)
 7. Poland         :    8.9 Papers/Mio.€ (841,244 Papers, 94,100 Mio.€)
 8. Slovakia       :    8.8 Papers/Mio.€ (112,700 Papers, 12,762 Mio.€)
 9. Lithuania      :    8.1 Papers/Mio.€ (63,132 Papers, 7,762 Mio.€)
10. Portugal       :    7.7 Papers/Mio.€ (429,884 Papers, 55,856 Mio.€)


In [711]:
efficiency_values = list(efficiency_data.values())
max_efficiency = max(efficiency_values)
min_efficiency = min(efficiency_values)

# Despite its name this is an unscaled copy of the raw efficiency values
# (no 0-100 normalisation is applied).
efficiency_normalized = dict(efficiency_data)

print("Effizienzwerte (Papers pro Mio. €)")
efficiency_norm_sorted = sorted(
    efficiency_normalized.items(), key=lambda item: item[1], reverse=True
)
for country, eff in efficiency_norm_sorted:
    papers = eu_research_data[country]
    investment = investment_dict[country]
    print(f"{country:15s}: {eff:6.1f} Papers/Mio.€ ({papers:,} Papers, {investment:,.0f} Mio.€)")

print(f"\nBereich: {min_efficiency:.1f} - {max_efficiency:.1f} Papers/Mio.€")

Effizienzwerte (Papers pro Mio. €)
Romania        :   14.2 Papers/Mio.€ (235,189 Papers, 16,528 Mio.€)
Latvia         :   13.3 Papers/Mio.€ (43,808 Papers, 3,305 Mio.€)
Bulgaria       :   12.7 Papers/Mio.€ (89,259 Papers, 7,031 Mio.€)
Croatia        :   12.0 Papers/Mio.€ (120,725 Papers, 10,065 Mio.€)
Malta          :   11.9 Papers/Mio.€ (14,755 Papers, 1,240 Mio.€)
Cyprus         :   10.0 Papers/Mio.€ (23,566 Papers, 2,353 Mio.€)
Poland         :    8.9 Papers/Mio.€ (841,244 Papers, 94,100 Mio.€)
Slovakia       :    8.8 Papers/Mio.€ (112,700 Papers, 12,762 Mio.€)
Lithuania      :    8.1 Papers/Mio.€ (63,132 Papers, 7,762 Mio.€)
Portugal       :    7.7 Papers/Mio.€ (429,884 Papers, 55,856 Mio.€)
Greece         :    7.5 Papers/Mio.€ (285,386 Papers, 38,278 Mio.€)
Estonia        :    7.0 Papers/Mio.€ (47,562 Papers, 6,808 Mio.€)
Hungary        :    6.0 Papers/Mio.€ (196,164 Papers, 32,917 Mio.€)
Slovenia       :    5.4 Papers/Mio.€ (95,454 Papers, 17,801 Mio.€)
Czech Republic :    5.1 Pa

## EU-Karte der Forschungseffizienz (Papers pro Million Euro)

In [712]:
# ISO-3 codes of the EU member states (as of 2024), keyed by English name;
# used as plotly locations.
eu_iso_codes = {
    'Austria': 'AUT',
    'Belgium': 'BEL',
    'Bulgaria': 'BGR',
    'Croatia': 'HRV',
    'Cyprus': 'CYP',
    'Czech Republic': 'CZE',
    'Denmark': 'DNK',
    'Estonia': 'EST',
    'Finland': 'FIN',
    'France': 'FRA',
    'Germany': 'DEU',
    'Greece': 'GRC',
    'Hungary': 'HUN',
    'Ireland': 'IRL',
    'Italy': 'ITA',
    'Latvia': 'LVA',
    'Lithuania': 'LTU',
    'Luxembourg': 'LUX',
    'Malta': 'MLT',
    'Netherlands': 'NLD',
    'Poland': 'POL',
    'Portugal': 'PRT',
    'Romania': 'ROU',
    'Slovakia': 'SVK',
    'Slovenia': 'SVN',
    'Spain': 'ESP',
    'Sweden': 'SWE',
}

# EU member states in the same order as the keys above.
eu_countries = list(eu_iso_codes)


In [713]:
def create_efficiency_map(data_dict, title="<b>EU Forschungseffizienz</b>", show_non_eu=True):
    """
    Build a choropleth map of the EU member states coloured by efficiency.

    Colour coding (``RdYlGn`` scale):
    GREEN = high value (many papers per euro)
    RED = low value (few papers per euro)

    Args:
        data_dict: Mapping of English country name to the value to plot. Only
            names listed in the module-level ``eu_countries`` are used; their
            ISO-3 codes come from the module-level ``eu_iso_codes``.
        title: Figure title.
        show_non_eu: If true, a fixed set of non-EU European countries is
            added as a second, uniformly light-grey trace.

    Returns:
        ``(fig, min_value, max_value)`` with the plotly figure and the
        smallest / largest plotted value.
        NOTE: if none of the EU countries is present in ``data_dict`` a
        message is printed and a bare ``None`` is returned instead of a
        tuple, so callers that unpack three values have to check first.
    """
    # EU countries that actually have a value, in eu_countries order.
    plotted = [country for country in eu_countries if country in data_dict]
    if not plotted:
        print("  Keine Daten für EU-Länder gefunden!")
        return None

    all_locations = [eu_iso_codes[country] for country in plotted]
    all_values = [data_dict[country] for country in plotted]
    all_texts = list(plotted)

    # Limits of the colour scale.
    min_value, max_value = min(all_values), max(all_values)

    # Colour bar with ticks that split the value range into five equal steps.
    colorbar_settings = dict(
        title=dict(
            text="Paper pro Mio. € <br>",
            font=dict(size=14)
        ),
        tickmode="linear",
        tick0=min_value,
        dtick=(max_value - min_value) / 5,
        tickformat=".1f",
        xanchor="left",
        y=0.5,
        yanchor="middle"
    )

    fig = go.Figure()

    # EU trace: RdYlGn maps low values to red and high values to green.
    eu_trace = go.Choropleth(
        locations=all_locations,
        z=all_values,
        locationmode='ISO-3',
        colorscale='RdYlGn',
        autocolorscale=False,
        text=all_texts,
        hovertemplate='<b>%{text}</b><br>Effizienz: %{z:.1f} Papers/Mio. €<br><extra></extra>',
        colorbar=colorbar_settings,
        name="EU-Länder",
        marker_line_color='white',
        marker_line_width=0.5
    )
    fig.add_trace(eu_trace)

    # Optional second trace: non-EU European countries in a uniform grey.
    if show_non_eu:
        non_eu_iso = {
            'Albania': 'ALB', 'Andorra': 'AND', 'Belarus': 'BLR',
            'Bosnia and Herzegovina': 'BIH', 'Iceland': 'ISL', 'Liechtenstein': 'LIE',
            'Moldova': 'MDA', 'Monaco': 'MCO', 'Montenegro': 'MNE',
            'North Macedonia': 'MKD', 'Norway': 'NOR', 'San Marino': 'SMR',
            'Serbia': 'SRB', 'Switzerland': 'CHE', 'Ukraine': 'UKR',
            'United Kingdom': 'GBR', 'Vatican City': 'VAT', 'Kosovo': 'XKX',
            'Turkey': 'TUR', 'Russia': 'RUS'
        }

        # Skip entries without an ISO code.
        non_eu_pairs = [
            (name, code) for name, code in non_eu_iso.items() if code != ''
        ]

        if non_eu_pairs:
            non_eu_trace = go.Choropleth(
                locations=[code for _, code in non_eu_pairs],
                # Constant z with a single-colour scale paints every country the same grey.
                z=[1] * len(non_eu_pairs),
                locationmode='ISO-3',
                colorscale=[[0, '#f0f0f0'], [1, '#f0f0f0']],
                showscale=False,
                text=[name for name, _ in non_eu_pairs],
                hovertemplate='<b>%{text}</b><br>Nicht-EU Land<extra></extra>',
                name="Nicht-EU Länder",
                marker_line_color='white',
                marker_line_width=0.5
            )
            fig.add_trace(non_eu_trace)

    # Map viewport restricted to Europe via the latitude / longitude ranges.
    geo_settings = dict(
        showframe=False,
        showcoastlines=True,
        coastlinecolor="lightgray",
        projection_type='natural earth',
        center=dict(lat=52, lon=12),
        projection_scale=1.0,
        lataxis=dict(range=[32.5, 70.5]),
        lonaxis=dict(range=[-8, 37]),
        showland=True,
        landcolor='rgb(248, 248, 248)',
        showocean=True,
        oceancolor='rgb(204, 230, 255)',
        resolution=50
    )

    fig.update_layout(
        title=dict(
            text=title,
            x=0.5,
            font=dict(weight="bold")
        ),
        geo=geo_settings,
        width=900,
        plot_bgcolor='white',
        paper_bgcolor='white',
        margin=dict(l=0, r=0, t=50, b=0)
    )

    return fig, min_value, max_value

In [714]:
print("Verfügbare EU-Länder mit Forschungseffizienz-Daten:")
# Ranking of all countries by efficiency, highest first (reused by later cells).
efficiency_ranking = sorted(efficiency_normalized.items(), key=lambda x: x[1], reverse=True)

# The former `'name' in locals()` checks were removed: the dictionaries are
# created by earlier cells, and a fresh top-to-bottom run always defines them.
for rank, (country, eff) in enumerate(efficiency_ranking, 1):
    papers = eu_research_data.get(country, 0)
    investment = investment_dict.get(country, 0)
    print(f"{rank:2d}. {country:15s}: {eff:6.1f} Papers/Mio.€ "
            f"({papers:,} Papers, {investment:,.0f} Mio.€)")

Verfügbare EU-Länder mit Forschungseffizienz-Daten:
 1. Romania        :   14.2 Papers/Mio.€ (235,189 Papers, 16,528 Mio.€)
 2. Latvia         :   13.3 Papers/Mio.€ (43,808 Papers, 3,305 Mio.€)
 3. Bulgaria       :   12.7 Papers/Mio.€ (89,259 Papers, 7,031 Mio.€)
 4. Croatia        :   12.0 Papers/Mio.€ (120,725 Papers, 10,065 Mio.€)
 5. Malta          :   11.9 Papers/Mio.€ (14,755 Papers, 1,240 Mio.€)
 6. Cyprus         :   10.0 Papers/Mio.€ (23,566 Papers, 2,353 Mio.€)
 7. Poland         :    8.9 Papers/Mio.€ (841,244 Papers, 94,100 Mio.€)
 8. Slovakia       :    8.8 Papers/Mio.€ (112,700 Papers, 12,762 Mio.€)
 9. Lithuania      :    8.1 Papers/Mio.€ (63,132 Papers, 7,762 Mio.€)
10. Portugal       :    7.7 Papers/Mio.€ (429,884 Papers, 55,856 Mio.€)
11. Greece         :    7.5 Papers/Mio.€ (285,386 Papers, 38,278 Mio.€)
12. Estonia        :    7.0 Papers/Mio.€ (47,562 Papers, 6,808 Mio.€)
13. Hungary        :    6.0 Papers/Mio.€ (196,164 Papers, 32,917 Mio.€)
14. Slovenia       :    

In [715]:
# Build the efficiency map and show it inline.
fig_efficiency, min_eff_value, max_eff_value = create_efficiency_map(
    efficiency_normalized,
    "Forschungseffizienz der EU-Mitgliedsstaaten",
    show_non_eu=True
)
fig_efficiency.show()
# The export format is PDF; the variable keeps its historical name because
# other cells may refer to it.
svg_filename = "data/output/Hypothese_2.pdf"
fig_efficiency.write_image(svg_filename, format="pdf")
print(f"Karte als PDF gespeichert: {svg_filename}")


Karte als SVG gespeichert: data/output/Hypothese_2.pdf


In [716]:
# First and last entry of the descending ranking.
(best_country, best_score), (worst_country, worst_score) = (
    efficiency_ranking[0],
    efficiency_ranking[-1],
)
print(f"Effizientestes Land: {best_country} ({best_score:.1f} Papers/Mio.€)")
print(f"Ineffizientestes Land: {worst_country} ({worst_score:.1f} Papers/Mio.€)")

Effizientestes Land: Romania (14.2 Papers/Mio.€)
Ineffizientestes Land: Germany (1.4 Papers/Mio.€)


In [717]:
# The five highest-ranked countries.
print("Top 5 Effizienteste Länder:")
top_five = efficiency_ranking[:5]
for rank, (country, score) in enumerate(top_five, start=1):
    print(f"  {rank}. {country}: {score:.1f} Papers/Mio.€")

Top 5 Effizienteste Länder:
  1. Romania: 14.2 Papers/Mio.€
  2. Latvia: 13.3 Papers/Mio.€
  3. Bulgaria: 12.7 Papers/Mio.€
  4. Croatia: 12.0 Papers/Mio.€
  5. Malta: 11.9 Papers/Mio.€


In [718]:
# The five lowest-ranked countries, numbered with their position in the
# full ranking. The first rank is derived from the actual slice length so
# that it stays correct when fewer than five countries are available.
print("Bottom 5 Ineffizienteste Länder:")
bottom_five = efficiency_ranking[-5:]
first_rank = len(efficiency_ranking) - len(bottom_five) + 1
for rank, (country, score) in enumerate(bottom_five, first_rank):
    print(f"  {rank}. {country}: {score:.1f} Papers/Mio.€")

Bottom 5 Ineffizienteste Länder:
  23. France: 2.2 Papers/Mio.€
  24. Finland: 2.0 Papers/Mio.€
  25. Sweden: 1.8 Papers/Mio.€
  26. Austria: 1.8 Papers/Mio.€
  27. Germany: 1.4 Papers/Mio.€


## Korrelationsanalyse

### Datenaufbereitung

In [719]:
# Reverse lookup English -> German name, built once instead of scanning the
# mapping for every country. The first German name wins on duplicates.
english_to_german = {}
for german, english in eurostat_to_openalex_mapping.items():
    english_to_german.setdefault(english, german)

# Investment total per German country name; the first row wins on duplicates.
investment_by_land = {}
for land, amount in zip(
    Investitionssummen_Länder['Land'],
    Investitionssummen_Länder['Betrag in Mio. Euro'],
):
    investment_by_land.setdefault(land, amount)

# One record per EU country that has both a paper count and an investment total.
correlation_data = []
for english_name, paper_count in eu_research_data.items():
    german_name = english_to_german.get(english_name)
    if not german_name or german_name not in investment_by_land:
        continue

    investment_mio = investment_by_land[german_name]
    correlation_data.append({
        'Country': english_name,
        'German_Name': german_name,
        'Paper_Count': paper_count,
        'Investment_Mio_EUR': investment_mio,
        'Investment_Mrd_EUR': investment_mio / 1000,
        # Efficiency is reported as 0 when no positive investment is recorded.
        'Papers_per_Mio_EUR': paper_count / investment_mio if investment_mio > 0 else 0
    })

# DataFrame used by the correlation analysis.
df_correlation = pd.DataFrame(correlation_data)

In [720]:
# One formatted line per country: paper count, investment (Mrd €) and
# papers per Mio. €.
print("\nVerfügbare Länder für Korrelationsanalyse:")
for entry in df_correlation.itertuples(index=False):
    country_part = f"  {entry.Country:20s}: {entry.Paper_Count:8,} Papers, "
    investment_part = f"{entry.Investment_Mrd_EUR:6.2f} Mrd €, "
    efficiency_part = f"{entry.Papers_per_Mio_EUR:6.1f} Papers/Mio.€"
    print(country_part + investment_part + efficiency_part)


Verfügbare Länder für Korrelationsanalyse:
  France              : 2,340,240 Papers, 1079.69 Mrd €,    2.2 Papers/Mio.€
  Germany             : 2,684,903 Papers, 1931.49 Mrd €,    1.4 Papers/Mio.€
  Denmark             :  405,820 Papers, 174.86 Mrd €,    2.3 Papers/Mio.€
  Spain               : 1,490,117 Papers, 314.67 Mrd €,    4.7 Papers/Mio.€
  Belgium             :  533,431 Papers, 233.20 Mrd €,    2.3 Papers/Mio.€
  Czech Republic      :  329,260 Papers,  65.08 Mrd €,    5.1 Papers/Mio.€
  Finland             :  297,821 Papers, 151.55 Mrd €,    2.0 Papers/Mio.€
  Italy               : 1,628,325 Papers, 489.08 Mrd €,    3.3 Papers/Mio.€
  Luxembourg          :   43,109 Papers,  13.79 Mrd €,    3.1 Papers/Mio.€
  Netherlands         :  883,788 Papers, 323.87 Mrd €,    2.7 Papers/Mio.€
  Sweden              :  565,233 Papers, 306.92 Mrd €,    1.8 Papers/Mio.€
  Hungary             :  196,164 Papers,  32.92 Mrd €,    6.0 Papers/Mio.€
  Malta               :   14,755 Papers,   1.24 Mr

### Deskriptive Statistik

In [721]:
# Summary statistics of the paper counts, naming the countries that hold the
# minimum and the maximum.
print("Anzahl der Paper:")
print(f"  Anzahl Länder: {len(df_correlation)}")

paper_counts = df_correlation['Paper_Count']
print(f"  Mittelwert: {paper_counts.mean():,.0f}")
print(f"  Median: {paper_counts.median():,.0f}")
print(f"  Standardabweichung: {paper_counts.std():,.0f}")

fewest_papers_country = df_correlation.loc[paper_counts.idxmin(), 'Country']
print(f"  Min: {paper_counts.min():,} ({fewest_papers_country})")

most_papers_country = df_correlation.loc[paper_counts.idxmax(), 'Country']
print(f"  Max: {paper_counts.max():,} ({most_papers_country})")

Anzahl der Paper:
  Anzahl Länder: 27
  Mittelwert: 533,709
  Median: 285,386
  Standardabweichung: 705,317
  Min: 14,755 (Malta)
  Max: 2,684,903 (Germany)


In [722]:
# Summary statistics of the investments in Mrd €, naming the countries that
# hold the minimum and the maximum.
print("Forschungsinvestitionen (Mrd €):")

investments_mrd = df_correlation['Investment_Mrd_EUR']
print(f"  Mittelwert: {investments_mrd.mean():.2f}")
print(f"  Median: {investments_mrd.median():.2f}")
print(f"  Standardabweichung: {investments_mrd.std():.2f}")

lowest_investment_country = df_correlation.loc[investments_mrd.idxmin(), 'Country']
print(f"  Min: {investments_mrd.min():.2f} ({lowest_investment_country})")

highest_investment_country = df_correlation.loc[investments_mrd.idxmax(), 'Country']
print(f"  Max: {investments_mrd.max():.2f} ({highest_investment_country})")

Forschungsinvestitionen (Mrd €):
  Mittelwert: 210.48
  Median: 55.86
  Standardabweichung: 412.21
  Min: 1.24 (Malta)
  Max: 1931.49 (Germany)


In [723]:
# Summary statistics of papers per Mio. €, naming the countries that hold the
# minimum and the maximum.
print("Forschungseffizienz (Papers/Mio. €):")

papers_per_mio = df_correlation['Papers_per_Mio_EUR']
print(f"  Mittelwert: {papers_per_mio.mean():.1f}")
print(f"  Median: {papers_per_mio.median():.1f}")
print(f"  Standardabweichung: {papers_per_mio.std():.1f}")

least_efficient_country = df_correlation.loc[papers_per_mio.idxmin(), 'Country']
print(f"  Min: {papers_per_mio.min():.1f} ({least_efficient_country})")

most_efficient_country = df_correlation.loc[papers_per_mio.idxmax(), 'Country']
print(f"  Max: {papers_per_mio.max():.1f} ({most_efficient_country})")

Forschungseffizienz (Papers/Mio. €):
  Mittelwert: 6.3
  Median: 5.4
  Standardabweichung: 4.1
  Min: 1.4 (Germany)
  Max: 14.2 (Romania)


### Korrelationsanalyse: Investitionen vs. Anzahl der Paper

In [724]:
# Pearson correlation between investment (Mrd €) and paper count
pearson_corr, pearson_p = stats.pearsonr(
    df_correlation['Investment_Mrd_EUR'], 
    df_correlation['Paper_Count']
)

print(f"Pearson-Korrelation:")
print(f"  Korrelationskoeffizient: r = {pearson_corr:.4f}")
# Scientific notation: with fixed-point `.6f` a very small p-value is rendered
# as 0.000000, which reads as "exactly zero" and hides its magnitude.
print(f"  P-Wert: {pearson_p:.3e}")
print(f"  Signifikanz (α=0.05): {'Signifikant' if pearson_p < 0.05 else 'Nicht signifikant'}")

Pearson-Korrelation:
  Korrelationskoeffizient: r = 0.9125
  P-Wert: 0.000000
  Signifikanz (α=0.05): Signifikant


In [725]:
# Spearman correlation (rank-based) between investment (Mrd €) and paper count
spearman_corr, spearman_p = stats.spearmanr(
    df_correlation['Investment_Mrd_EUR'], 
    df_correlation['Paper_Count']
)

print(f"Spearman-Korrelation:")
print(f"  Korrelationskoeffizient: ρ = {spearman_corr:.4f}")
# Scientific notation: with fixed-point `.6f` a very small p-value is rendered
# as 0.000000, which reads as "exactly zero" and hides its magnitude.
print(f"  P-Wert: {spearman_p:.3e}")
print(f"  Signifikanz (α=0.05): {'Signifikant' if spearman_p < 0.05 else 'Nicht signifikant'}")

Spearman-Korrelation:
  Korrelationskoeffizient: ρ = 0.9505
  P-Wert: 0.000000
  Signifikanz (α=0.05): Signifikant


### Regressionsanalyse

In [726]:
# Simple linear regression with one predictor: paper count as a function of
# investment (Mrd €).
X = df_correlation['Investment_Mrd_EUR'].values[:, None]  # column vector, shape (n, 1)
y = df_correlation['Paper_Count'].values

model = LinearRegression().fit(X, y)

# Fitted values for the observed investments
y_pred = model.predict(X)

# Coefficient of determination, evaluated on the same data the model was fit on
r2 = model.score(X, y)

beta_0 = model.intercept_
beta_1 = model.coef_[0]

print("Lineare Regression: Papers = β₀ + β₁ × Investment")
print(f"  Achsenabschnitt (β₀): {beta_0:,.0f}")
print(f"  Steigung (β₁): {beta_1:,.0f}")
print(f"  R² (Bestimmtheitsmaß): {r2:.4f}")
print(f"  Erklärte Varianz: {r2*100:.1f}%")

print("\nInterpretation:")
print(f"  Pro 1 Mrd € mehr Investment → {beta_1:,.0f} mehr Papers (erwartet)")
# The sign of the slope selects which interpretation line is printed
direction_note = (
    "  Höhere Investitionen führen zu mehr Papers"
    if beta_1 > 0
    else "  Höhere Investitionen führen zu weniger Papers (ungewöhnlich)"
)
print(direction_note)

Lineare Regression: Papers = β₀ + β₁ × Investment
  Achsenabschnitt (β₀): 205,097
  Steigung (β₁): 1,561
  R² (Bestimmtheitsmaß): 0.8326
  Erklärte Varianz: 83.3%

Interpretation:
  Pro 1 Mrd € mehr Investment → 1,561 mehr Papers (erwartet)
  Höhere Investitionen führen zu mehr Papers


### Ranking und Effizienz

In [727]:
# Sort by paper count, highest first
df_sorted_papers = df_correlation.sort_values('Paper_Count', ascending=False)

print(f"Ranking nach Paper-Anzahl:")
# enumerate supplies the 1-based rank directly from the iteration order, so no
# per-row index lookup is needed and the rank does not depend on the index
# labels being unique.
for rank, (_, row) in enumerate(df_sorted_papers.iterrows(), start=1):
    print(f"  {rank:2d}. {row['Country']:15s}: "
            f"{row['Paper_Count']:8,} Papers, {row['Investment_Mrd_EUR']:6.2f} Mrd €")

Ranking nach Paper-Anzahl:
   1. Germany        : 2,684,903 Papers, 1931.49 Mrd €
   2. France         : 2,340,240 Papers, 1079.69 Mrd €
   3. Italy          : 1,628,325 Papers, 489.08 Mrd €
   4. Spain          : 1,490,117 Papers, 314.67 Mrd €
   5. Netherlands    :  883,788 Papers, 323.87 Mrd €
   6. Poland         :  841,244 Papers,  94.10 Mrd €
   7. Sweden         :  565,233 Papers, 306.92 Mrd €
   8. Belgium        :  533,431 Papers, 233.20 Mrd €
   9. Portugal       :  429,884 Papers,  55.86 Mrd €
  10. Denmark        :  405,820 Papers, 174.86 Mrd €
  11. Austria        :  383,322 Papers, 215.65 Mrd €
  12. Czech Republic :  329,260 Papers,  65.08 Mrd €
  13. Finland        :  297,821 Papers, 151.55 Mrd €
  14. Greece         :  285,386 Papers,  38.28 Mrd €
  15. Romania        :  235,189 Papers,  16.53 Mrd €
  16. Ireland        :  225,952 Papers,  76.19 Mrd €
  17. Hungary        :  196,164 Papers,  32.92 Mrd €
  18. Croatia        :  120,725 Papers,  10.06 Mrd €
  19. Slovaki

In [728]:
# Sort by efficiency (papers per Mio. €), highest first
df_sorted_efficiency = df_correlation.sort_values('Papers_per_Mio_EUR', ascending=False)

print(f"Ranking nach Effizienz (Papers pro Mio. €):")
# enumerate supplies the 1-based rank directly from the iteration order, so no
# per-row index lookup is needed and the rank does not depend on the index
# labels being unique.
for rank, (_, row) in enumerate(df_sorted_efficiency.iterrows(), start=1):
    print(f"  {rank:2d}. {row['Country']:15s}: "
            f"{row['Papers_per_Mio_EUR']:6.1f} Papers/Mio.€")

Ranking nach Effizienz (Papers pro Mio. €):
   1. Romania        :   14.2 Papers/Mio.€
   2. Latvia         :   13.3 Papers/Mio.€
   3. Bulgaria       :   12.7 Papers/Mio.€
   4. Croatia        :   12.0 Papers/Mio.€
   5. Malta          :   11.9 Papers/Mio.€
   6. Cyprus         :   10.0 Papers/Mio.€
   7. Poland         :    8.9 Papers/Mio.€
   8. Slovakia       :    8.8 Papers/Mio.€
   9. Lithuania      :    8.1 Papers/Mio.€
  10. Portugal       :    7.7 Papers/Mio.€
  11. Greece         :    7.5 Papers/Mio.€
  12. Estonia        :    7.0 Papers/Mio.€
  13. Hungary        :    6.0 Papers/Mio.€
  14. Slovenia       :    5.4 Papers/Mio.€
  15. Czech Republic :    5.1 Papers/Mio.€
  16. Spain          :    4.7 Papers/Mio.€
  17. Italy          :    3.3 Papers/Mio.€
  18. Luxembourg     :    3.1 Papers/Mio.€
  19. Ireland        :    3.0 Papers/Mio.€
  20. Netherlands    :    2.7 Papers/Mio.€
  21. Denmark        :    2.3 Papers/Mio.€
  22. Belgium        :    2.3 Papers/Mio.€
  23. Fran