In [16]:
import pandas as pd
import numpy as np



In [17]:
# Read the AMR export into a DataFrame and preview its first five rows.
amr_eu = pd.read_csv(filepath_or_buffer='amr_eu.csv')
amr_eu.head(n=5)

Unnamed: 0,HealthTopic,Population,Indicator,Unit,Time,RegionCode,RegionName,NumValue,TxtValue
0,Antimicrobial resistance,Acinetobacter spp.|Aminoglycosides,"I - 'susceptible, increased exposure' isolates",N,2012,BG,Bulgaria,6.0,
1,Antimicrobial resistance,Acinetobacter spp.|Aminoglycosides,"I - 'susceptible, increased exposure' isolates",N,2012,CY,Cyprus,0.0,
2,Antimicrobial resistance,Acinetobacter spp.|Aminoglycosides,"I - 'susceptible, increased exposure' isolates",N,2012,DE,Germany,0.0,
3,Antimicrobial resistance,Acinetobacter spp.|Aminoglycosides,"I - 'susceptible, increased exposure' isolates",N,2012,DK,Denmark,0.0,
4,Antimicrobial resistance,Acinetobacter spp.|Aminoglycosides,"I - 'susceptible, increased exposure' isolates",N,2012,EL,Greece,118.0,


In [18]:
# Drop the HealthTopic column. errors='ignore' keeps this cell re-runnable
# after the column has already been removed from amr_eu.
amr_eu = amr_eu.drop(columns=['HealthTopic'], errors='ignore')

# Keep only the rows whose Population label contains the literal text "Combined".
# - regex=False: match the text literally rather than as a pattern.
# - na=False: treat a missing label as "no match"; a NaN inside the mask would
#   make the boolean indexing below raise.
# - .copy(): detach the result from the original frame so later column
#   assignments do not trigger SettingWithCopyWarning.
combined_mask = amr_eu['Population'].str.contains("Combined", regex=False, na=False)
amr_eu = amr_eu[combined_mask].copy()

In [19]:
# Short display labels for the verbose "organism|combined resistance" categories
bacteria_mapping = {
    'Acinetobacter spp.|Combined resistance (fluoroquinolones, aminoglycosides and carbapenems)': 'Acinetobacter',
    'Escherichia coli|Combined resistance (third-generation cephalosporin, fluoroquinolones and aminoglycoside)': 'E. coli',
    'Klebsiella pneumoniae|Combined resistance (third-generation cephalosporin, fluoroquinolones and aminoglycoside)': 'K. pneumoniae',
    'Pseudomonas aeruginosa|Combined resistance (at least three of piperac. and tazob., fluoroq., ceftaz., aminogl. and carbapenems)': 'P. aeruginosa'
}

# Rewrite the Population column with the short labels.
# replace() leaves values that are not dictionary keys untouched, so:
#   * re-running this cell is a no-op (map() would turn the already-shortened
#     labels into NaN), and
#   * a category missing from the mapping stays visible in the check below
#     instead of silently becoming NaN.
# assign() builds a new frame, which avoids writing into a filtered slice.
amr_eu = amr_eu.assign(Population=amr_eu['Population'].replace(bacteria_mapping))

# Check: only the short labels should be listed here
amr_eu['Population'].unique()

array(['Acinetobacter', 'E. coli', 'K. pneumoniae', 'P. aeruginosa'],
      dtype=object)

In [20]:
# Inspect the structure of the filtered frame: size, column names, first rows
frame_shape = amr_eu.shape
column_names = list(amr_eu.columns)
print("DataFrame boyutu:", frame_shape)
print("\nKolonlar:", column_names)
print("\nİlk satırlar:")
amr_eu.head(n=5)

DataFrame boyutu: (6558, 8)

Kolonlar: ['Population', 'Indicator', 'Unit', 'Time', 'RegionCode', 'RegionName', 'NumValue', 'TxtValue']

İlk satırlar:


Unnamed: 0,Population,Indicator,Unit,Time,RegionCode,RegionName,NumValue,TxtValue
3670,Acinetobacter,R - resistant isolates,N,2012,BG,Bulgaria,19.0,
3671,Acinetobacter,R - resistant isolates,N,2012,CY,Cyprus,11.0,
3672,Acinetobacter,R - resistant isolates,N,2012,DE,Germany,5.0,
3673,Acinetobacter,R - resistant isolates,N,2012,DK,Denmark,5.0,
3674,Acinetobacter,R - resistant isolates,N,2012,EL,Greece,896.0,


In [21]:
# Check the covered year range, the number of countries and the rows per organism
time_column = amr_eu['Time']
print("Yıl aralığı:", time_column.min(), "-", time_column.max())
country_count = amr_eu['RegionName'].nunique()
print("\nÜlke sayısı:", country_count)
print("\nBakteri türleri:")
rows_per_organism = amr_eu['Population'].value_counts()
print(rows_per_organism)

Yıl aralığı: 2000 - 2024

Ülke sayısı: 31

Bakteri türleri:
Population
E. coli          2088
K. pneumoniae    1740
P. aeruginosa    1629
Acinetobacter    1101
Name: count, dtype: int64


In [22]:
# Build the modelling table: keep only the rows reported as percentages (Unit == '%')
is_percentage = amr_eu['Unit'].eq('%')
resistance_data = amr_eu.loc[is_percentage].copy()

print("Direnç verisi boyutu:", resistance_data.shape)
print("\nÖrnek veriler:")
resistance_data.head(n=5)

Direnç verisi boyutu: (2186, 8)

Örnek veriler:


Unnamed: 0,Population,Indicator,Unit,Time,RegionCode,RegionName,NumValue,TxtValue
4037,Acinetobacter,"R - resistant isolates, percentage",%,2012,BG,Bulgaria,32.75862068,
4038,Acinetobacter,"R - resistant isolates, percentage",%,2012,CY,Cyprus,47.82608695,
4039,Acinetobacter,"R - resistant isolates, percentage",%,2012,DE,Germany,4.20168067,
4040,Acinetobacter,"R - resistant isolates, percentage",%,2012,DK,Denmark,8.62068965,
4041,Acinetobacter,"R - resistant isolates, percentage",%,2012,EL,Greece,74.4804655,


In [23]:
# Import the libraries needed for the model
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
import warnings
# NOTE(review): this silences every warning category for all cells executed
# after this one, including deprecation notices from pandas/seaborn —
# consider narrowing it to the specific warning that prompted it.
warnings.filterwarnings('ignore')

In [24]:
# Forecast function for a single country / organism combination
def predict_resistance(country, bacteria, data,
                       future_years=(2025, 2026, 2027, 2028, 2029, 2030),
                       degree=2):
    """
    Forecast resistance percentages for one country and one organism.

    Fits an ordinary least-squares polynomial of the given ``degree`` to the
    historical ``Time`` -> ``NumValue`` observations and evaluates it at
    ``future_years``. With the defaults this is the quadratic trend
    extrapolated to 2025-2030.

    Parameters
    ----------
    country : str
        Value matched exactly against the ``RegionName`` column.
    bacteria : str
        Value matched exactly against the ``Population`` column.
    data : pandas.DataFrame
        Must contain the columns ``RegionName``, ``Population``, ``Time``
        and ``NumValue``.
    future_years : sequence of numbers, optional
        Years to forecast. Defaults to 2025 through 2030.
    degree : int, optional
        Degree of the fitted polynomial. Defaults to 2.

    Returns
    -------
    numpy.ndarray or None
        One prediction per entry of ``future_years``, clipped to [0, 100].
        ``None`` when fewer than ``max(3, degree + 1)`` usable observations
        remain, or when they cover fewer than ``degree + 1`` distinct years
        (the polynomial would not be uniquely determined).

    Notes
    -----
    With exactly ``degree + 1`` observations the polynomial passes through
    every point, so the forecast is a pure extrapolation without smoothing.
    """
    rows = data[(data['RegionName'] == country) & (data['Population'] == bacteria)]

    # Convert once and keep the converted values; entries that cannot be
    # parsed as numbers become NaN and are dropped together with their year.
    years = pd.to_numeric(rows['Time'], errors='coerce')
    values = pd.to_numeric(rows['NumValue'], errors='coerce')
    usable = years.notna() & values.notna()
    years = years[usable].to_numpy(dtype=float)
    values = values[usable].to_numpy(dtype=float)

    min_points = max(3, degree + 1)
    if len(values) < min_points or len(np.unique(years)) < degree + 1:
        return None

    # Polynomial.fit rescales the years to [-1, 1] before solving the
    # least-squares problem, which keeps the fit well conditioned even though
    # raw calendar years (~2000) squared are in the millions.
    trend = np.polynomial.Polynomial.fit(years, values, deg=degree)
    predictions = trend(np.asarray(future_years, dtype=float).ravel())

    # A percentage cannot leave the [0, 100] range
    return np.clip(predictions, 0, 100)

# Smoke test on a single country / organism pair
test_prediction = predict_resistance(country='Germany', bacteria='E. coli', data=resistance_data)
print("Test Tahmini (Almanya, E. coli):", test_prediction, sep="\n")

Test Tahmini (Almanya, E. coli):
[1.37581865 0.98097178 0.54946847 0.08130873 0.         0.        ]


In [25]:
# Produce forecasts for every country / organism pair
countries = resistance_data['RegionName'].unique()
bacteria_list = resistance_data['Population'].unique()
future_years = list(range(2025, 2031))

# One record (dict) per country, organism and forecast year
results = []

for country in countries:
    for bacteria in bacteria_list:
        predictions = predict_resistance(country, bacteria, resistance_data)

        # Pairs without enough history yield no forecast
        if predictions is None:
            continue

        results.extend(
            {
                'Country': country,
                'Bacteria': bacteria,
                'Year': year,
                'Predicted_Resistance_%': round(pred, 2),
            }
            for year, pred in zip(future_years, predictions)
        )

# Turn the collected records into a tidy table
predictions_df = pd.DataFrame(results)

print(f"Toplam tahmin sayısı: {len(predictions_df)}")
print(f"\nÜlke sayısı: {predictions_df['Country'].nunique()}")
print(f"Bakteri sayısı: {predictions_df['Bacteria'].nunique()}")
print("\nİlk tahminler:")
predictions_df.head(n=12)

Toplam tahmin sayısı: 696

Ülke sayısı: 30
Bakteri sayısı: 4

İlk tahminler:


Unnamed: 0,Country,Bacteria,Year,Predicted_Resistance_%
0,Bulgaria,Acinetobacter,2025,76.96
1,Bulgaria,Acinetobacter,2026,74.5
2,Bulgaria,Acinetobacter,2027,71.23
3,Bulgaria,Acinetobacter,2028,67.16
4,Bulgaria,Acinetobacter,2029,62.28
5,Bulgaria,Acinetobacter,2030,56.61
6,Bulgaria,E. coli,2025,14.09
7,Bulgaria,E. coli,2026,13.21
8,Bulgaria,E. coli,2027,12.24
9,Bulgaria,E. coli,2028,11.16


In [26]:
# Persist the forecasts as CSV, leaving out the index column
predictions_df.to_csv(path_or_buf='future_predictions_2025_2030.csv', index=False)
print("Tahminler 'future_predictions_2025_2030.csv' dosyasına kaydedildi.")

Tahminler 'future_predictions_2025_2030.csv' dosyasına kaydedildi.


In [27]:
# Encounter-frequency weights per organism (they must sum to 1)
resistance_weights = {
    'E. coli': 0.64,  # most common
    'K. pneumoniae': 0.22,  # second most common
    'P. aeruginosa': 0.10,  # third
    'Acinetobacter': 0.04  # least common, but important in intensive care
}

# Fail fast if an edit breaks the invariants stated above.
assert all(w >= 0 for w in resistance_weights.values()), "resistance_weights must be non-negative"
assert abs(sum(resistance_weights.values()) - 1.0) < 1e-9, "resistance_weights must sum to 1"

In [28]:
# Weighted overall AMR score for every country and year
general_amr_results = []

# A single groupby pass replaces re-filtering the whole frame for every
# country x year pair. sort=False yields the groups in first-appearance order,
# i.e. the same country/year order in which predictions_df was built.
for (country, year), country_year_data in predictions_df.groupby(['Country', 'Year'], sort=False):
    weighted_sum = 0
    total_weight = 0

    # Accumulate row by row, in frame order
    for bacteria, resistance in zip(country_year_data['Bacteria'],
                                    country_year_data['Predicted_Resistance_%']):
        # Organisms without a configured weight contribute nothing
        weight = resistance_weights.get(bacteria, 0)

        weighted_sum += resistance * weight
        total_weight += weight

    # Dividing by the weight actually present renormalizes the score when a
    # country has no forecast for some organism; such scores are therefore
    # averages over fewer organisms than the others.
    if total_weight > 0:
        general_amr = round(weighted_sum / total_weight, 2)
        general_amr_results.append({
            'Country': country,
            'Bacteria': 'General AMR',
            'Year': year,
            'Predicted_Resistance_%': general_amr
        })

# Append the General AMR rows to the organism-level forecasts
general_amr_df = pd.DataFrame(general_amr_results)
predictions_with_general = pd.concat([predictions_df, general_amr_df], ignore_index=True)

# Sort by country, year, organism
predictions_with_general = predictions_with_general.sort_values(['Country', 'Year', 'Bacteria'])

print(f"Toplam kayıt sayısı (General AMR dahil): {len(predictions_with_general)}")
print(f"\nGeneral AMR örnekleri:")
predictions_with_general[predictions_with_general['Bacteria'] == 'General AMR'].head(10)

Toplam kayıt sayısı (General AMR dahil): 876

General AMR örnekleri:


Unnamed: 0,Country,Bacteria,Year,Predicted_Resistance_%
804,Austria,General AMR,2025,2.0
805,Austria,General AMR,2026,1.72
806,Austria,General AMR,2027,1.41
807,Austria,General AMR,2028,1.08
808,Austria,General AMR,2029,0.72
809,Austria,General AMR,2030,0.34
810,Belgium,General AMR,2025,4.34
811,Belgium,General AMR,2026,4.16
812,Belgium,General AMR,2027,3.95
813,Belgium,General AMR,2028,3.7


In [29]:
# Overwrite the CSV with the table that now also holds the General AMR rows
predictions_with_general.to_csv(path_or_buf='future_predictions_2025_2030.csv', index=False)
print("Tahminler (General AMR dahil) 'future_predictions_2025_2030.csv' dosyasına kaydedildi.")
print(f"\nÖrnek Bulgaria verileri:")
is_bulgaria = predictions_with_general['Country'].eq('Bulgaria')
predictions_with_general.loc[is_bulgaria].tail(n=10)

Tahminler (General AMR dahil) 'future_predictions_2025_2030.csv' dosyasına kaydedildi.

Örnek Bulgaria verileri:


Unnamed: 0,Country,Bacteria,Year,Predicted_Resistance_%
4,Bulgaria,Acinetobacter,2029,62.28
10,Bulgaria,E. coli,2029,9.99
700,Bulgaria,General AMR,2029,31.26
16,Bulgaria,K. pneumoniae,2029,80.81
22,Bulgaria,P. aeruginosa,2029,46.0
5,Bulgaria,Acinetobacter,2030,56.61
11,Bulgaria,E. coli,2030,8.73
701,Bulgaria,General AMR,2030,31.21
17,Bulgaria,K. pneumoniae,2030,84.24
23,Bulgaria,P. aeruginosa,2030,48.21


In [30]:
# Generate and save figures based on predictions_with_general
import os
import matplotlib.pyplot as plt
import seaborn as sns

# Ensure the figures directory exists. The path is relative, so it resolves
# against the current working directory of the kernel.
figures_dir = 'figures'
os.makedirs(figures_dir, exist_ok=True)

# Paths written during THIS run. Recording them at save time (instead of
# probing the directory afterwards) keeps stale files from earlier runs out of
# the final report.
saved = []

# 1) Top 20 countries by 2030 General AMR (bar chart)
gen2030 = predictions_with_general[
    (predictions_with_general['Bacteria'] == 'General AMR') &
    (predictions_with_general['Year'] == 2030)
].copy()

if not gen2030.empty:
    top20 = gen2030.sort_values('Predicted_Resistance_%', ascending=False).head(20)
    fig, ax = plt.subplots(figsize=(10, 8))
    sns.barplot(data=top20, y='Country', x='Predicted_Resistance_%', palette='viridis', ax=ax)
    ax.set_title('Top 20 Countries by Predicted General AMR in 2030')
    ax.set_xlabel('Predicted Resistance (%)')
    ax.set_ylabel('Country')
    ax.set_xlim(0, 100)
    fig.tight_layout()
    out1 = os.path.join(figures_dir, 'general_amr_2030_top20.png')
    fig.savefig(out1, dpi=200)
    plt.close(fig)
    saved.append(out1)

# 2) Heatmap of 2030 predicted resistance by bacteria for top 15 countries by General AMR
bacteria_main = ['E. coli','K. pneumoniae','P. aeruginosa','Acinetobacter']
if not gen2030.empty:
    top15_countries = gen2030.sort_values('Predicted_Resistance_%', ascending=False).head(15)['Country'].tolist()
    hm = predictions_with_general[
        (predictions_with_general['Year'] == 2030) &
        (predictions_with_general['Bacteria'].isin(bacteria_main)) &
        (predictions_with_general['Country'].isin(top15_countries))
    ].copy()
    if not hm.empty:
        pivot = hm.pivot_table(index='Country', columns='Bacteria', values='Predicted_Resistance_%')
        # Put the columns in the bacteria_main order. Selecting only the
        # organisms that are present keeps that order even when one of them
        # has no 2030 forecast among these countries, without swallowing
        # unrelated errors.
        pivot = pivot[[b for b in bacteria_main if b in pivot.columns]]
        # Grow the figure with the number of countries, but never below 6 inches
        fig, ax = plt.subplots(figsize=(12, max(6, 0.5*len(pivot))))
        sns.heatmap(pivot, annot=True, fmt='.1f', cmap='YlOrRd', vmin=0, vmax=100,
                    cbar_kws={'label': 'Predicted %'}, ax=ax)
        ax.set_title('Predicted Resistance by Bacteria (2030) — Top 15 Countries by General AMR')
        ax.set_xlabel('Bacteria')
        ax.set_ylabel('Country')
        fig.tight_layout()
        out2 = os.path.join(figures_dir, 'resistance_heatmap_2030_top15.png')
        fig.savefig(out2, dpi=200)
        plt.close(fig)
        saved.append(out2)

# 3) Country-level 2025-2030 trends for a selected country (Turkey if available, else Germany, else first)
available_countries = predictions_with_general['Country'].unique().tolist()
preferred = None
for cand in ['Turkey','Germany']:
    if cand in available_countries:
        preferred = cand
        break
if preferred is None and available_countries:
    preferred = available_countries[0]

if preferred is not None:
    years = sorted(predictions_with_general['Year'].unique())
    years = [y for y in years if 2025 <= y <= 2030]
    subset = predictions_with_general[
        (predictions_with_general['Country'] == preferred) &
        (predictions_with_general['Year'].isin(years))
    ]
    fig, ax = plt.subplots(figsize=(10, 6))
    # One line per organism plus the composite score; series without data are skipped
    for lab in bacteria_main + ['General AMR']:
        series = subset[subset['Bacteria'] == lab].sort_values('Year')
        if not series.empty:
            ax.plot(series['Year'], series['Predicted_Resistance_%'], marker='o', label=lab)
    ax.set_ylim(0, 100)
    ax.set_title(f'Predicted Resistance Trends (2025-2030) — {preferred}')
    ax.set_xlabel('Year')
    ax.set_ylabel('Predicted Resistance (%)')
    ax.legend(title='Bacteria', ncol=2)
    ax.grid(alpha=0.3)
    fig.tight_layout()
    # Spaces in the country name are replaced so the file name has none
    country_slug = preferred.replace(" ", "_")
    out3 = os.path.join(figures_dir, f'country_predictions_{country_slug}.png')
    fig.savefig(out3, dpi=200)
    plt.close(fig)
    saved.append(out3)

# Print the files written by this run to help verify
print('Saved figures:')
for s in saved:
    print('-', s)

Saved figures:
- figures/general_amr_2030_top20.png
- figures/resistance_heatmap_2030_top15.png
- figures/country_predictions_Germany.png


# AMR forecasting and policy report (no code)

## Forecasting pipeline overview

- Objective: Estimate antimicrobial resistance (AMR) percentages for key pathogens across European countries for 2025–2030 and derive a weighted “General AMR” score to support policy discussions.
- Input data: `amr_eu.csv` (final_model/), containing country-level AMR metrics across years with multiple measurement units and pathogen categories.

## Data preparation

1) Column selection and filtering
- Removed a non-essential `HealthTopic` column.
- Kept only records whose `Population` category indicates combined resistance per organism (e.g., combined resistance across several antibiotic classes).

2) Harmonizing organism labels
- Mapped verbose organism descriptions to concise labels:
  - Acinetobacter spp. → "Acinetobacter"
  - Escherichia coli → "E. coli"
  - Klebsiella pneumoniae → "K. pneumoniae"
  - Pseudomonas aeruginosa → "P. aeruginosa"

3) Metric focus
- Restricted the dataset to rows where `Unit` equals `%`, ensuring we model true resistance percentages.

## Modeling approach

- Per country–organism combination, trained a polynomial regression model (degree 2) on historical `Time` vs. resistance `%`.
- Generated forecasts for each year from 2025 to 2030.
- Post-processed predictions by bounding values to the feasible range [0, 100] (%).

## Outputs

- Produced a tidy forecast table with columns: Country, Bacteria, Year, Predicted_Resistance_%.
- Saved forecasts to `future_predictions_2025_2030.csv`.

## General AMR score (composite)

- Computed a weighted composite to summarize organism-level resistance into a single headline number per country and year.
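- Weights reflect typical encounter frequency and policy relevance (they sum to 1 and are the values used in the notebook to produce the saved scores):
  - E. coli: 0.64
  - K. pneumoniae: 0.22
  - P. aeruginosa: 0.10
  - Acinetobacter: 0.04
- When a country has no forecast for one of the organisms, the score is the weighted average over the organisms that are available (the remaining weights are renormalized), so such scores are not strictly comparable with those based on all four organisms.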
- Appended these as rows where `Bacteria = "General AMR"` and saved into the same CSV.

## Figures generated and location

- Saved static images to `final_model/figures/` (created automatically if missing):
  1) `general_amr_2030_top20.png`: Top 20 countries by predicted General AMR in 2030 (bar chart).
  2) `resistance_heatmap_2030_top15.png`: 2030 heatmap of organism-specific resistance for the 15 countries with the highest General AMR (E. coli, K. pneumoniae, P. aeruginosa, Acinetobacter).
  3) `country_predictions_<COUNTRY>.png`: 2025–2030 trend lines for a representative country (prefers Turkey if available, then Germany, else the first available).

## How to read the results

- Organism-specific lines show the predicted resistance trajectory for that organism.
- “General AMR” is an aggregate proxy to contextualize overall resistance pressure; it is not a substitute for organism-specific detail.
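- Values are bounded between 0–100% and should be interpreted directionally; year-to-year noise is mitigated with a simple quadratic trend. A forecast of exactly 0% or 100% means the extrapolated curve left the feasible range and was clipped (for example, Germany / E. coli reaches 0% from 2029 onward); treat such values as a sign that the trend cannot be extrapolated that far, not as a literal prediction.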

## Assumptions and notes

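- The degree-2 polynomial was chosen for its simplicity and ability to capture gentle curvature in trends. Note that a quadratic has three free parameters, so for a series with only three observations it passes exactly through every point; forecasts for such sparse series involve no smoothing and are especially sensitive to noise.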
- Minimum data requirement: at least three historical observations per country–organism were needed to fit a model.
- Mapping and filtering steps ensure consistency in organism labels and measurement units.

## Potential next steps

- Enrich with covariates (antibiotic consumption, stewardship policies, demographics) via multivariate models.
- Add uncertainty intervals (e.g., bootstrap or Bayesian credible intervals).
- Expand organism coverage and validate with external datasets or expert review.