In [None]:
import os
from datetime import datetime
from datetime import date, timedelta, timezone
from io import BytesIO

import numpy as np
import pandas as pd
import requests
from datawrapper import Datawrapper

In [None]:
# Read the Datawrapper API token from the environment and create the client.
# os.getenv() expects the variable NAME as a string; the bare identifier
# `dw_secret` that was passed before is not defined and raised NameError.
# NOTE(review): assumes the environment variable is called "dw_secret" —
# confirm against the deployment / CI secret configuration.
DW_SECRET = os.getenv("dw_secret")
dw = Datawrapper(access_token=DW_SECRET)

In [None]:
# NOTEBOOK-SPEZIFISCHE FUNKTIONEN

# Convert a 'YYYY-MM-DD HH:MM:SS' timestamp string into a German date in the format month name + year
def datetime_to_german_date(date_str):
    """Convert a timestamp string into a German "<month name> <year>" label.

    Args:
        date_str: Timestamp formatted as ``'%Y-%m-%d %H:%M:%S'``,
            e.g. ``'2024-03-15 00:00:00'``.

    Returns:
        The German month name followed by the year, e.g. ``'März 2024'``.

    Raises:
        ValueError: If ``date_str`` does not match the expected format.
    """
    german_months = (
        "Januar", "Februar", "März", "April", "Mai", "Juni",
        "Juli", "August", "September", "Oktober", "November", "Dezember",
    )

    date_obj = datetime.strptime(date_str, '%Y-%m-%d %H:%M:%S')

    # Look the name up by month number. Translating the output of
    # strftime('%B') only works while the process locale is English,
    # because '%B' yields the locale's month name.
    return f"{german_months[date_obj.month - 1]} {date_obj.year}"


def format_german_number(number):
    """Format a number with German separators.

    Whole numbers are rendered without decimals, all other values with
    exactly one decimal place. Thousands are grouped with "." and the
    decimal mark is ",".

    Args:
        number: The numeric value to format.

    Returns:
        The formatted string, e.g. ``'1.234,5'``.
    """
    # Pick the precision first, then render in the default (English) style
    pattern = "{:,.0f}" if number % 1 == 0 else "{:,.1f}"
    english_style = pattern.format(number)

    # Swap "," and "." in a single pass to get the German convention
    swap_separators = str.maketrans({",": ".", ".": ","})
    return english_style.translate(swap_separators)

def month_year_to_full(month_year):
    """Expand an abbreviated German month in a "<abbr> <year>" string.

    Args:
        month_year: Two whitespace-separated tokens, the month abbreviation
            and the year, e.g. ``'Mrz 2024'``.

    Returns:
        ``'<full month name> <year>'``, or the string
        ``'Invalid month abbreviation'`` when the abbreviation is unknown.

    Raises:
        ValueError: If ``month_year`` does not split into exactly two tokens.
    """
    abbreviation, year = month_year.split()

    full_names = {
        "Jan": "Januar",
        "Feb": "Februar",
        "Mrz": "März",
        "Apr": "April",
        "Mai": "Mai",
        "Jun": "Juni",
        "Jul": "Juli",
        "Aug": "August",
        "Sep": "September",
        "Okt": "Oktober",
        "Nov": "November",
        "Dez": "Dezember",
    }

    try:
        return f"{full_names[abbreviation]} {year}"
    except KeyError:
        return "Invalid month abbreviation"



In [None]:
# Download the UBA workbook, extract the share of renewables in gross
# electricity consumption per year, extend the series to the target year with
# a linear path towards the legal target, and publish chart data plus intro
# text to Datawrapper.
url_uba_eee = "https://www.umweltbundesamt.de/sites/default/files/medien/372/dokumente/zeitreihen-zur-entwicklung-der-erneuerbaren-energien-in-deutschland-excel_uba_deu_0.xlsx"

# Target used for the projected line and quoted in the intro text
EEG_TARGET_YEAR = 2030
EEG_TARGET_SHARE = 80

# Fail fast on a hanging connection or an HTTP error instead of handing an
# error page to the Excel parser.
response = requests.get(url_uba_eee, timeout=60)
response.raise_for_status()
excel_data_uba_eee = BytesIO(response.content)

# Read the data from the Excel file
data_uba_eee = pd.ExcelFile(excel_data_uba_eee)
anteil_ee_bsv = pd.read_excel(data_uba_eee, sheet_name='2', skiprows=7, nrows=2)  # Skip seven rows, take two rows

# Keep only the first four characters of each column label (the year)
anteil_ee_bsv.columns = anteil_ee_bsv.columns.astype(str).str[:4]

# Transpose so the former column labels become the index, then drop the
# first of the two value columns
anteil_ee_bsv_t = anteil_ee_bsv.transpose()
anteil_ee_bsv = anteil_ee_bsv_t.drop(0, axis=1)
# Turn index (years) into a separate column
anteil_ee_bsv['Jahr'] = anteil_ee_bsv.index

# Give the two remaining columns their final names
anteil_ee_bsv.columns = ['Anteil der Erneuerbaren an Bruttostromverbrauch', 'Jahr']

# Remove the row that stems from the label column (its name was truncated to
# 'Anga' above) and renumber the index from 0. This must happen after the
# label-based drop, so the index is only reset here.
anteil_ee_bsv = anteil_ee_bsv.drop(['Anga'])
anteil_ee_bsv.index = range(len(anteil_ee_bsv))

# Make sure 'Jahr' is treated as a numeric column
anteil_ee_bsv['Jahr'] = pd.to_numeric(anteil_ee_bsv['Jahr'])

# Get most recent year from data; cast to a plain int so range() below works
# regardless of the numeric dtype pandas picked
last_year = int(anteil_ee_bsv['Jahr'].max())

# From the year after the last data year up to and including the target year
new_years = list(range(last_year + 1, EEG_TARGET_YEAR + 1))

# Create a new DataFrame with these years and empty (NaN) values
new_data = pd.DataFrame({
    'Jahr': new_years,
    'Anteil der Erneuerbaren an Bruttostromverbrauch': [np.nan] * len(new_years),
})

# Concatenate the original DataFrame with the new data
anteil_ee_bsv = pd.concat([anteil_ee_bsv, new_data], ignore_index=True)

condition = anteil_ee_bsv['Jahr'] == last_year
last_year_bsv = anteil_ee_bsv.loc[condition, 'Anteil der Erneuerbaren an Bruttostromverbrauch'].values[0]

# Anchor points of the target path: latest observed value and the legal target
ksg_values = {
    last_year: last_year_bsv,
    EEG_TARGET_YEAR: EEG_TARGET_SHARE,
}

# Interpolation: fill in values between the two anchor points
years = anteil_ee_bsv['Jahr'].values
ksg_ziele = np.interp(
    years,
    [last_year, EEG_TARGET_YEAR],
    [ksg_values[last_year], ksg_values[EEG_TARGET_YEAR]],
)

# Add the new column to the DataFrame
anteil_ee_bsv['EEG Ziele'] = ksg_ziele

# Remove target values for years before the latest data year
anteil_ee_bsv.loc[anteil_ee_bsv['Jahr'] < last_year, 'EEG Ziele'] = np.nan

# Get the value for the year before the latest data year
twoyearsago_bsv = anteil_ee_bsv.loc[anteil_ee_bsv['Jahr'] == last_year - 1, 'Anteil der Erneuerbaren an Bruttostromverbrauch'].values[0]

# Calculate the absolute change in percentage points
absolute_change_bsv = last_year_bsv - twoyearsago_bsv

# The direction is expressed by the wording ("höher"/"niedriger"), so only the
# magnitude is formatted; formatting the signed value produced texts such as
# "-1,2 Prozentpunkten niedriger".
change_magnitude_bsv = abs(absolute_change_bsv)
if round(change_magnitude_bsv, 1) == 0:
    # A change that displays as 0 at one decimal is neither higher nor lower
    change_text_bsv = "unverändert gegenüber dem Vorjahr"
elif absolute_change_bsv > 0:
    change_text_bsv = f"{format_german_number(change_magnitude_bsv)} Prozentpunkten höher als im Vorjahr"
else:
    change_text_bsv = f"{format_german_number(change_magnitude_bsv)} Prozentpunkten niedriger als im Vorjahr"

# Construct the custom intro message
header = f"Der Anteil der Erneuerbaren an Bruttostromverbrauch lag {last_year} bei {format_german_number(last_year_bsv)} % und damit {change_text_bsv}. Das gesetzliche Ziel (EEG) für {EEG_TARGET_YEAR} liegt bei {EEG_TARGET_SHARE} %."

# Upload the data, set the intro text and publish the chart
CHART_ID = "8gSjZ"
dw.add_data(chart_id=CHART_ID, data=anteil_ee_bsv)
dw.update_description(chart_id=CHART_ID, intro=header)
dw.publish_chart(chart_id=CHART_ID, display=False)