<a href="https://colab.research.google.com/github/alendoko/bina/blob/main/LB4/DATA/Python_JUPYTER_Data_Analysis_SteyByStep_Melbourne_Houseprice2025.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Jupyter Notebook, welches die Daten von bestehenden PV-Anlagen in der Schweiz analysiert

## Daten laden, aufbereiten und anreichern

In [None]:
import pandas as pd

def _catalogue_lookup(catalogue):
    """Build an id -> label mapping from a catalogue table.

    Keys come from the table's 'Catalogue_id' column, values from its 'de'
    column (presumably the German label -- confirm against the catalogue files).
    """
    return dict(zip(catalogue['Catalogue_id'], catalogue['de']))


# Main dataset: one row per electricity production plant
file_path = '../data/electricity_production_plants/ElectricityProductionPlant.csv'
df = pd.read_csv(file_path)

# Catalogue (lookup) tables used to turn category ids into readable labels
main_categories_file_path = '../data/electricity_production_plants/MainCategoryCatalogue.csv'
sub_categories_file_path = '../data/electricity_production_plants/SubCategoryCatalogue.csv'
plant_categories_file_path = '../data/electricity_production_plants/PlantCategoryCatalogue.csv'

main_categories = pd.read_csv(main_categories_file_path)
sub_categories = pd.read_csv(sub_categories_file_path)
plant_categories = pd.read_csv(plant_categories_file_path)

main_categories_dict = _catalogue_lookup(main_categories)
sub_categories_dict = _catalogue_lookup(sub_categories)
plant_categories_dict = _catalogue_lookup(plant_categories)

# Swap ids for labels; .replace() leaves values without a catalogue entry unchanged
df['MainCategory'] = df['MainCategory'].replace(main_categories_dict)
df['SubCategory'] = df['SubCategory'].replace(sub_categories_dict)
df['PlantCategory'] = df['PlantCategory'].replace(plant_categories_dict)

# Show the first rows as a quick sanity check of the decoded columns
df.head()

# TODO: Add visualizations
# - Total potential capacity for all cantons
# - Development of total capacity per canton (2015-2024)

## Karte mit allen PV-Anlagen in der Schweiz, Stand 2024

In [None]:
import folium
from folium.plugins import MarkerCluster, FastMarkerCluster
from pyproj import Transformer

# Keep only photovoltaic plants; .copy() so the column assignment below
# works on an independent frame rather than a slice of df
df_photovoltaik = df[df['SubCategory'] == 'Photovoltaik'].copy()

# Parse the commissioning date; unparseable values become NaT
df_photovoltaik['BeginningOfOperation'] = pd.to_datetime(df_photovoltaik['BeginningOfOperation'], errors='coerce')

# Keep installations commissioned up to and including 2024
# (rows with NaT compare False here and are therefore dropped)
df_photovoltaik = df_photovoltaik[df_photovoltaik['BeginningOfOperation'].dt.year <= 2024]

# Create a folium map centered on Switzerland
# NOTE: the name `map` shadows Python's builtin map(); it is kept so that any
# other cell referring to this variable keeps working.
map = folium.Map(location=[46.8, 8.33], zoom_start=7, tiles='TopPlusOpen.Color', attr='Map data: &copy; <a href="http://www.govdata.de/dl-de/by-2-0">dl-de/by-2-0</a>')

# Convert the plant coordinates from EPSG:2056 to EPSG:4326 in a single
# vectorized call instead of transforming row by row (twice per row).
transformer = Transformer.from_crs("EPSG:2056", "EPSG:4326", always_xy=True)
located = df_photovoltaik.dropna(subset=['_x', '_y'])  # skip plants without coordinates
# always_xy=True -> the transformer returns (x, y) = (longitude, latitude)
longitudes, latitudes = transformer.transform(
    located['_x'].to_numpy(dtype=float),
    located['_y'].to_numpy(dtype=float),
)
# FastMarkerCluster expects [lat, lon] pairs; .tolist() yields plain Python floats
coordinates = [[lat, lon] for lat, lon in zip(latitudes.tolist(), longitudes.tolist())]

# Add a FastMarkerCluster for better performance with large datasets
FastMarkerCluster(coordinates).add_to(map)

# Add a title to the map
title_html = '''
<div style="font-size:20px;position: absolute;z-index: 1000;left: 25%;"><b>PV-Anlagen in der Schweiz, Stand 2024</b></div>
'''
map.get_root().html.add_child(folium.Element(title_html))

# Save the map to an HTML file
map.save('../docs/assets/diagramme/elcoms_map.html')

# Display the map
map

## Kumulative Entwicklung der Gesamtkapazität ab 2015

In [None]:
import plotly.express as px

# Commissioning year of every plant; unparseable dates become NaN and are
# ignored by the groupby below. The column is stored on df for later cells.
df['Year'] = pd.to_datetime(df['BeginningOfOperation'], errors='coerce').dt.year

# Power added per (Year, Canton). groupby sorts its keys, so within each canton
# the rows are in chronological order for the running total.
yearly_power = df.groupby(['Year', 'Canton'])['TotalPower'].sum()

# Running total per canton over ALL years. After this step the 'TotalPower'
# column of cumulative_data already holds the cumulative capacity.
cumulative_data = yearly_power.groupby(level=1).cumsum().reset_index()

# Restrict the chart to 2015-2024; capacity installed before 2015 is still
# contained in the running total.
filtered_data = cumulative_data[(cumulative_data['Year'] >= 2015) & (cumulative_data['Year'] <= 2024)].copy()

# Reuse the running total computed above. Applying cumsum() a second time here
# would accumulate already-cumulative values and inflate every data point.
filtered_data['CumulativePower'] = filtered_data['TotalPower']

# NOTE(review): title and labels state MW -- confirm the unit of 'TotalPower'
# in the source data, no unit conversion happens in this cell.
# Create a line plot
fig = px.line(
    filtered_data,
    x='Year',
    y='CumulativePower',
    color='Canton',
    title='Kumulative Entwicklung der Gesamtkapazität pro Kanton ab 2015 (MW)',
    labels={'Year': 'Jahr', 'CumulativePower': 'Kumulative Gesamtkapazität (MW)', 'Canton': 'Kanton'}
)

# Update x-axis to show every year
fig.update_layout(xaxis=dict(tickmode='linear', tick0=2015, dtick=1))

fig.show()
fig.write_html("../docs/assets/diagramme/elcom_entwicklung_total_power.html")

## Gesamtkapazität pro Kanton im Jahr 2024

In [None]:
import plotly.express as px

# Derive the commissioning year locally so this cell does not depend on a
# 'Year' column that only exists after another cell has been executed.
commissioning_year = pd.to_datetime(df['BeginningOfOperation'], errors='coerce').dt.year

# Sum the power of plants commissioned in 2024, per canton.
# NOTE(review): this selects only plants that started operation in 2024, while
# the chart title reads as total installed capacity -- confirm whether
# `<= 2024` is intended here.
data_2024 = df[commissioning_year == 2024].groupby('Canton')['TotalPower'].sum().reset_index()

# NOTE(review): title and labels state MW -- confirm the unit of 'TotalPower'
# in the source data, no unit conversion happens in this cell.
# Create a bar chart with different colors for each bar
fig = px.bar(
    data_2024,
    x='Canton',
    y='TotalPower',
    title='Gesamtkapazität pro Kanton im Jahr 2024 (MW)',
    labels={'Canton': 'Kanton', 'TotalPower': 'Gesamtkapazität (MW)'},
    text='TotalPower',
    color='Canton',  # Assign a unique color per Canton
    color_discrete_sequence=px.colors.qualitative.Set3  # Use a predefined color set
)

# Bar labels in short SI notation, placed above the bars; cantons ordered by value
fig.update_traces(texttemplate='%{text:.2s}', textposition='outside')
fig.update_layout(xaxis=dict(categoryorder='total descending'))

fig.show()
fig.write_html("../docs/assets/diagramme/elcom_total_power_2024.html")