# Monthly data cleaning

In [20]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt



In [21]:
# Load the cleaned monthly demand table. Later cells read its
# 'country', 'type', 'year', 'month' and 'demand' columns.
monthly_demand_csv = "src/data/analyzed/monthly_demand_clean.csv"
df = pd.read_csv(monthly_demand_csv)


In [22]:
# Build a Europe-wide series: for every (type, year, month) combination, add up
# the demand of all countries in the table and label the result "Europe".
df_europe = (
    df.groupby(['type', 'year', 'month'], as_index=False)['demand']
    .sum()
    .assign(country='Europe')
)

# Append the aggregate rows below the per-country rows and renumber the index.
df = pd.concat([df, df_europe], ignore_index=True)

In [23]:
# Map the two-letter country codes used in the data to full country names.
# The list covers European countries (not only EU members); the aggregate
# "Europe" rows are relabelled "Europe*".
country_mapping = {
    "AT": "Austria", "BE": "Belgium", "BG": "Bulgaria", "HR": "Croatia",
    "CY": "Cyprus", "CZ": "Czech Republic", "DK": "Denmark", "EE": "Estonia",
    "FI": "Finland", "FR": "France", "DE": "Germany", "GR": "Greece",
    "HU": "Hungary", "IS": "Iceland", "IE": "Ireland", "IT": "Italy",
    "LV": "Latvia", "LI": "Liechtenstein", "LT": "Lithuania", "LU": "Luxembourg",
    "MT": "Malta", "NL": "Netherlands", "NO": "Norway", "PL": "Poland",
    "PT": "Portugal", "RO": "Romania", "SK": "Slovakia", "SI": "Slovenia",
    "ES": "Spain", "SE": "Sweden", "CH": "Switzerland", "TR": "Turkey",
    "UK": "United Kingdom", "Europe": "Europe*"
}

# Replace country codes with full names. Series.map yields NaN for every value
# that is not a key of the mapping, so fall back to the original value: a code
# missing from the mapping stays visible instead of silently becoming NaN, and
# re-running this cell on already-translated names leaves them untouched.
df["country_full"] = df["country"].map(country_mapping).fillna(df["country"])
df["country"] = df["country_full"]

# Create the 'country_type' label, e.g. "Germany - industry"
df["country_type"] = df["country_full"] + " - " + df["type"]

In [24]:
# Baseline period: the years whose demand is averaged into the reference level
years_to_average = [2019, 2020, 2021]

# Keep only the observations that fall inside the baseline period
in_baseline = df['year'].isin(years_to_average)
filtered_df = df[in_baseline]

# Mean baseline demand for each (country, type, month) combination
index_df = (
    filtered_df
    .groupby(['country', 'type', 'month'], as_index=False)['demand']
    .mean()
    .rename(columns={'demand': 'monthly_index'})
)

# Attach the baseline to every row; with a left join, rows that have no
# matching baseline keep NaN in 'monthly_index'
df = df.merge(index_df, how='left', on=['country', 'type', 'month'])




# Indexed

In [25]:
# Absolute deviation of each row's demand from its baseline monthly average
df['demand_sector'] = df['demand'].sub(df['monthly_index'])
# Demand relative to the baseline monthly average, scaled so the baseline equals 100
df['demand_indexed'] = df['demand'].div(df['monthly_index']).mul(100)


In [None]:
## For line chart

In [26]:
# Independent working copies for the indexed line-chart export
df_i, index_df_i = df.copy(), index_df.copy()

In [27]:

# Turn the baseline table into anchor rows: every row is dated 12/2021, with
# 'monthly_index' fixed at 1 and 'demand_indexed' fixed at 100.
index_df_i = index_df_i.assign(
    year="2021",
    month=12,
    monthly_index=1,
    demand_indexed=100,
)

# After the overwrite, all rows of a (country, type) pair are identical; keep one each
unique_index_df = index_df_i.drop_duplicates(subset=['country', 'type'])

# Put the anchor rows in front of the full data set and renumber the index
df_i = pd.concat([unique_index_df, df], ignore_index=True)

# Month as a zero-padded string, year as a string
df_i = df_i.assign(
    month=df_i["month"].astype(str).str.zfill(2),
    year=df_i["year"].astype(str),
)

# Build the x-axis label 'x_value' as "<month>/<year>"
df_i["x_value"] = df_i["month"] + "/" + df_i["year"]


In [None]:
## For sector chart

In [32]:
# Independent working copies for the sector export
df_s, index_df_s = df.copy(), index_df.copy()

In [33]:
# Month as a zero-padded string, year as a string
df_s = df_s.assign(
    month=df_s["month"].astype(str).str.zfill(2),
    year=df_s["year"].astype(str),
)

# Build the x-axis label 'x_value' as "<month>/<year>"
df_s["x_value"] = df_s["month"].str.cat(df_s["year"], sep="/")

In [38]:

# Reshape to wide form: one row per (country, x_value), one column per demand type.
# NOTE(review): pandas' sum skips NaN by default, so a cell whose only inputs are
# NaN may come out as 0 rather than missing -- confirm this is intended.
pivot_df = df_s.pivot_table(
    index=["country", "x_value"], columns="type", values="demand_sector", aggfunc="sum"
)

# The three base types and the pairwise differences derived from them.
# NOTE(review): "industry - power" is the only ordered pair not listed -- confirm
# whether that omission is deliberate.
base_types = ["power", "industry", "household"]
difference_pairs = [
    ("industry", "household"),
    ("household", "power"),
    ("power", "industry"),
    ("power", "household"),
    ("household", "industry"),
]

# Column-wise subtraction replaces the row-by-row apply: the result is NaN wherever
# either operand is missing, and those rows are removed by the dropna below.
for minuend, subtrahend in difference_pairs:
    pivot_df[f"{minuend} - {subtrahend}"] = pivot_df[minuend] - pivot_df[subtrahend]

# Melt back to long format for the final output, keeping only rows with a value
result = pivot_df.reset_index().melt(
    id_vars=["country", "x_value"],
    value_vars=base_types + [f"{a} - {b}" for a, b in difference_pairs],
    var_name="type", value_name="demand_sector"
).dropna(subset=["demand_sector"])

df_s = result.copy()


In [30]:
# Exclude rows of type "total" from the sector data.
# NOTE(review): in file order df_s comes from a melt whose value_vars do not
# contain "total", so this filter removes nothing there -- confirm the cell order.
df_s = df_s.loc[df_s['type'].ne("total")]

# Line chart

In [31]:

# Remove the observations of the baseline years (2019, 2020, 2021) from the
# line-chart data. 'year' was converted to strings when df_i was built, so the
# integer entries of years_to_average must be compared as strings; matched
# against the integers directly, isin() finds nothing and no row is removed.
baseline_years = [str(year) for year in years_to_average]
in_baseline = df_i['year'].astype(str).isin(baseline_years)

# The anchor rows (demand_indexed = 100, dated 12/2021) were placed first when
# df_i was concatenated with ignore_index=True, so they carry the index labels
# 0 .. len(unique_index_df) - 1.
# NOTE(review): assumes the anchors are meant to survive this filter, since they
# would otherwise serve no purpose -- confirm.
is_anchor = df_i.index < len(unique_index_df)

df_i = df_i[~in_baseline | is_anchor]



# Save .json

In [39]:
# Add the generic chart columns next to the original ones. Using assign instead of
# df_s.pop(...) leaves df_s intact, so this cell can be re-run without a KeyError;
# the columns written to the file are the same.
df_json = df_s.assign(
    y_value=df_s["demand_sector"],
    group_value=df_s["type"],
    group_b_value=df_s["country"],
)
# Save to a JSON file (one record per row)
file_path = "highcharts/data/monthly_demand_sector.json"
df_json.to_json(file_path, orient='records', indent=4)
print(f"The file has been saved as: {file_path}")


The file has been saved as: highcharts/data/monthly_demand_sector.json


In [40]:
df_json = df_i.copy()
# Take the chart values from df_i's own columns. df_i has the anchor rows prepended
# and a renumbered index, so values popped from df would be aligned to the wrong
# rows (and popping would also strip the columns from df).
df_json["y_value"] = df_json["demand_indexed"]
# The anchor rows have no 'country_type'; rebuild it the same way it is defined
# for the regular rows ("<country> - <type>") so they join their series.
df_json["group_value"] = df_json["country_type"].fillna(
    df_json["country"] + " - " + df_json["type"]
)
# Save to a JSON file (one record per row)
file_path = "highcharts/data/monthly_demand_indexed.json"
df_json.to_json(file_path, orient='records', indent=4)
print(f"The file has been saved as: {file_path}")

The file has been saved as: highcharts/data/monthly_demand_indexed.json
