In [None]:
import altair as alt
import pandas as pd
from pmdarima.arima import CHTest
from pandas.plotting import autocorrelation_plot
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.seasonal import seasonal_decompose

In [None]:
# Load the cleaned PA6 dataset and give the date and PA6 price columns the
# shorter names used throughout the rest of the notebook.
df = pd.read_csv(
    "../../hfactory_magic_folders/plastic_cost_prediction/data/PA6_cleaned_dataset.csv"
).rename(
    columns={"time": "Date", "PA6 GLOBAL_ EMEAS _ EUR per TON": "PA6_Price"}
)

### Outliers Analysis

Following the Covid-19 pandemic and its related restrictions, at the beginning of 2020 there was a drop in demand for energy in general. As a result, natural gas, electricity and oil prices fell.

The recovery of economic activities was translated into increased energy demand, and natural gas prices regained their pre-pandemic levels by Q3/Q4 2020. The upward trend continued in 2021.

In 2022, Russia’s war on Ukraine and decision to suspend deliveries of gas to some EU member states have pushed up the price of gas, which has also caused record high prices for electricity in the EU.

Heatwaves during summer 2022 have put additional pressure on energy markets, on the one hand causing increased demand of energy for cooling, and on the other decreased energy supply due to drought and the consequent reduction in the supply of hydropower. 

In [None]:
def create_box_plot(
    data,
    x_column,
    y_column,
    color_column,
    y_scale_domain=None,
    y_title="Price",
):
    """Build a box plot of ``y_column`` grouped by the year of ``x_column``.

    Parameters
    ----------
    data
        Anything accepted by ``alt.Chart`` as its data argument.
    x_column : str
        Field whose ``year(...)`` time unit is used as the ordinal x axis.
    y_column : str
        Quantitative field summarised by the boxes.
    color_column : str
        Nominal field mapped to the color channel.
    y_scale_domain : optional
        Passed to ``alt.Scale(domain=...)`` for the y axis. When ``None``
        no explicit scale is set.
    y_title : str, optional
        Title of the y axis. Defaults to ``"Price"``.

    Returns
    -------
    The chart object returned by ``.encode(...)``.
    """
    # Only pass `scale` when a domain was requested, so that the library's
    # default y scaling is left untouched otherwise.
    y_kwargs = {"title": y_title}
    if y_scale_domain is not None:
        y_kwargs["scale"] = alt.Scale(domain=y_scale_domain)

    return (
        alt.Chart(data)
        .mark_boxplot(opacity=0.3, size=50)
        .encode(
            x=alt.X(f"year({x_column}):O", title="Year"),
            y=alt.Y(f"{y_column}:Q", **y_kwargs),
            color=f"{color_column}:N",
        )
    )


def create_swarm_plot(
    data, x_column, y_column, color_column, y_scale_domain=None
):
    """Build a circle-mark chart of ``y_column`` against the year of ``x_column``.

    Parameters
    ----------
    data
        Anything accepted by ``alt.Chart`` as its data argument.
    x_column : str
        Field whose ``year(...)`` time unit is used as the ordinal x axis.
    y_column : str
        Quantitative field plotted on the y axis.
    color_column : str
        Nominal field mapped to the color channel.
    y_scale_domain : optional
        Passed to ``alt.Scale(domain=...)`` for the y axis. When ``None``
        no explicit scale is set.

    Returns
    -------
    The chart object returned by ``.encode(...)``.
    """
    # Only pass `scale` when a domain was requested, so that the library's
    # default y scaling is left untouched otherwise.
    y_kwargs = {}
    if y_scale_domain is not None:
        y_kwargs["scale"] = alt.Scale(domain=y_scale_domain)

    # NOTE(review): the mark is given a static color="black" while a color
    # encoding is also set below; the encoding presumably takes precedence,
    # in which case the static color has no visible effect -- confirm intent.
    return (
        alt.Chart(data)
        .mark_circle(size=30, opacity=0.7, color="black")
        .encode(
            x=alt.X(f"year({x_column}):O", title="Year"),
            y=alt.Y(f"{y_column}:Q", **y_kwargs),
            color=f"{color_column}:N",
        )
    )


def create_outliers_distribution_chart(
    dataframe, date_column, features_list, title, y_scale_domain=None
):
    """Return a faceted box-plus-circle chart, one column per feature.

    The frame is reshaped to long form (``variable`` / ``value`` columns,
    keyed by ``date_column``), a box plot and a circle plot are layered on
    top of each other, and the layered chart is faceted by ``variable``.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Wide frame holding ``date_column`` and every name in ``features_list``.
    date_column : str
        Column kept as the identifier while melting; used for the x axis.
    features_list : list of str
        Columns to unpivot and plot.
    title : str
        Title set on the layered chart.
    y_scale_domain : optional
        Forwarded unchanged to both plot builders.

    Returns
    -------
    The chart object returned by ``.facet(...)``.
    """
    long_df = dataframe.reset_index().melt(
        id_vars=[date_column], value_vars=features_list
    )

    # Both layers are built from exactly the same arguments.
    layer_args = (long_df, date_column, "value", "variable", y_scale_domain)
    layered = create_box_plot(*layer_args) + create_swarm_plot(*layer_args)

    sized = layered.properties(width=600, height=400, title=title)

    # One facet column per melted feature name.
    return sized.facet(column=alt.Column("variable:N"))

In [None]:
# Yearly distribution of the PA6 price and the best compound price, with the
# y axis domain fixed to [1000, 4000].
chart = create_outliers_distribution_chart(
    dataframe=df,
    date_column="Date",
    features_list=["PA6_Price", "best_price_compound"],
    title="Price Distribution Over Years",
    y_scale_domain=[1000, 4000],
)
chart

In [None]:
# Yearly distribution of the four crude oil price columns.
chart = create_outliers_distribution_chart(
    dataframe=df,
    date_column="Date",
    features_list=["CRUDE_PETRO", "CRUDE_BRENT", "CRUDE_DUBAI", "CRUDE_WTI"],
    title="Crude Prices Distribution Over Years",
)
chart

In [None]:
# Yearly distribution of the US, European and Japanese natural gas columns.
chart = create_outliers_distribution_chart(
    dataframe=df,
    date_column="Date",
    features_list=["NGAS_US", "NGAS_EUR", "NGAS_JP"],
    title="Natural Gas Prices Distribution Over Years",
)
chart

Following the Covid-19 pandemic and its related restrictions, at the beginning of 2020 there was a drop in demand for natural gas and energy in general. As a result, natural gas prices fell to record lows.

The recovery of economic activities was translated into increased energy demand, and natural gas prices regained their pre-pandemic levels by Q3/Q4 2020. The upward trend continued in 2021. 

Natural gas prices in Europe were affected by the war much more than natural gas prices in Japan and the United States.

In [None]:
# Yearly distribution of the electricity price columns of five countries.
# The "Electricty" spelling matches the column names passed to the dataset.
chart = create_outliers_distribution_chart(
    dataframe=df,
    date_column="Date",
    features_list=[
        "Electricty_Price_France",
        "Electricty_Price_Italy",
        "Electricty_Price_Poland",
        "Electricty_Price_Netherlands",
        "Electricty_Price_Germany",
    ],
    title="Electricity Prices Distribution Over Years",
)
chart

Poland's success in containing prices was due, among other things, to Poland's electricity mix, which is still based on coal sourced from domestic mines. Therefore, Poland was less affected by the severe increases in global commodity prices observed in 2022.

Source: https://pkee.pl/en/aktualnosci/wojna-o-ceny-energii-podsumowanie-dzialan-oslonowych-na-rynkach-w-polscei-europie/#:~:text=As%20he%20points%20out%2C%20Poland's,commodity%20prices%20observed%20in%202022.

In [None]:
# Yearly distribution of the French inflation rate column.
chart = create_outliers_distribution_chart(
    dataframe=df,
    date_column="Date",
    features_list=["Inflation_rate_france"],
    title="France Inflation Rate Distribution Over Years",
)
chart

In [None]:
# Yearly distribution of the "Automotive Value" column.
# The title previously read "Compounds Price Distribution Over Years", which
# duplicated the title of the compounds chart and did not describe this feature.
chart = create_outliers_distribution_chart(
    dataframe=df,
    date_column="Date",
    features_list=["Automotive Value"],
    title="Automotive Value Distribution Over Years",
)
chart

In [None]:
# Yearly distribution of the benzene, caprolactam and cyclohexane price columns.
chart = create_outliers_distribution_chart(
    dataframe=df,
    date_column="Date",
    features_list=["Benzene_price", "Caprolactam_price", "Cyclohexane_price"],
    title="Compounds Price Distribution Over Years",
)
chart

### Seasonality Analysis

In [None]:
# Drop rows that have no compound price before decomposing the series.
df_cleaned = df.dropna(subset=["best_price_compound"])

# Decompose the same series under both models.
# NOTE(review): period=30 is hard-coded; confirm it matches the sampling
# frequency of the data (the CH test cell later in this notebook uses m=2).
multiplicative_decomposition = seasonal_decompose(
    df_cleaned["best_price_compound"], period=30, model="multiplicative"
)
additive_decomposition = seasonal_decompose(
    df_cleaned["best_price_compound"], period=30, model="additive"
)

# Draw one figure per decomposition, leaving room at the top for the title.
plt.rcParams["figure.figsize"] = (7, 5)
for decomposition, figure_title in (
    (multiplicative_decomposition, "Multiplicative Decomposition"),
    (additive_decomposition, "Additive Decomposition"),
):
    decomposition.plot().suptitle(figure_title, fontsize=16)
    plt.tight_layout(rect=[0, 0.03, 1, 0.95])

plt.show()

In [None]:
# Visual seasonality check: autocorrelation of the compound price series.

# Use a wide, short figure at 120 dpi for this plot.
plt.rcParams["figure.figsize"] = (8, 3)
plt.rcParams["figure.dpi"] = 120
autocorrelation_plot(df_cleaned["best_price_compound"].tolist())

In [None]:
# CH test for seasonality on the compound price, indexed by date.
# NOTE(review): m=2 is used here while the decomposition cell uses period=30;
# confirm which seasonal period matches the data's sampling frequency.

time_series = pd.Series(
    data=df_cleaned["best_price_compound"].to_numpy(),
    index=df_cleaned["Date"],
)
ch_test = CHTest(m=2)
ch_test.estimate_seasonal_differencing_term(time_series)