In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from scipy import stats
import yfinance as yf
from pandas_datareader import data as pdr

In [None]:
# Load the two raw tables from CSV files located in the working directory.
# themes.csv is parsed with the default separator, while sets.csv is
# semicolon-delimited and therefore needs an explicit delimiter.
themes = pd.read_csv("themes.csv")
sets = pd.read_csv("sets.csv", delimiter=";")

In [None]:
# Build a single dataframe: left-join every set to its theme
# (sets.theme_id -> themes.id). Column names present in both tables get the
# "_sets" / "_themes" suffixes. The theme key columns "id" and "parent_id"
# are dropped from the joined result.
sets_themes = (
    sets
    .merge(
        themes,
        how="left",
        left_on="theme_id",
        right_on="id",
        suffixes=("_sets", "_themes"),
    )
    .drop(columns=["id", "parent_id"])
)

In [None]:
# Number of sets released per year: one row per year, with the row count of
# sets_themes for that year stored in the "set_num" column.
sets_by_year = sets_themes.groupby("year").size().reset_index(name="set_num")

# The chart leaves out the last year in the table (presumably because that
# year is incomplete -- TODO confirm). Slice once and reuse the same range for
# the data line, the fit, and the trend line, so the trend is neither
# influenced by nor drawn through a point the chart does not show. This also
# matches how the themes-per-year chart in this notebook builds its trend line.
set_years = sets_by_year["year"].values[:-1]
set_counts = sets_by_year["set_num"].values[:-1]

# Plot actual data
plt.plot(set_years, set_counts, label="Actual")

# Add trend line: least-squares polynomial of degree 2 over the plotted years
trendline = np.polyfit(set_years, set_counts, 2)
p = np.poly1d(trendline)
plt.plot(set_years, p(set_years), "r--", label="Linia trendu")

# Configure plot
plt.legend()
plt.title('Liczba wydawanych zestawów LEGO rok do roku')
plt.xlabel("Rok")
plt.ylabel("Liczba zestawów LEGO")
plt.xticks(rotation=45)
plt.grid(True)
plt.tight_layout()
plt.show()

In [None]:
# Sets released per year, as a table.
# count() yields, for each year, the number of non-null entries in every column.
sets_by_year_table = sets_themes.groupby("year").count()

# Display the per-year "set_num" counts, leaving out the last year in the table.
yearly_set_counts = sets_by_year_table["set_num"]
yearly_set_counts[:-1]

In [None]:
# Count the distinct theme ids that appear in each year.
themes_by_year = (
    sets_themes
    .groupby("year")["theme_id"]
    .nunique()
    .reset_index(name="nr_themes")
)

# Work on plain arrays with the last year left out; the same truncated range
# is used for the data line, the fit, and the trend line.
theme_years = themes_by_year["year"].values[:-1]
theme_counts = themes_by_year["nr_themes"].values[:-1]

# Degree-2 least-squares polynomial used as the trend line.
z = np.polyfit(theme_years, theme_counts, 2)
p = np.poly1d(z)

# Draw the observed counts, then the fitted trend on top.
plt.plot(theme_years, theme_counts, label="Actual")
plt.plot(theme_years, p(theme_years), "r--", label="Linia trendu")

# Titles, axis labels and cosmetics.
plt.title('Liczba wydawanych serii tematycznych LEGO rok do roku')
plt.xlabel('Rok')
plt.ylabel('Liczba serii tematycznych')
plt.legend()
plt.grid(True)
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()