Read data from CSV

In [None]:
from pandas import read_csv
# Earnings CSV, the raw columns to load from it, and the names they get after renaming.
filename = 'dataset//earnings_split/EAR_4MMN_CUR_NB_A-filtered-2021-03-21.csv'
df_incomes_columns_csv = ["ref_area.label","classif1.label","time","obs_value"]
df_incomes_columns_names = ["Country","Currency","Year","Income"]

df_incomes = read_csv(filename, usecols=df_incomes_columns_csv)

# Keep only rows expressed in U.S. dollars with a year strictly after 2009.
is_usd = df_incomes["classif1.label"] == "Currency: U.S. dollars"
after_2009 = df_incomes["time"] > 2009
df_incomes = df_incomes[is_usd & after_2009]

# Map each raw CSV header to its positional counterpart in the readable names.
column_mapping = dict(zip(df_incomes_columns_csv, df_incomes_columns_names))
df_incomes = df_incomes.rename(columns=column_mapping)

In [None]:
# Preview the first rows of the filtered and renamed income table.
df_incomes.head()

In [None]:
# Living-index CSV and the columns loaded from it.
filename = 'dataset//living_index//living_index.csv'
indices_columns = ["Country","Cost of Living Index","Year", "Local Purchasing Power Index"]
df_indices = read_csv(filename, usecols=indices_columns)

# One DataFrame per distinct country; the group key itself is not needed here.
indices_by_country = [group for _, group in df_indices.groupby('Country')]

In [None]:
from math import ceil
import matplotlib.pyplot as plt
def draw_df_by_groups(dfs,x_name,y_name,legend,groupSize=5):
    """Plot every frame in ``dfs`` as a line, ``groupSize`` frames per subplot.

    Frames are assigned to vertically stacked subplots in order: the first
    ``groupSize`` frames share the first subplot, the next ``groupSize`` the
    second, and so on. Each subplot gets a legend built from the matching
    slice of ``legend``.

    Args:
        dfs: Indexable sequence of objects with a pandas-style ``.plot``
            method (DataFrames at the call site in this file).
        x_name: Column passed as ``x`` to ``.plot``.
        y_name: Column passed as ``y`` to ``.plot``; also the y-axis label.
        legend: Sequence of labels, one per frame, in the same order as ``dfs``.
        groupSize: Maximum number of frames drawn on a single subplot.

    Returns:
        None. The figure is displayed via ``fig.show()``.
    """
    plots_count = len(dfs)
    if plots_count == 0:
        # plt.subplots cannot build a zero-row grid; there is nothing to draw.
        return
    plot_groups = int(ceil(plots_count/groupSize))
    # squeeze=False makes subplots always return a 2-D array of Axes. Without
    # it a single subplot comes back as a bare Axes object, and indexing it
    # (as needed when len(dfs) <= groupSize) raises TypeError.
    fig, axs = plt.subplots(plot_groups,figsize=(18, plot_groups*5),squeeze=False)
    axs = axs.ravel()
    for group, ax in enumerate(axs):
        start = group*groupSize
        stop = min(start+groupSize, plots_count)
        for i in range(start, stop):
            dfs[i].plot(x=x_name,y=y_name,ax=ax,ylabel=y_name)
        # Label the lines of this subplot with the names of the frames drawn on it.
        ax.legend(legend[start:stop])
    fig.show()
# Legend labels: the "Country" value from the first row of each per-country frame.
countries = [frame["Country"].values[0] for frame in indices_by_country]
draw_df_by_groups(
    indices_by_country,
    x_name="Year",
    y_name="Cost of Living Index",
    legend=countries,
)

## Purchasing power index

In [None]:
from pandas import merge as pd_merge
# Inner join: keep only (Year, Country) pairs that appear in both tables.
result = pd_merge(
    left=df_indices,
    right=df_incomes,
    how="inner",
    on=["Year", "Country"],
)
merged_countries = result["Country"].unique()
print("Countries count after merge", merged_countries.size)
result.head()

In [None]:
# New column: local purchasing power index divided element-wise by income.
purchasing_power = result["Local Purchasing Power Index"]
result["Custom power index param"] = purchasing_power.div(result["Income"])

In [None]:
# Preview the merged table, now including the "Custom power index param" column.
result.head()

## Fourier transform

In [None]:
# Rows for Poland only, restricted to the index value and the year.
is_poland = df_indices['Country'] == "Poland"
poland_Cost_of_living = df_indices.loc[is_poland, ["Cost of Living Index","Year"]]

In [None]:
# poland_Cost_of_living has two columns: index value (0) and year (1).
poland_values = poland_Cost_of_living.values
cost_of_living_index = poland_values[:, 0]
years = poland_values[:, 1]
# Scatter the raw observations as round markers.
plt.plot(years, cost_of_living_index, 'o')

In [None]:
from scipy.fft import fft, fftfreq
import numpy as np
import matplotlib.pyplot as plt

# N is the number of sample points; T is the spacing between samples.
N = cost_of_living_index.size
T = 1
half = N // 2

# Transform the series and keep the first half of the frequency bins.
yf = fft(cost_of_living_index)
xf = fftfreq(N, T)[:half]

# Magnitude of each kept bin, scaled by 2/N.
amplitude = 2.0 / N * np.abs(yf[:half])
plt.plot(xf, amplitude)
plt.grid()
plt.show()

## Linear regression

In [None]:
# Feature (column 1: year) and target (column 0: cost of living index) for Poland.
x=poland_Cost_of_living.values[:,1]
y=poland_Cost_of_living.values[:,0]
# Years to predict for: 2009 through 2025 inclusive. Built with list(range(...))
# rather than a comprehension whose loop variable reused the name `x` above.
predict_x = list(range(2009,2026))

In [None]:
import numpy as np
from sklearn.linear_model import LinearRegression

In [None]:
# scikit-learn expects a 2-D feature matrix, so reshape into a single column.
train_features = np.reshape(x, (-1, 1))
predict_features = np.reshape(predict_x, (-1, 1))

# fit() returns the estimator itself, so construction and fitting can be chained.
model = LinearRegression(n_jobs=-1).fit(train_features, y)
predicted_y = model.predict(predict_features)

In [None]:
import matplotlib.pyplot as plt
# Observed values as red points, fitted line as a green curve, on one axes.
fig, ax = plt.subplots(figsize=(12, 6))
ax.scatter(x, y, color="red")
ax.plot(predict_x, predicted_y, color="green")
ax.set_title("Cost of living index in Poland")
ax.set_xlabel("Year")
ax.set_ylabel("IndexLevel")
plt.show()

In [None]:
# TODO: polynomial regression