In [None]:
from mpl_toolkits import mplot3d

In [None]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import dapla as dp
import pandas as pd
from tqdm.notebook import tqdm_notebook as pbar

In [None]:
# Print the docstring of dp.execute. Exploratory only: dp.execute is not
# called anywhere in the visible part of this notebook.
help(dp.execute)

In [None]:
# NOTE(review): presumably lists what is stored under '/felles/mock' —
# confirm against the dapla documentation.
dp.show('/felles/mock')

In [None]:
# First and last year (inclusive) of the mock datasets to load.
yrs = [2019, 2024]

# Read the population and company datasets for every year, tag each person
# with the year, and left-join the company columns on work_id.
# The yearly frames are collected in a list and concatenated once at the end:
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, and calling
# it inside a loop re-copies the accumulated frame on every iteration.
yearly_frames = []
for year in range(yrs[0], yrs[1] + 1):
    print(year)
    pop = dp.read_pandas(f'/felles/mock_sysselsatte/population_{year}_30000')
    comp = dp.read_pandas(f'/felles/mock_sysselsatte/companies_{year}_30000')
    pop['year'] = year
    yearly_frames.append(pop.merge(comp, on="work_id", how="left"))

# Like the former append chain, the per-year row indices are kept as they are.
pop_comp = pd.concat(yearly_frames)

In [None]:
# Inspect the merged population/company frame.
pop_comp

In [None]:
# Keep only the rows that have both a work_id and a work_percent.
has_work = pop_comp['work_id'].notna() & pop_comp['work_percent'].notna()

# .copy() makes `workers` an independent frame, so adding columns to it later
# does not raise SettingWithCopyWarning or depend on pop_comp's internals.
workers = pop_comp[has_work].copy()

In [None]:
# Make nace2 from the longer nace code: take the first two characters of the
# `nace` string and convert them to int.
# `assign` returns a new frame instead of writing into `workers` in place, so
# this does not trigger SettingWithCopyWarning when `workers` is a filtered
# slice, and re-running the cell gives the same result.
workers = workers.assign(nace2=workers['nace'].str[:2].astype(int))

In [None]:
# Inspect the year column of the filtered workers frame.
workers['year']

In [None]:
#workers

# Trisurf plot from matplotlib

In [None]:
# Triangulated 3D surface of the workers: age on x, work percent on y and the
# 2-digit NACE code on z.
ax = plt.axes(projection='3d')
plot = ax.plot_trisurf(
    workers['age'],
    workers['work_percent'],
    workers['nace2'],
    linewidth=0,
    antialiased=False,
    cmap='viridis',
)

# Title and axis labels; the last label call is left as the cell's output.
ax.set_title('Work percent over age and nace2-code');
ax.set_xlabel('age')
ax.set_ylabel('work percent')
ax.set_zlabel('nace2')

### Build a per-company dataset with the average age and work_percent for each year

In [None]:
# Load the previously generated per-company averages. The same path is written
# by the dp.write_pandas cell later in this notebook.
companies = dp.read_pandas('/felles/mock_sysselsatte/companies_withworkaverages_20192024')

In [None]:
# If the dataset loaded correctly, the next cell (which rebuilds `companies`
# from `workers`) can be skipped.
companies

In [None]:
# Build one row per (year, work_id) holding the company's attributes and the
# average age / work_percent of its workers in that year.
#
# This is a vectorised replacement for the former row-by-row loop, which
# re-filtered the whole `workers` frame five times per company, wrote the
# results cell by cell with .loc, and stacked the years with DataFrame.append
# (removed in pandas 2.0). The result has a fresh 0..n-1 index.
group_keys = ['year', 'work_id']

# Restrict to the configured year range and order by year. mergesort is
# stable, so the first-seen order of companies inside each year is preserved.
workers_in_range = (
    workers[workers['year'].between(yrs[0], yrs[1])]
    .sort_values('year', kind='mergesort')
)

# Company attributes come from the first worker row of each (year, work_id),
# matching the `.iloc[0]` lookups of the original loop.
company_attrs = workers_in_range.drop_duplicates(subset=group_keys)[
    ['work_id', 'year', 'employee_points', 'nace2', 'region_code']
]
company_attrs = company_attrs.assign(
    region_code=company_attrs['region_code'].astype(int)
)

# Averages over all workers of the company in that year.
worker_means = (
    workers_in_range
    .groupby(group_keys, as_index=False, sort=False)
    .agg(age_avg=('age', 'mean'), work_percent_avg=('work_percent', 'mean'))
)

# Both sides hold exactly one row per (year, work_id); validate guards that.
companies = company_attrs.merge(
    worker_means, on=group_keys, how='left', validate='one_to_one'
)

In [None]:
# Inspect the per-company averages.
companies

In [None]:
# Save the per-company averages so later sessions can load them directly.
dp.write_pandas(
    companies,
    '/felles/mock_sysselsatte/companies_withworkaverages_20192024',
    valuation="OPEN",
    state="PROCESSED",
)

# Plot the new companies dataset as a 3D scatter plot with Plotly

In [None]:
import plotly.express as px

# Animated 3D scatter of the companies: one frame per year, points tracked
# across frames by work_id, coloured by nace2 and sized by employee_points.
fig = px.scatter_3d(
    companies,
    x='age_avg',
    y='region_code',
    z='work_percent_avg',
    color='nace2',
    size="employee_points",
    opacity=0.7,
    animation_frame="year",
    animation_group="work_id",
    hover_data=['work_id'],
)

# Pin the axis ranges (autorange off) and set the figure size and margins.
fig.update_layout(
    scene={
        'xaxis': {'nticks': 4, 'range': [18, 85], 'autorange': False},
        'yaxis': {'nticks': 4, 'range': [0, 50], 'autorange': False},
        'zaxis': {'nticks': 4, 'range': [40, 100], 'autorange': False},
    },
    width=700,
    height=700,
    margin={'r': 20, 'l': 10, 'b': 10, 't': 10},
)

fig.show()