In [1]:
import os
import re

import numpy as np
import pandas as pd

from functions import *

%load_ext autoreload
%autoreload 2

In [2]:
# Set paths to data.
# The default is the original Google Drive location; set the THESIS_DATA_DIR
# environment variable to point the notebook at another copy of the data
# without editing this cell.
path_market = os.environ.get(
    'THESIS_DATA_DIR',
    '/Users/johan/Library/CloudStorage/GoogleDrive-johan.oelgaard@gmail.com/My Drive/04 Økonomi/10 Thesis/Data',
)

# The workbook name gets its own variable: `oxford` is bound to a DataFrame in
# the industry-data cell, so reusing that name for a string here would make
# re-running this cell clobber the frame.
oxford_file = 'oxford_economics.xlsx'
oxford_df = pd.read_excel(os.path.join(path_market, oxford_file))

In [3]:
# Industry data: everything except the last 11 columns of the raw workbook.
# The explicit .copy() makes `oxford` an independent frame, so the column
# assignments below do not operate on a slice of `oxford_df`
# (which would raise SettingWithCopyWarning).
oxford = oxford_df.iloc[:, :-11].copy()

# Extract the NACE codes for every row.
# NOTE(review): extract_nace is presumably provided by `from functions import *`;
# the explode() below assumes it returns a list-like of code strings - confirm.
nace_cols = ['Indicator', 'Sector', 'Nace code']
oxford['NACE'] = oxford.apply(lambda row: extract_nace(row, nace_cols), axis=1)

# Remove commas from 'Sector' and make it lower case.
oxford['Sector'] = oxford['Sector'].str.replace(',', '', regex=False).str.lower()

# One frame per location.
denmark = oxford[oxford['Location'] == 'Denmark'].copy().reset_index(drop=True)
europe = oxford[oxford['Location'] == 'Europe'].copy().reset_index(drop=True)
world = oxford[oxford['Location'] == 'World'].copy().reset_index(drop=True)

# Explode the NACE column so each code gets its own row, then keep the part
# before the first '.' as the industry key.
denmark = denmark.explode('NACE')
denmark['NACE industry'] = denmark['NACE'].str.split('.', expand=True)[0]
# Vectorised conversion; values that cannot be parsed become NaN.
denmark['NACE'] = pd.to_numeric(denmark['NACE'], errors='coerce')

# Split off the 'whole economy' rows; they are kept separately with all columns.
whole_denmark = denmark[denmark['Sector'] == 'whole economy'].copy().reset_index(drop=True)

# The sector-level frame drops the 'whole economy' rows and the columns that
# are no longer needed (returns a new frame instead of mutating in place).
denmark = (
    denmark[denmark['Sector'] != 'whole economy']
    .drop(columns=['Location', 'Sector', 'Nace code', 'NACE'])
    .reset_index(drop=True)
)
indicators = ['Production index', 'Gross output (sales)', 'Intermediate consumption', 'Investment', 'Value-added output']


def _mean_by_industry(frame, indicator):
    """Average the numeric columns of one indicator within each NACE industry.

    Parameters
    ----------
    frame : pandas.DataFrame
        Must contain the columns 'Indicator' and 'NACE industry'.
    indicator : str
        Value of 'Indicator' to select.

    Returns
    -------
    pandas.DataFrame
        One row per 'NACE industry' with the mean of every numeric column.
    """
    rows = frame[frame['Indicator'] == indicator]
    return rows.groupby('NACE industry').mean(numeric_only=True).reset_index()


# Sub-dataframes for each indicator.
prodind_dk = _mean_by_industry(denmark, 'Production index')
groout_dk = _mean_by_industry(denmark, 'Gross output (sales)')
intcon_dk = _mean_by_industry(denmark, 'Intermediate consumption')
inv_dk = _mean_by_industry(denmark, 'Investment')
valadd_dk = _mean_by_industry(denmark, 'Value-added output')

In [4]:
# Reshape each per-indicator frame from wide (one column per quarter header,
# e.g. "2001Q1") to long: one row per (NACE industry, quarter).

# Production index: the only frame that also receives an 'Indicator' label.
prodind_melted = pd.melt(
    prodind_dk,
    id_vars='NACE industry',
    var_name='quarter',
    value_name='prodind',
).assign(Indicator='Production index')

# Gross output (sales)
groout_melted = pd.melt(
    groout_dk,
    id_vars='NACE industry',
    var_name='quarter',
    value_name='grosout',
)

# Intermediate consumption
intcon_melted = pd.melt(
    intcon_dk,
    id_vars='NACE industry',
    var_name='quarter',
    value_name='intcon',
)

# Investment
inv_melted = pd.melt(
    inv_dk,
    id_vars='NACE industry',
    var_name='quarter',
    value_name='inv',
)

# Value-added output
valadd_melted = pd.melt(
    valadd_dk,
    id_vars='NACE industry',
    var_name='quarter',
    value_name='valadd',
)




In [16]:
# Keep only the indicators of interest, average them per
# (NACE industry, Indicator), then go to long format with one row per
# (NACE industry, Indicator, quarter).
denmark_melt = (
    denmark.loc[denmark['Indicator'].isin(indicators)]
    .groupby(['NACE industry', 'Indicator'])
    .mean(numeric_only=True)
    .reset_index()
    .melt(
        id_vars=['NACE industry', 'Indicator'],
        var_name='quarter',
        value_name='value',
    )
)

# Back to wide format: one row per (NACE industry, quarter) and one column
# per indicator.
denmark_wide = pd.pivot_table(
    denmark_melt,
    index=['NACE industry', 'quarter'],
    columns='Indicator',
    values='value',
).reset_index()

display(denmark_wide)

Indicator,NACE industry,quarter,Gross output (sales),Intermediate consumption,Investment,Production index,Value-added output
0,1,2001Q1,36.4750,22.890000,32.443333,102.755000,13.580000
1,1,2001Q2,37.4700,23.705000,32.403333,104.140000,13.765000
2,1,2001Q3,39.6000,25.140000,32.300000,106.230000,14.460000
3,1,2001Q4,37.0750,23.500000,32.126667,102.785000,13.575000
4,1,2002Q1,37.2950,23.790000,31.433333,100.650000,13.505000
...,...,...,...,...,...,...,...
9895,99,2024Q4,551.8725,248.276667,61.240000,114.486667,177.586667
9896,99,2025Q1,551.0550,246.823333,61.644000,114.786667,178.513333
9897,99,2025Q2,548.5775,244.793333,61.842000,114.873333,178.833333
9898,99,2025Q3,546.0175,242.770000,61.950000,114.953333,179.133333


In [9]:
# Average every selected indicator within each NACE industry.
denmark_filtered = (
    denmark.loc[denmark['Indicator'].isin(indicators)]
    .groupby(['NACE industry', 'Indicator'])
    .mean(numeric_only=True)
    .reset_index()
)

# Move the identifying columns into the index so that only the quarter
# columns remain as regular columns.
df_indexed = denmark_filtered.set_index(['NACE industry', 'Indicator'])

# Stack the quarter columns into a single long column (the stack-based
# counterpart of melt) and name the resulting levels and values.
df_stacked = (
    df_indexed.stack()
    .rename_axis(['NACE industry', 'Indicator', 'quarter'])
    .rename('value')
    .reset_index()
)

# Pivot so that each indicator becomes its own column, then clear the
# leftover 'Indicator' label on the columns axis.
df_wide = (
    pd.pivot_table(
        df_stacked,
        index=['NACE industry', 'quarter'],
        columns='Indicator',
        values='value',
    )
    .reset_index()
    .rename_axis(None, axis=1)
)

display(df_wide)

Unnamed: 0,NACE industry,quarter,Gross output (sales),Intermediate consumption,Investment,Production index,Value-added output
0,1,2001Q1,36.4750,22.890000,32.443333,102.755000,13.580000
1,1,2001Q2,37.4700,23.705000,32.403333,104.140000,13.765000
2,1,2001Q3,39.6000,25.140000,32.300000,106.230000,14.460000
3,1,2001Q4,37.0750,23.500000,32.126667,102.785000,13.575000
4,1,2002Q1,37.2950,23.790000,31.433333,100.650000,13.505000
...,...,...,...,...,...,...,...
9895,99,2024Q4,551.8725,248.276667,61.240000,114.486667,177.586667
9896,99,2025Q1,551.0550,246.823333,61.644000,114.786667,178.513333
9897,99,2025Q2,548.5775,244.793333,61.842000,114.873333,178.833333
9898,99,2025Q3,546.0175,242.770000,61.950000,114.953333,179.133333
