# Calculate tax revenue growth by region in Finland

Data source: Finnish Tax Administration open data (vero.fi)

Link: https://www.vero.fi/tietoa-verohallinnosta/tilastot/avoin_dat/

In [2]:
import pandas as pd
import numpy as np

In [3]:
# Build the base table of regions (code plus English and Finnish names);
# the other columns of the population density index file are not needed here.
region_columns = ['Region code', 'Region name (en)', 'Region name (fi)']
base_df = pd.read_csv('../data/Indices/population_density_index.csv').loc[:, region_columns].copy()

base_df

Unnamed: 0,Region code,Region name (en),Region name (fi)
0,MK01,Uusimaa,Uusimaa
1,MK02,Southwest Finland,Varsinais-Suomi
2,MK04,Satakunta,Satakunta
3,MK05,Kanta-Häme,Kanta-Häme
4,MK06,Pirkanmaa,Pirkanmaa
5,MK07,Päijät-Häme,Päijät-Häme
6,MK08,Kymenlaakso,Kymenlaakso
7,MK09,South Karelia,Etelä-Karjala
8,MK10,South Savo,Etelä-Savo
9,MK11,North Savo,Pohjois-Savo


In [12]:
# Load the company-level tax records (compression is inferred from the .zip suffix).
# NOTE(review): unpickling can execute arbitrary code, so only load this file
# when it comes from a trusted source.
TAX_DF = pd.read_pickle(filepath_or_buffer='../data/tax_data/full_tax_data_with_region_and_industry.pkl.zip')
TAX_DF

Unnamed: 0,Year,Business_ID,Name,Tax_Region,Tax_Revenue,Total_Amount_Paid,Tax_Advance,Tax_Return,Residual_Tax,Municipality_Code,Municipality,Code of region,Name of region in Finnish,Section
0,2011,0568703-2,Brändö Lax Ab,035 BRÄNDÖ,857376.69,222917.94,222913.97,0.00,0.00,035,Ahvenanmaa,21.0,Ahvenanmaa,A
1,2011,0841712-6,Norrfjärden Ab,035 BRÄNDÖ,0.00,0.00,0.00,0.00,0.00,035,Ahvenanmaa,21.0,Ahvenanmaa,K
2,2011,0144666-1,Brändö Andelshandel,035 BRÄNDÖ,0.00,-15.66,5369.70,5385.36,0.00,035,Ahvenanmaa,21.0,Ahvenanmaa,G
3,2011,0144668-8,Jurmo Andelshandel,035 BRÄNDÖ,0.00,300.00,0.00,0.00,300.00,035,Ahvenanmaa,21.0,Ahvenanmaa,G
4,2011,0200418-7,Lappo Handelslag,035 BRÄNDÖ,0.00,0.00,0.00,0.00,0.00,035,Ahvenanmaa,21.0,Ahvenanmaa,G
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3622762,2021,2769288-7,Eran Oy,050 Eura,105356.40,21405.03,21405.03,0.00,0.00,050,Satakunta,4.0,Satakunta,M
3622763,2021,2751387-7,Taljan Hallinta Oy,091 Helsinki,2665.81,533.16,1110.86,577.70,0.00,091,Uusimaa,1.0,Uusimaa,L
3622764,2021,2766401-8,Trimmi Inspire Oy,837 Tampere,0.00,0.00,0.00,0.00,0.00,837,Pirkanmaa,6.0,Pirkanmaa,M
3622765,2021,2741109-6,Ompelimo Taito-Ommel Oy,091 Helsinki,3934.83,786.97,678.48,0.00,108.49,091,Uusimaa,1.0,Uusimaa,C


In [26]:
def _format_region_code(raw_code):
    """Turn a numeric region code such as 21.0 into the 'MK21' form used by base_df."""
    return f'MK{int(raw_code):02d}'


# Keep only the fields needed for regional totals, discard rows with missing
# values, and rename the columns to the names used in the index tables.
tax_df = (
    TAX_DF[['Year', 'Tax_Revenue', 'Code of region']]
    .dropna()
    .rename(columns={'Tax_Revenue': 'Tax Revenue', 'Code of region': 'Region code'})
)
tax_df['Region code'] = tax_df['Region code'].apply(_format_region_code)
tax_df

Unnamed: 0,Year,Tax Revenue,Region code
0,2011,857376.69,MK21
1,2011,0.00,MK21
2,2011,0.00,MK21
3,2011,0.00,MK21
4,2011,0.00,MK21
...,...,...,...
3622762,2021,105356.40,MK04
3622763,2021,2665.81,MK01
3622764,2021,0.00,MK06
3622765,2021,3934.83,MK01


In [35]:
# Total tax revenue for every (year, region) pair
yearly_totals = tax_df.groupby(['Year', 'Region code']).sum().reset_index()

# Reshape to one row per region with one column per year
revenue_by_year = yearly_totals.pivot(index='Region code', columns='Year', values='Tax Revenue').reset_index()
revenue_by_year = revenue_by_year.rename_axis(columns=None)  # drop the leftover 'Year' axis label
revenue_by_year.columns = revenue_by_year.columns.map(str)  # year labels become strings

# Only the first and last year of the comparison window are needed
tax_region = revenue_by_year[['Region code', '2017', '2021']].copy()

# Growth between 2017 and 2021, in euros and as a percentage of the 2017 value
revenue_change = tax_region['2021'] - tax_region['2017']
tax_region['Tax Revenue Absolute Growth 2017-2021 (EUR)'] = revenue_change
tax_region['Tax Revenue Relative Growth 2017-2021 (%)'] = revenue_change / tax_region['2017'] * 100
tax_region

Unnamed: 0,Region code,2017,2021,Tax Revenue Absolute Growth 2017-2021 (EUR),Tax Revenue Relative Growth 2017-2021 (%)
0,MK01,19762160000.0,25013880000.0,5251724000.0,26.574647
1,MK02,2074112000.0,2490697000.0,416584400.0,20.084951
2,MK04,575411100.0,756771400.0,181360400.0,31.5184
3,MK05,402423600.0,919129500.0,516705900.0,128.398496
4,MK06,1595312000.0,2212021000.0,616709300.0,38.657598
5,MK07,463883800.0,619900900.0,156017200.0,33.632813
6,MK08,376749900.0,387041000.0,10291060.0,2.731535
7,MK09,174885600.0,400359400.0,225473900.0,128.926517
8,MK10,202132000.0,255590200.0,53458180.0,26.447165
9,MK11,541462700.0,819167500.0,277704800.0,51.287894


In [36]:
# Keep only the region code and the two growth columns for the final index.
# Rebinding with errors='ignore' (instead of dropping in place) keeps this cell
# safe to re-run: an in-place drop raises KeyError once the columns are gone.
tax_region = tax_region.drop(columns=['2017', '2021'], errors='ignore')
tax_region

Unnamed: 0,Region code,Tax Revenue Absolute Growth 2017-2021 (EUR),Tax Revenue Relative Growth 2017-2021 (%)
0,MK01,5251724000.0,26.574647
1,MK02,416584400.0,20.084951
2,MK04,181360400.0,31.5184
3,MK05,516705900.0,128.398496
4,MK06,616709300.0,38.657598
5,MK07,156017200.0,33.632813
6,MK08,10291060.0,2.731535
7,MK09,225473900.0,128.926517
8,MK10,53458180.0,26.447165
9,MK11,277704800.0,51.287894


In [39]:
# Attach the growth figures to the region names; the inner join keeps only
# regions whose code appears in both tables.
df = base_df.merge(tax_region, how='inner', on='Region code')
df

Unnamed: 0,Region code,Region name (en),Region name (fi),Tax Revenue Absolute Growth 2017-2021 (EUR),Tax Revenue Relative Growth 2017-2021 (%)
0,MK01,Uusimaa,Uusimaa,5251724000.0,26.574647
1,MK02,Southwest Finland,Varsinais-Suomi,416584400.0,20.084951
2,MK04,Satakunta,Satakunta,181360400.0,31.5184
3,MK05,Kanta-Häme,Kanta-Häme,516705900.0,128.398496
4,MK06,Pirkanmaa,Pirkanmaa,616709300.0,38.657598
5,MK07,Päijät-Häme,Päijät-Häme,156017200.0,33.632813
6,MK08,Kymenlaakso,Kymenlaakso,10291060.0,2.731535
7,MK09,South Karelia,Etelä-Karjala,225473900.0,128.926517
8,MK10,South Savo,Etelä-Savo,53458180.0,26.447165
9,MK11,North Savo,Pohjois-Savo,277704800.0,51.287894


In [40]:
# Save the final index table; index=False leaves the row index out of the CSV.
df.to_csv(path_or_buf='../data/Indices/tax_revenue_index.csv', index=False)