In [1]:
import numpy as np
import pandas as pd

In [2]:
# Read the CSV file
df = pd.read_csv('D:/Study-3rd/DSP/OP-heist-project/data/Regional_economic_and_environmental_data/Employment_MKregion_breakdown_more_industries.csv', encoding='iso-8859-1')

# Some cleaning
df = df.drop(['Sector','Transaction'], axis = 1)
new_column_names = {
    '2015 Original series': '2015',
    '2016 Original series': '2016',
    '2017 Original series': '2017',
    '2018 Original series': '2018',
    '2019 Original series': '2019',
    '2020 Original series': '2020'
}
df = df.rename(columns=new_column_names)

# Mapping to combine industry
mapping = {
    '10-12 Food industry etc.': 'C Manufacturing (10-33)',
    '13-15 Textile, clothing and leather industry': 'C Manufacturing (10-33)',
    '16 Manufacture of wood and of products of wood and cork, except furniture; manufacture of articles of straw and plaiting materials': 'C Manufacturing (10-33)',
    '17, 18 Paper industry; Printing': 'C Manufacturing (10-33)',
    '19-22 Chemical industry': 'C Manufacturing (10-33)',
    '23 Manufacture of other non-metallic mineral products': 'C Manufacturing (10-33)',
    '24-25 Manufacture of basic metals and fabricated metal products, except machinery and equipment': 'C Manufacturing (10-33)',
    '26, 27 Manufacture of electrical and electronic products': 'C Manufacturing (10-33)',
    '28 Manufacture of machinery and equipment n.e.c.': 'C Manufacturing (10-33)',
    '29, 30 Manufacture of transport equipment': 'C Manufacturing (10-33)',
    '31-33 Manufacture of furniture, other manufacturing; repair and installation of machinery and equipment': 'C Manufacturing (10-33)',
    '681, 68209, 683 Other real estate activities': 'L Real estate activities (68)',
    '68201, 68202 Letting and operation of dwellings': 'L Real estate activities (68)'
}
df['Industry'] = df['Industry'].map(lambda x: mapping.get(x, x))

# Define a custom aggregation function to combine rows column-wise
df = df.groupby(['Area', 'Industry']).agg({
    '2015': 'sum',
    '2016': 'sum',
    '2017': 'sum',
    '2018': 'sum',
    '2019': 'sum',
    '2020': 'sum'
}).reset_index()

# Calculating the percentages
df_filtered = df[df['Industry'] != 'Total']
year_columns = [str(year) for year in range(2015, 2021)]
df_filtered[year_columns] = df_filtered[year_columns].apply(pd.to_numeric)
whole_country_values = df_filtered[df_filtered['Area'] == 'WHOLE COUNTRY'][['Industry'] + year_columns]
merged_df = df_filtered.merge(whole_country_values, on='Industry', suffixes=('', '_whole_country'))
for year in year_columns:
    merged_df[year] = merged_df[year] / merged_df[year + '_whole_country']
merged_df.drop(columns=[year + '_whole_country' for year in year_columns], inplace=True)
df_result = merged_df.reset_index(drop=True)
# Transform the 'Industry' column to keep only the first character
df_result['Industry'] = df_result['Industry'].str[0]
# Df preview
df_result = pd.melt(df_result, id_vars=['Area', 'Industry'], var_name='Time', value_name='Value')
df_result

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filtered[year_columns] = df_filtered[year_columns].apply(pd.to_numeric)


Unnamed: 0,Area,Industry,Time,Value
0,MK01 Uusimaa,A,2015,0.064117
1,MK02 Southwest Finland,A,2015,0.093353
2,MK04 Satakunta,A,2015,0.049673
3,MK05 Kanta-Häme,A,2015,0.034266
4,MK06 Pirkanmaa,A,2015,0.067966
...,...,...,...,...
2155,MK17 North Ostrobothnia,T,2020,0.123362
2156,MK18 Kainuu,T,2020,0.024655
2157,MK19 Lapland,T,2020,0.061034
2158,MK21 Åland,T,2020,0.003276


In [3]:
df2 = pd.read_csv('D:/Study-3rd/DSP/OP-heist-project/data/tulli/international_trade_country_NACE_breakdown.csv', encoding = 'iso-8859-1')
df2 = df2[df2['NACE'].between('A', 'Z')]
df2['Time'] = df2['Time'].astype(str)
df2 = df2[df2['Time'].isin(['201512', '201612', '201712','201812','201912','202012'])]
df2['Time'] = df2['Time'].str[0:4]
df_import = df2[df2['Direction']=="Imports by countries of origin"].drop('Value (euro)',axis = 1)
df_import.rename(columns = {'Cum. value (euro) from the beginning of the year':'Value (euro)'},inplace = True)
df_export = df2[df2['Direction']=="Exports by countries of destination"].drop('Value (euro)',axis = 1)
df_export.rename(columns = {'Cum. value (euro) from the beginning of the year':'Value (euro)'},inplace = True)
df_import_export = df2[df2['Direction']!='Imports by countries of consignment'].drop('Value (euro)',axis = 1)
df_import_export.rename(columns = {'Cum. value (euro) from the beginning of the year':'Value (euro)'},inplace = True)

In [4]:
df_import_export

Unnamed: 0,NACE,Country,Time,Direction,Value (euro)
22860,A,AA,2020,Imports by countries of origin,207840198
22861,A,AA,2020,Exports by countries of destination,146067920
22863,A,AD,2020,Imports by countries of origin,0
22864,A,AD,2020,Exports by countries of destination,0
22866,A,AE,2020,Imports by countries of origin,743
...,...,...,...,...,...
1557922,X,ZA,2015,Exports by countries of destination,3676936
1557924,X,ZM,2015,Imports by countries of origin,0
1557925,X,ZM,2015,Exports by countries of destination,1891187
1557927,X,ZW,2015,Imports by countries of origin,5027


In [5]:
df_result

Unnamed: 0,Area,Industry,Time,Value
0,MK01 Uusimaa,A,2015,0.064117
1,MK02 Southwest Finland,A,2015,0.093353
2,MK04 Satakunta,A,2015,0.049673
3,MK05 Kanta-Häme,A,2015,0.034266
4,MK06 Pirkanmaa,A,2015,0.067966
...,...,...,...,...
2155,MK17 North Ostrobothnia,T,2020,0.123362
2156,MK18 Kainuu,T,2020,0.024655
2157,MK19 Lapland,T,2020,0.061034
2158,MK21 Åland,T,2020,0.003276


In [6]:
permutations = pd.merge(df_result, df_import_export, left_on=['Industry', 'Time'],right_on=['NACE', 'Time'], how='left')

In [7]:
# Define a custom function to convert values to int64 and replace non-numeric values with 0
def convert_to_int_or_zero(value):
    try:
        return int(value)
    except (ValueError, TypeError):
        return 0

# Apply the custom function to the 'Value (euro)' column
permutations['Value (euro)'] = permutations['Value (euro)'].apply(convert_to_int_or_zero)
permutations
def multiply_row(row):
    return row['Value'] * row['Value (euro)']

# Apply the custom function row-wise using apply with axis=1
permutations['Result'] = permutations.apply(multiply_row, axis=1)

In [8]:
permutations = permutations.drop(['Value (euro)','Value','NACE'],axis = 1)
permutations.rename(columns = {'Result':'Value (euro)'},inplace = True)

In [12]:
permutations.drop_duplicates
permutations.to_csv('D:/Study-3rd/DSP/OP-heist-project/data/international_trade/Import_export_regions_countries.csv',index=False)