In [1]:
import numpy as np
import pandas as pd

In [2]:
# Read the CSV file
df = pd.read_csv('D:/Study-3rd/DSP/OP-heist-project/data/Regional_economic_and_environmental_data/Employment_MKregion_breakdown_more_industries.csv', encoding='iso-8859-1')

# Some cleaning
df = df.drop(['Sector','Transaction'], axis = 1)
new_column_names = {
    '2015 Original series': '2015',
    '2016 Original series': '2016',
    '2017 Original series': '2017',
    '2018 Original series': '2018',
    '2019 Original series': '2019',
    '2020 Original series': '2020'
}
df = df.rename(columns=new_column_names)

# Mapping to combine industry
mapping = {
    '10-12 Food industry etc.': 'C Manufacturing (10-33)',
    '13-15 Textile, clothing and leather industry': 'C Manufacturing (10-33)',
    '16 Manufacture of wood and of products of wood and cork, except furniture; manufacture of articles of straw and plaiting materials': 'C Manufacturing (10-33)',
    '17, 18 Paper industry; Printing': 'C Manufacturing (10-33)',
    '19-22 Chemical industry': 'C Manufacturing (10-33)',
    '23 Manufacture of other non-metallic mineral products': 'C Manufacturing (10-33)',
    '24-25 Manufacture of basic metals and fabricated metal products, except machinery and equipment': 'C Manufacturing (10-33)',
    '26, 27 Manufacture of electrical and electronic products': 'C Manufacturing (10-33)',
    '28 Manufacture of machinery and equipment n.e.c.': 'C Manufacturing (10-33)',
    '29, 30 Manufacture of transport equipment': 'C Manufacturing (10-33)',
    '31-33 Manufacture of furniture, other manufacturing; repair and installation of machinery and equipment': 'C Manufacturing (10-33)',
    '681, 68209, 683 Other real estate activities': 'L Real estate activities (68)',
    '68201, 68202 Letting and operation of dwellings': 'L Real estate activities (68)'
}
df['Industry'] = df['Industry'].map(lambda x: mapping.get(x, x))

# Define a custom aggregation function to combine rows column-wise
df = df.groupby(['Area', 'Industry']).agg({
    '2015': 'sum',
    '2016': 'sum',
    '2017': 'sum',
    '2018': 'sum',
    '2019': 'sum',
    '2020': 'sum'
}).reset_index()

# Calculating the percentages
df_filtered = df[df['Industry'] != 'Total']
year_columns = [str(year) for year in range(2015, 2021)]
df_filtered[year_columns] = df_filtered[year_columns].apply(pd.to_numeric)
whole_country_values = df_filtered[df_filtered['Area'] == 'WHOLE COUNTRY'][['Industry'] + year_columns]
merged_df = df_filtered.merge(whole_country_values, on='Industry', suffixes=('', '_whole_country'))
for year in year_columns:
    merged_df[year] = merged_df[year] / merged_df[year + '_whole_country']
merged_df.drop(columns=[year + '_whole_country' for year in year_columns], inplace=True)
df_result = merged_df.reset_index(drop=True)
df_result['Industry'] = df_result['Industry'].str[0]
df_result = pd.melt(df_result, id_vars=['Area', 'Industry'], var_name='Time', value_name='Value')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filtered[year_columns] = df_filtered[year_columns].apply(pd.to_numeric)


In [3]:
df2 = pd.read_csv('D:/Study-3rd/DSP/OP-heist-project/data/tulli/international_trade_country_NACE_breakdown.csv', encoding = 'iso-8859-1')
df2 = df2[df2['NACE'].between('A', 'Z')]
df2['Time'] = df2['Time'].astype(str)
df2 = df2[df2['Time'].isin(['201512', '201612', '201712','201812','201912','202012'])]
df2['Time'] = df2['Time'].str[0:4]
df_import_export = df2[df2['Direction']!='Imports by countries of consignment'].drop('Value (euro)',axis = 1)
df_import_export.rename(columns = {'Cum. value (euro) from the beginning of the year':'Value (euro)'},inplace = True)

In [4]:
# Join dfs
permutations = pd.merge(df_result, df_import_export, left_on=['Industry', 'Time'],right_on=['NACE', 'Time'], how='left')
permutations

Unnamed: 0,Area,Industry,Time,Value,NACE,Country,Direction,Value (euro)
0,MK01 Uusimaa,A,2015,0.064117,A,AA,Imports by countries of origin,119236326
1,MK01 Uusimaa,A,2015,0.064117,A,AA,Exports by countries of destination,69020388
2,MK01 Uusimaa,A,2015,0.064117,A,AD,Imports by countries of origin,0
3,MK01 Uusimaa,A,2015,0.064117,A,AD,Exports by countries of destination,0
4,MK01 Uusimaa,A,2015,0.064117,A,AE,Imports by countries of origin,997
...,...,...,...,...,...,...,...,...
1158235,WHOLE COUNTRY,T,2020,1.000000,T,ZA,Exports by countries of destination,0
1158236,WHOLE COUNTRY,T,2020,1.000000,T,ZM,Imports by countries of origin,...
1158237,WHOLE COUNTRY,T,2020,1.000000,T,ZM,Exports by countries of destination,0
1158238,WHOLE COUNTRY,T,2020,1.000000,T,ZW,Imports by countries of origin,...


In [5]:
# Calculate exact values for each region
def convert_to_int_or_zero(value):
    try:
        return int(value)
    except (ValueError, TypeError):
        return 0
permutations['Value (euro)'] = permutations['Value (euro)'].apply(convert_to_int_or_zero)
permutations
def multiply_row(row):
    return row['Value'] * row['Value (euro)']
permutations['Result'] = permutations.apply(multiply_row, axis=1)

In [6]:
permutations = permutations.drop(['Value (euro)','Value','NACE'],axis = 1)
permutations.rename(columns = {'Result':'Value (euro)'},inplace = True)
permutations.drop_duplicates
permutations.to_csv('D:/Study-3rd/DSP/OP-heist-project/data/international_trade/Import_export_regions_countries.csv',index=False)
permutations.head()

Unnamed: 0,Area,Industry,Time,Country,Direction,Value (euro)
0,MK01 Uusimaa,A,2015,AA,Imports by countries of origin,7645083.0
1,MK01 Uusimaa,A,2015,AA,Exports by countries of destination,4425385.0
2,MK01 Uusimaa,A,2015,AD,Imports by countries of origin,0.0
3,MK01 Uusimaa,A,2015,AD,Exports by countries of destination,0.0
4,MK01 Uusimaa,A,2015,AE,Imports by countries of origin,63.92471
