In [0]:
import pandas as pd
import re

In [0]:
## Location of the raw CSV (monthly unemployment per economic activity)
data_url = "https://raw.githubusercontent.com/apogiatzis/race-bar-chart-unemployment/master/data/unemployment_per_economic_activity_monthly.csv"

## Download the CSV into a dataframe and preview its first five rows
df = pd.read_csv(filepath_or_buffer=data_url)
df.head(n=5)

Unnamed: 0,NACE 2 CODE,Activity Code,Economic Activity,Year,Month,Anastoles-Metapoiisi,Anastoles-Touristiki,Termatismoi-Touristiki,Termatismoi-Alloi,Total
0,A,1,"Γεωργία, δασοκομία και αλιεία",2013,Ιανουάριος,0,0,0,159,159
1,B,2,Ορυχεία και λατομεία,2013,Ιανουάριος,0,0,0,62,62
2,C,3,Μεταποίηση,2013,Ιανουάριος,141,0,0,2690,2831
3,D,4,"Παροχή ηλεκτρικού ρεύματος, φυσικού αερίου, ατ...",2013,Ιανουάριος,0,0,0,15,15
4,E,5,"Παροχή νερού, επεξεργασία λυμάτων, διαχείριση ...",2013,Ιανουάριος,0,0,0,31,31


In [0]:
## Greek month name -> English month label.
## Every label starts with a zero-padded "_NN " month number so that plain
## string sorting of labels built from it follows calendar order.
month_gr2en = {
    'Ιανουάριος': '_01 Jan',
    'Φεβρουάριος': '_02 Feb',
    'Μάρτιος': '_03 Mar',
    'Απρίλιος': '_04 Apr',
    'Μάιος': '_05 May',
    'Ιούνιος': '_06 Jun',
    'Ιούλιος': '_07 Jul',
    'Αύγουστος': '_08 Aug',
    'Σεπτέμβριος': '_09 Sep',
    'Οκτώβριος': '_10 Oct',
    'Νοέμβριος': '_11 Nov',
    'Δεκέμβριος': '_12 Dec',
}

In [0]:
## Grouping on the activity name: the keys of this grouping are the
## distinct 'Economic Activity' values that actually occur in the data.
df_grouped = df.groupby(by='Economic Activity')

## One group per (Year, Month) pair; each group maps to the row labels
## of df that belong to that month.
df_year_groups = df.groupby(by=['Year', 'Month'])

In [0]:
## Reshape the long table (one row per activity per month) into a wide one:
## one row per economic activity, one column per "<year> <month>" holding the
## 'Total' value, plus a leading numeric 'category' column. The result is
## written to data.csv.

EA_TOTAL_COLUMNS = [2, 9] # Positional indices of the 'Economic Activity' and 'Total' columns

## Row index of the result: the distinct economic activities
activity_index = pd.Index(list(df_grouped.groups.keys()))

## Build one Series of totals per (year, month), aligned on the activities.
## The series are collected and concatenated once: `join_axes` was removed
## from pd.concat in pandas 1.0, and concatenating inside the loop is quadratic.
monthly_totals = []
for (year, month), rows in df_year_groups.groups.items():
  column_label = str(year) + ' ' + month_gr2en[month]
  ## `rows` holds index labels, hence .loc for rows and .iloc for the columns
  totals = (df.loc[rows]
              .iloc[:, EA_TOTAL_COLUMNS]
              .set_index('Economic Activity')['Total']
              .rename(column_label))
  ## Activities without a row in this month become NaN
  monthly_totals.append(totals.reindex(activity_index))

formatted_df = pd.DataFrame(index=activity_index) # Empty DF, only indices
if monthly_totals:
  formatted_df = pd.concat(monthly_totals, axis=1)

## Group iteration order is not relied upon: sort the month columns by label
## (the "_NN" prefix makes the string order chronological)
sorted_columns = sorted(formatted_df.columns)

## Add category field + the sorted columns
formatted_df['category'] = range(len(activity_index))
formatted_df = formatted_df[['category'] + sorted_columns]

## Remove the prefix used for ordering from the month columns
formatted_df.columns = [re.sub(r"_[0-9]*[ \t]+","", col)
                        for col in formatted_df.columns]

## Set Index
formatted_df.index.name = 'Economic Activity'

## Remove unwanted Economic Activity categories
## (raises KeyError if either label is absent from the index)
formatted_df = formatted_df.drop(['Μη δηλωμένη οικονομική δραστηριότητα', 'Σύνολο'])

## Save to csv
formatted_df.to_csv('data.csv', encoding='utf-8-sig')

formatted_df.head()

Unnamed: 0_level_0,category,2013 Jan,2013 Feb,2013 Mar,2013 Apr,2013 May,2013 Jun,2013 Jul,2013 Aug,2013 Sep,...,2017 Mar,2017 Apr,2017 May,2017 Jun,2017 Jul,2017 Aug,2017 Sep,2017 Oct,2017 Nov,2017 Dec
Economic Activity,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Άλλες δραστηριότητες παροχής υπηρεσιών,0,512,515,512,487,403,471,571,623,599,...,434,355,287,297,389,391,314,247,360,429
"Γεωργία, δασοκομία και αλιεία",1,159,159,159,165,157,156,162,164,170,...,99,95,111,105,107,99,96,96,70,68
Δημόσια διοίκηση και άμυνα. Υποχρεωτική κοινωνική ασφάλιση,2,3140,2775,2723,2767,2652,2651,2401,2188,2275,...,1769,1549,685,1058,1677,1739,1188,568,778,1759
Διαχείριση ακίνητης περιουσίας,3,202,201,177,179,177,163,166,172,168,...,283,253,231,186,170,95,84,91,136,159
Διοικητικές και υποστηρικτικές δραστηριότητες,4,849,827,820,784,594,545,514,478,472,...,686,530,423,296,297,290,264,290,666,780
