In [1]:
from pathlib import Path

import pandas as pd

In [2]:
# EU Ecolabel product export; dataset page:
# https://data.europa.eu/data/datasets/eu-ecolabel-products?locale=en
ECOLABEL_EXPORT_URL = 'https://ecenvdatapublic.blob.core.windows.net/ecolabel/exports/most-recent-export.csv'
ethical_products_df = pd.read_csv(ECOLABEL_EXPORT_URL)

In [3]:
# Country name / two-letter code list; dataset page: https://datahub.io/core/country-list
# keep_default_na=False: with pandas' default NA parsing the literal code "NA"
# (Namibia) is read as a missing value instead of a string, so that country's code
# would be lost before the merge on country code.
country_codes_df = pd.read_csv(
    'https://datahub.io/core/country-list/r/data.csv',
    keep_default_na=False,
)

In [4]:
# Convert the columns of both frames to the dtypes pandas infers as the best fit.
ethical_products_df, country_codes_df = (
    frame.convert_dtypes()
    for frame in (ethical_products_df, country_codes_df)
)

In [5]:
# Keep only the rows whose PRODUCT_SERVICE value is 'PRODUCT'.
is_product = ethical_products_df['PRODUCT_SERVICE'] == 'PRODUCT'
df = ethical_products_df.loc[is_product]

In [6]:
# Discard the columns that are not needed for the exported tables.
unused_columns = [
    'PRODUCT_SERVICE',
    'LIC_NR',
    'GROUP_CODE',
    'GROUP_NAME',
    'EXTID_TYPE',
    'EXT_ID',
    'VAT',
    'COMPANY_COUNTRY',
]
df = df.drop(columns=unused_columns)

In [7]:
# Rename the remaining columns to the names used by the export cell.
# The decision key was originally written as '\DECISION'. '\D' is not a valid escape
# sequence, so Python warns about it (SyntaxWarning from 3.12); the raw string below
# keeps that exact key without the warning. The plain 'DECISION' header is mapped too,
# so the 'Decision' column exists whichever spelling the export uses -- rename()
# silently ignores keys that are not present in the frame.
# NOTE(review): confirm the actual header in the export and drop the unused key.
df = df.rename(columns={
    'NAME': 'ProductName',
    'DECISION': 'Decision',
    r'\DECISION': 'Decision',
    'EXPIRATION_DATE': 'ExpirationDate',
    'COMPANY_NAME': 'CompanyName',
    'COUNTRY': 'Country',
})

In [8]:
# Attach the country name to each row by matching 'Country' against the country
# list's 'Code' column. This is pandas' default inner join, so rows whose code has
# no match in the list are not kept. Afterwards the now-redundant 'Country' column
# is removed and the joined columns get their final names.
df = (
    df.merge(
        country_codes_df[['Name', 'Code']],
        left_on='Country',
        right_on='Code',
    )
    .drop(columns=['Country'])
    .rename(columns={'Name': 'CountryName', 'Code': 'CountryCode'})
)

### Save CSVs

In [10]:
# Write the de-duplicated relationship tables and the full cleaned table as CSV.
SILVER_DIR = Path("../../data/silver")
# to_csv does not create missing parent directories, so make sure the target
# directory exists before writing (no-op when it is already there).
SILVER_DIR.mkdir(parents=True, exist_ok=True)

# Output file name -> columns to export; None means every column of df.
silver_exports = {
    "has_product.csv": ['CompanyName', 'ProductName'],
    "located_in.csv": ['CompanyName', 'CountryName'],
    "come_from.csv": ['ProductName', 'CountryName'],
    "date.csv": ['ProductName', 'Decision', 'ExpirationDate'],
    "complete_cleaned.csv": None,
}
for file_name, columns in silver_exports.items():
    subset = df if columns is None else df[columns]
    subset.drop_duplicates().to_csv(SILVER_DIR / file_name, index=False, sep=',')

In [11]:
# Shell escape: long-format listing of the CSV files in the silver data directory.
! ls -l ../../data/silver/*.csv

-rw-rw-r-- 1 ubuntu ubuntu 1688698 Apr 30 16:00 ../../data/silver/come_from.csv
-rw-rw-r-- 1 ubuntu ubuntu 3525979 Apr 30 16:00 ../../data/silver/complete_cleaned.csv
-rw-rw-r-- 1 ubuntu ubuntu 2530802 Apr 30 16:00 ../../data/silver/date.csv
-rw-rw-r-- 1 ubuntu ubuntu 2135500 Apr 30 16:00 ../../data/silver/has_product.csv
-rw-rw-r-- 1 ubuntu ubuntu   25062 Apr 30 16:00 ../../data/silver/located_in.csv


In [12]:
# Shell escape: pack every CSV in the silver data directory into one gzip-compressed
# tarball (csvdata.tar.gz) written to that same directory.
# tar strips the leading '../../' from member names (it reports this in its output),
# so the archive entries are stored under data/silver/.
! tar czvf ../../data/silver/csvdata.tar.gz ../../data/silver/*.csv

tar: Removing leading `../../' from member names
../../data/silver/come_from.csv
tar: Removing leading `../../' from hard link targets
../../data/silver/complete_cleaned.csv
../../data/silver/date.csv
../../data/silver/has_product.csv
../../data/silver/located_in.csv
