In [2588]:
# Import requests package:
import requests

In [2589]:
# Set URL as url:
url = 'https://www.ec.europa.eu/agrifood/api/poultry/egg/prices?'

In [2590]:
# Create parameter dictionary:
parameters = {
    'beginDate': '01/01/2021',
    'endDate': '22/01/2024'}

In [2591]:
# Send get request and save in variable r:
r = requests.get(url, parameters)

# Print result:
print(r)

<Response [200]>


In [2592]:
# Print headers:
r.headers

{'Date': 'Fri, 09 Feb 2024 15:19:36 GMT', 'Content-Type': 'application/json', 'Server': 'Europa', 'Connection': 'close', 'Content-Encoding': 'gzip'}

In [2593]:
# Print cookies:
r.cookies

<RequestsCookieJar[]>

In [2594]:
# Print encoding:
r.encoding

'utf-8'

In [2595]:
# Apply JSON decoder and save in prices_eggs_prelim:
prices_eggs_prelim = r.json()

# Print prices_eggs_prelim:
prices_eggs_prelim

[{'beginDate': '15/01/2024',
  'endDate': '21/01/2024',
  'price': '€334.49',
  'unit': '€/100Kg',
  'farmingMethod': 'Free range',
  'marketingYear': 2024,
  'memberStateCode': 'AT',
  'memberStateName': 'Austria'},
 {'beginDate': '15/01/2024',
  'endDate': '21/01/2024',
  'price': '€515.04',
  'unit': '€/100Kg',
  'farmingMethod': 'Organic',
  'marketingYear': 2024,
  'memberStateCode': 'AT',
  'memberStateName': 'Austria'},
 {'beginDate': '15/01/2024',
  'endDate': '21/01/2024',
  'price': '€266.79',
  'unit': '€/100Kg',
  'farmingMethod': 'Barn',
  'marketingYear': 2024,
  'memberStateCode': 'AT',
  'memberStateName': 'Austria'},
 {'beginDate': '15/01/2024',
  'endDate': '21/01/2024',
  'price': '€327.15',
  'unit': '€/100Kg',
  'farmingMethod': 'Organic',
  'marketingYear': 2024,
  'memberStateCode': 'BE',
  'memberStateName': 'Belgium'},
 {'beginDate': '15/01/2024',
  'endDate': '21/01/2024',
  'price': '€205.03',
  'unit': '€/100Kg',
  'farmingMethod': 'Cage',
  'marketingYear':

In [2596]:
# Print type of prices_eggs_prelim:
type(prices_eggs_prelim)

list

In [2597]:
# Import pandas package:
import pandas as pd

# Transform list to dataframe:
prices_eggs = pd.DataFrame(prices_eggs_prelim)

In [2598]:
# Show dataframe:
prices_eggs

Unnamed: 0,beginDate,endDate,price,unit,farmingMethod,marketingYear,memberStateCode,memberStateName
0,15/01/2024,21/01/2024,€334.49,€/100Kg,Free range,2024,AT,Austria
1,15/01/2024,21/01/2024,€515.04,€/100Kg,Organic,2024,AT,Austria
2,15/01/2024,21/01/2024,€266.79,€/100Kg,Barn,2024,AT,Austria
3,15/01/2024,21/01/2024,€327.15,€/100Kg,Organic,2024,BE,Belgium
4,15/01/2024,21/01/2024,€205.03,€/100Kg,Cage,2024,BE,Belgium
...,...,...,...,...,...,...,...,...
11757,04/01/2021,10/01/2021,€154.31,€/100Kg,Cage,2021,SI,Slovenia
11758,04/01/2021,10/01/2021,€88.64,€/100Kg,Cage,2021,ES,Spain
11759,04/01/2021,10/01/2021,€310.10,€/100Kg,Organic,2021,SE,Sweden
11760,04/01/2021,10/01/2021,€218.24,€/100Kg,Free range,2021,SE,Sweden


In [2599]:
# Show dataframe info:
prices_eggs.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11762 entries, 0 to 11761
Data columns (total 8 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   beginDate        11762 non-null  object
 1   endDate          11762 non-null  object
 2   price            11762 non-null  object
 3   unit             11762 non-null  object
 4   farmingMethod    11762 non-null  object
 5   marketingYear    11762 non-null  int64 
 6   memberStateCode  11762 non-null  object
 7   memberStateName  11762 non-null  object
dtypes: int64(1), object(7)
memory usage: 735.2+ KB


In [2600]:
# It appears that there are no null values.
# However, it is possible that there are null values that are not specified as such (and that are instead indicated with
# certain characters such as "." or similar).
# In order to check this, we first list the unique values of the variables that have only a limited number of values:

In [2601]:
prices_eggs.unit.unique()

array(['€/100Kg'], dtype=object)

In [2602]:
prices_eggs.farmingMethod.unique()

array(['Free range', 'Organic', 'Barn', 'Cage'], dtype=object)

In [2603]:
prices_eggs.marketingYear.unique()

array([2024, 2023, 2022, 2021])

In [2604]:
prices_eggs.memberStateCode.unique()

array(['AT', 'BE', 'BG', 'HR', 'CY', 'CZ', 'EE', 'FI', 'FR', 'DE', 'EL',
       'HU', 'IE', 'IT', 'LV', 'LT', 'MT', 'NL', 'PL', 'PT', 'RO', 'SK',
       'SI', 'ES', 'SE', 'DK'], dtype=object)

In [2605]:
prices_eggs.memberStateName.unique()

array(['Austria', 'Belgium', 'Bulgaria', 'Croatia', 'Cyprus', 'Czechia',
       'Estonia', 'Finland', 'France', 'Germany', 'Greece', 'Hungary',
       'Ireland', 'Italy', 'Latvia', 'Lithuania', 'Malta', 'Netherlands',
       'Poland', 'Portugal', 'Romania', 'Slovakia', 'Slovenia', 'Spain',
       'Sweden', 'Denmark'], dtype=object)

In [2606]:
# For beginDate, endDate, and price, we sort the data by the respective variable and show the first and last
# cases in order to find out whether there are null values that are not specified as such:

In [2607]:
prices_eggs[['beginDate']].sort_values(by='beginDate',ascending=True).head()

Unnamed: 0,beginDate
188,01/01/2024
210,01/01/2024
209,01/01/2024
208,01/01/2024
207,01/01/2024


In [2608]:
prices_eggs[['beginDate']].sort_values(by='beginDate',ascending=True).tail()

Unnamed: 0,beginDate
4971,31/10/2022
4970,31/10/2022
4969,31/10/2022
4938,31/10/2022
4917,31/10/2022


In [2609]:
prices_eggs[['endDate']].sort_values(by='endDate',ascending=True).head()

Unnamed: 0,endDate
4353,01/01/2023
4361,01/01/2023
4362,01/01/2023
4363,01/01/2023
4364,01/01/2023


In [2610]:
prices_eggs[['endDate']].sort_values(by='endDate',ascending=True).tail()

Unnamed: 0,endDate
284,31/12/2023
285,31/12/2023
286,31/12/2023
279,31/12/2023
231,31/12/2023


In [2611]:
prices_eggs[['price']].sort_values(by='price',ascending=True).head()

Unnamed: 0,price
9647,€100.23
10827,€100.24
11535,€100.25
9281,€100.43
9752,€100.53


In [2612]:
prices_eggs[['price']].sort_values(by='price',ascending=True).tail()

Unnamed: 0,price
11490,€99.22
10104,€99.31
10670,€99.36
10209,€99.55
9934,€99.58


In [2613]:
# We conclude that there are no null values in the dataframe.

In [2614]:
# Show variables:
prices_eggs.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11762 entries, 0 to 11761
Data columns (total 8 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   beginDate        11762 non-null  object
 1   endDate          11762 non-null  object
 2   price            11762 non-null  object
 3   unit             11762 non-null  object
 4   farmingMethod    11762 non-null  object
 5   marketingYear    11762 non-null  int64 
 6   memberStateCode  11762 non-null  object
 7   memberStateName  11762 non-null  object
dtypes: int64(1), object(7)
memory usage: 735.2+ KB


In [2615]:
# We do not need the marketingYear variable, especially as we do not know its exact relationship with beginDate and endDate:
prices_eggs = prices_eggs.drop(['marketingYear'], axis=1)

# We do not need the unit variable, as we know that prices are measured in €/100kg:
prices_eggs = prices_eggs.drop(['unit'], axis=1)

# Rename variable names to standard format and to names we can easily work with:
prices_eggs = prices_eggs.rename(columns={'beginDate': 'begin_date'})
prices_eggs = prices_eggs.rename(columns={'endDate': 'end_date'})
prices_eggs = prices_eggs.rename(columns={'farmingMethod': 'farming_method'})
prices_eggs = prices_eggs.rename(columns={'memberStateCode': 'country_code'})
prices_eggs = prices_eggs.rename(columns={'memberStateName': 'country'})

# Show variables again:
prices_eggs.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11762 entries, 0 to 11761
Data columns (total 6 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   begin_date      11762 non-null  object
 1   end_date        11762 non-null  object
 2   price           11762 non-null  object
 3   farming_method  11762 non-null  object
 4   country_code    11762 non-null  object
 5   country         11762 non-null  object
dtypes: object(6)
memory usage: 551.5+ KB


In [2616]:
# Remove the € symbol in the price values:
prices_eggs['price'] = prices_eggs['price'].str.replace('€', '')

# Check results:
prices_eggs

Unnamed: 0,begin_date,end_date,price,farming_method,country_code,country
0,15/01/2024,21/01/2024,334.49,Free range,AT,Austria
1,15/01/2024,21/01/2024,515.04,Organic,AT,Austria
2,15/01/2024,21/01/2024,266.79,Barn,AT,Austria
3,15/01/2024,21/01/2024,327.15,Organic,BE,Belgium
4,15/01/2024,21/01/2024,205.03,Cage,BE,Belgium
...,...,...,...,...,...,...
11757,04/01/2021,10/01/2021,154.31,Cage,SI,Slovenia
11758,04/01/2021,10/01/2021,88.64,Cage,ES,Spain
11759,04/01/2021,10/01/2021,310.10,Organic,SE,Sweden
11760,04/01/2021,10/01/2021,218.24,Free range,SE,Sweden


In [2617]:
# Transform begin_date and end_date to datetime format:
prices_eggs['begin_date'] = pd.to_datetime(prices_eggs['begin_date'], format='%d/%m/%Y')
prices_eggs['end_date'] = pd.to_datetime(prices_eggs['end_date'], format='%d/%m/%Y')

# Transform price to float:
prices_eggs['price'] = prices_eggs['price'].astype(float)

# Check results:
prices_eggs.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11762 entries, 0 to 11761
Data columns (total 6 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   begin_date      11762 non-null  datetime64[ns]
 1   end_date        11762 non-null  datetime64[ns]
 2   price           11762 non-null  float64       
 3   farming_method  11762 non-null  object        
 4   country_code    11762 non-null  object        
 5   country         11762 non-null  object        
dtypes: datetime64[ns](2), float64(1), object(3)
memory usage: 551.5+ KB


In [2618]:
# In order to calculate weighted averages of prices in the EU,
# we need to merge data about the population share of each country in each year.

In [2619]:
# Read in csv file containing data about populations:
population = pd.read_csv('data/population_eu_countries.csv')

In [2620]:
# Show dataframe:
population

Unnamed: 0,country_code,country,year,population,population_share
0,BE,Belgium,2021,11554767,0.0260
1,BE,Belgium,2022,11617623,0.0261
2,BE,Belgium,2023,11754004,0.0263
3,BE,Belgium,2024,11754004,0.0263
4,BG,Bulgaria,2021,6916548,0.0155
...,...,...,...,...,...
103,SE,Sweden,2024,10521556,0.0236
104,sum,total,2021,445094922,1.0000
105,sum,total,2022,444806786,1.0000
106,sum,total,2023,446410586,1.0000


In [2621]:
# Create a variable for the year, based on begin_date:
prices_eggs['year'] = prices_eggs['begin_date'].dt.year

In [2622]:
# Check results:
prices_eggs[['begin_date','year']]

Unnamed: 0,begin_date,year
0,2024-01-15,2024
1,2024-01-15,2024
2,2024-01-15,2024
3,2024-01-15,2024
4,2024-01-15,2024
...,...,...
11757,2021-01-04,2021
11758,2021-01-04,2021
11759,2021-01-04,2021
11760,2021-01-04,2021


In [2623]:
# We merge the population share of each country (identified by country_code) in each year
# to our prices_eggs dataframe:
prices_eggs = pd.merge(prices_eggs, population[['country_code','year','population_share']], on=['country_code','year'], how='left')

In [2624]:
# Check results:
prices_eggs

Unnamed: 0,begin_date,end_date,price,farming_method,country_code,country,year,population_share
0,2024-01-15,2024-01-21,334.49,Free range,AT,Austria,2024,0.0204
1,2024-01-15,2024-01-21,515.04,Organic,AT,Austria,2024,0.0204
2,2024-01-15,2024-01-21,266.79,Barn,AT,Austria,2024,0.0204
3,2024-01-15,2024-01-21,327.15,Organic,BE,Belgium,2024,0.0263
4,2024-01-15,2024-01-21,205.03,Cage,BE,Belgium,2024,0.0263
...,...,...,...,...,...,...,...,...
11757,2021-01-04,2021-01-10,154.31,Cage,SI,Slovenia,2021,0.0047
11758,2021-01-04,2021-01-10,88.64,Cage,ES,Spain,2021,0.1065
11759,2021-01-04,2021-01-10,310.10,Organic,SE,Sweden,2021,0.0233
11760,2021-01-04,2021-01-10,218.24,Free range,SE,Sweden,2021,0.0233


In [2625]:
# Check results with random samples of cases:
prices_eggs.sample(10)

Unnamed: 0,begin_date,end_date,price,farming_method,country_code,country,year,population_share
10041,2021-07-05,2021-07-11,338.09,Organic,PL,Poland,2021,0.085
2969,2023-04-24,2023-04-30,303.0,Free range,NL,Netherlands,2023,0.0399
8294,2021-12-20,2021-12-26,152.79,Barn,BG,Bulgaria,2021,0.0155
913,2023-10-30,2023-11-05,434.83,Organic,SI,Slovenia,2023,0.0047
11195,2021-03-08,2021-03-14,144.33,Cage,PL,Poland,2021,0.085
1044,2023-10-16,2023-10-22,182.22,Cage,LT,Lithuania,2023,0.0064
8393,2021-12-13,2021-12-19,147.39,Cage,HU,Hungary,2021,0.0219
2533,2023-06-05,2023-06-11,231.8,Barn,ES,Spain,2023,0.1077
5629,2022-08-29,2022-09-04,248.81,Barn,MT,Malta,2022,0.0001
5231,2022-10-03,2022-10-09,376.25,Free range,DE,Germany,2022,0.1871


In [2626]:
# We do not need the "year" variable anymore:
prices_eggs = prices_eggs.drop(['year'], axis=1)

In [2627]:
# The weighted average of prices is the sum of the products of weight (i.e., population share) and price.
# Therefore, as the first part of the weighted average of prices,
# we need the products of the population share and the price:
prices_eggs['weight_x_price'] = prices_eggs['population_share'] * prices_eggs['price']

In [2628]:
# Check results:
prices_eggs[['begin_date','end_date','country_code','country','population_share','price','weight_x_price']].sample(10)

Unnamed: 0,begin_date,end_date,country_code,country,population_share,price,weight_x_price
11652,2021-01-11,2021-01-17,AT,Austria,0.0201,244.88,4.922088
8891,2021-10-25,2021-10-31,EL,Greece,0.024,132.27,3.17448
6933,2022-04-25,2022-05-01,FI,Finland,0.0125,405.14,5.06425
11167,2021-03-08,2021-03-14,FI,Finland,0.0124,351.76,4.361824
5191,2022-10-10,2022-10-16,SK,Slovakia,0.0122,201.46,2.457812
9940,2021-07-12,2021-07-18,EE,Estonia,0.003,132.05,0.39615
10220,2021-06-14,2021-06-20,EE,Estonia,0.003,131.29,0.39387
5447,2022-09-12,2022-09-18,CZ,Czechia,0.0236,153.09,3.612924
10044,2021-07-05,2021-07-11,PT,Portugal,0.0231,192.46,4.445826
642,2023-11-20,2023-11-26,DE,Germany,0.189,383.84,72.54576


In [2629]:
# For each combination of date and farming_method,
# build the weighted average of EU prices as the sum of the products of weight and price
# (with "reset_index()", we make a dataframe from the results):
prices_eggs_eu = prices_eggs.groupby(['begin_date', 'end_date', 'farming_method'])['weight_x_price'].sum().reset_index()

# Check results:
prices_eggs_eu

Unnamed: 0,begin_date,end_date,farming_method,weight_x_price
0,2021-01-04,2021-01-10,Barn,63.419317
1,2021-01-04,2021-01-10,Cage,87.260217
2,2021-01-04,2021-01-10,Free range,130.828734
3,2021-01-04,2021-01-10,Organic,116.811479
4,2021-01-11,2021-01-17,Barn,66.583818
...,...,...,...,...
631,2024-01-08,2024-01-14,Organic,322.525781
632,2024-01-15,2024-01-21,Barn,198.781428
633,2024-01-15,2024-01-21,Cage,159.969558
634,2024-01-15,2024-01-21,Free range,231.765704


In [2630]:
# The variable weight_x_price now captures the price in the EU and should therefore be called "price":
prices_eggs_eu = prices_eggs_eu.rename(columns={'weight_x_price': 'price'})

# We add a "country_code" and "country" for the EU values:
prices_eggs_eu['country_code'] = "EU"
prices_eggs_eu['country'] = "European Union"

# Check results:
prices_eggs_eu

Unnamed: 0,begin_date,end_date,farming_method,price,country_code,country
0,2021-01-04,2021-01-10,Barn,63.419317,EU,European Union
1,2021-01-04,2021-01-10,Cage,87.260217,EU,European Union
2,2021-01-04,2021-01-10,Free range,130.828734,EU,European Union
3,2021-01-04,2021-01-10,Organic,116.811479,EU,European Union
4,2021-01-11,2021-01-17,Barn,66.583818,EU,European Union
...,...,...,...,...,...,...
631,2024-01-08,2024-01-14,Organic,322.525781,EU,European Union
632,2024-01-15,2024-01-21,Barn,198.781428,EU,European Union
633,2024-01-15,2024-01-21,Cage,159.969558,EU,European Union
634,2024-01-15,2024-01-21,Free range,231.765704,EU,European Union


In [2631]:
# Append the EU data to the country data:
prices_eggs = pd.concat([prices_eggs, prices_eggs_eu])

# Show result:
prices_eggs

Unnamed: 0,begin_date,end_date,price,farming_method,country_code,country,population_share,weight_x_price
0,2024-01-15,2024-01-21,334.490000,Free range,AT,Austria,0.0204,6.823596
1,2024-01-15,2024-01-21,515.040000,Organic,AT,Austria,0.0204,10.506816
2,2024-01-15,2024-01-21,266.790000,Barn,AT,Austria,0.0204,5.442516
3,2024-01-15,2024-01-21,327.150000,Organic,BE,Belgium,0.0263,8.604045
4,2024-01-15,2024-01-21,205.030000,Cage,BE,Belgium,0.0263,5.392289
...,...,...,...,...,...,...,...,...
631,2024-01-08,2024-01-14,322.525781,Organic,EU,European Union,,
632,2024-01-15,2024-01-21,198.781428,Barn,EU,European Union,,
633,2024-01-15,2024-01-21,159.969558,Cage,EU,European Union,,
634,2024-01-15,2024-01-21,231.765704,Free range,EU,European Union,,


In [2632]:
# We do not need the population_share and weight_x_price variables anymore:
prices_eggs = prices_eggs.drop(['population_share','weight_x_price'], axis=1)

# Show variables:
prices_eggs.info()

<class 'pandas.core.frame.DataFrame'>
Index: 12398 entries, 0 to 635
Data columns (total 6 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   begin_date      12398 non-null  datetime64[ns]
 1   end_date        12398 non-null  datetime64[ns]
 2   price           12398 non-null  float64       
 3   farming_method  12398 non-null  object        
 4   country_code    12398 non-null  object        
 5   country         12398 non-null  object        
dtypes: datetime64[ns](2), float64(1), object(3)
memory usage: 678.0+ KB


In [2633]:
# We now want to calculate relative price premia of organic vs. conventional eggs
# (i.e., how much more expensive organic eggs are).

In [2634]:
# Show the farming methods:
prices_eggs.farming_method.unique()

array(['Free range', 'Organic', 'Barn', 'Cage'], dtype=object)

In [2635]:
# In the following, we will not consider the farming method "Cage",
# because this farming method does not exist in the data for some countries in the last years
# (e.g., in Germany it does not exist after December 2020).

In [2636]:
# Create dataframe specifically for farming method "Organic":
prices_eggs_organic = prices_eggs[prices_eggs['farming_method'] == "Organic"]

# Rename price variable to specify the farming method:
prices_eggs_organic = prices_eggs_organic.rename(columns={'price': 'price_organic'})

# Show dataframe:
prices_eggs_organic

Unnamed: 0,begin_date,end_date,price_organic,farming_method,country_code,country
1,2024-01-15,2024-01-21,515.040000,Organic,AT,Austria
3,2024-01-15,2024-01-21,327.150000,Organic,BE,Belgium
14,2024-01-15,2024-01-21,714.900000,Organic,CY,Cyprus
18,2024-01-15,2024-01-21,512.170000,Organic,EE,Estonia
22,2024-01-15,2024-01-21,374.930000,Organic,FI,Finland
...,...,...,...,...,...,...
619,2023-12-18,2023-12-24,287.285031,Organic,EU,European Union
623,2023-12-25,2023-12-31,286.455523,Organic,EU,European Union
627,2024-01-01,2024-01-07,321.879906,Organic,EU,European Union
631,2024-01-08,2024-01-14,322.525781,Organic,EU,European Union


In [2637]:
# Create dataframe specifically for farming method "Free range":
prices_eggs_free_range = prices_eggs[prices_eggs['farming_method'] == "Free range"]

# Rename price variable to specify the farming method:
prices_eggs_free_range = prices_eggs_free_range.rename(columns={'price': 'price_free_range'})

# Show dataframe:
prices_eggs_free_range

Unnamed: 0,begin_date,end_date,price_free_range,farming_method,country_code,country
0,2024-01-15,2024-01-21,334.490000,Free range,AT,Austria
6,2024-01-15,2024-01-21,292.470000,Free range,BE,Belgium
7,2024-01-15,2024-01-21,290.330000,Free range,BG,Bulgaria
12,2024-01-15,2024-01-21,364.860000,Free range,CY,Cyprus
21,2024-01-15,2024-01-21,345.350000,Free range,EE,Estonia
...,...,...,...,...,...,...
618,2023-12-18,2023-12-24,233.514373,Free range,EU,European Union
622,2023-12-25,2023-12-31,227.863173,Free range,EU,European Union
626,2024-01-01,2024-01-07,227.439473,Free range,EU,European Union
630,2024-01-08,2024-01-14,233.068679,Free range,EU,European Union


In [2638]:
# Create dataframe specifically for farming method "Barn":
prices_eggs_barn = prices_eggs[prices_eggs['farming_method'] == "Barn"]

# Rename price variable to specify the farming method:
prices_eggs_barn = prices_eggs_barn.rename(columns={'price': 'price_barn'})

# Show dataframe:
prices_eggs_barn

Unnamed: 0,begin_date,end_date,price_barn,farming_method,country_code,country
2,2024-01-15,2024-01-21,266.790000,Barn,AT,Austria
5,2024-01-15,2024-01-21,255.780000,Barn,BE,Belgium
9,2024-01-15,2024-01-21,251.820000,Barn,BG,Bulgaria
10,2024-01-15,2024-01-21,277.110000,Barn,HR,Croatia
15,2024-01-15,2024-01-21,264.500000,Barn,CY,Cyprus
...,...,...,...,...,...,...
616,2023-12-18,2023-12-24,203.524783,Barn,EU,European Union
620,2023-12-25,2023-12-31,201.812977,Barn,EU,European Union
624,2024-01-01,2024-01-07,202.410913,Barn,EU,European Union
628,2024-01-08,2024-01-14,198.496147,Barn,EU,European Union


In [2639]:
# Now we stepwise merge the prices for free-range eggs and barn eggs (for each date and country_code)
# to the dataframe for organic eggs and save the result in the prices_eggs dataframe:
prices_eggs = pd.merge(prices_eggs_organic, prices_eggs_free_range[['begin_date','end_date','country_code','price_free_range']], on=['begin_date','end_date','country_code'], how='left')
prices_eggs = pd.merge(prices_eggs, prices_eggs_barn[['begin_date','end_date','country_code','price_barn']], on=['begin_date','end_date','country_code'], how='left')

# We do not need the variable "farming_method" anymore:
prices_eggs = prices_eggs.drop(['farming_method'], axis=1)

# Show dataframe:
prices_eggs

Unnamed: 0,begin_date,end_date,price_organic,country_code,country,price_free_range,price_barn
0,2024-01-15,2024-01-21,515.040000,AT,Austria,334.490000,266.790000
1,2024-01-15,2024-01-21,327.150000,BE,Belgium,292.470000,255.780000
2,2024-01-15,2024-01-21,714.900000,CY,Cyprus,364.860000,264.500000
3,2024-01-15,2024-01-21,512.170000,EE,Estonia,345.350000,273.200000
4,2024-01-15,2024-01-21,374.930000,FI,Finland,,222.640000
...,...,...,...,...,...,...,...
2611,2023-12-18,2023-12-24,287.285031,EU,European Union,233.514373,203.524783
2612,2023-12-25,2023-12-31,286.455523,EU,European Union,227.863173,201.812977
2613,2024-01-01,2024-01-07,321.879906,EU,European Union,227.439473,202.410913
2614,2024-01-08,2024-01-14,322.525781,EU,European Union,233.068679,198.496147


In [2640]:
# Relative price premium for organic vs. free-range eggs in percent:
prices_eggs['price_organic_free_range'] = ( ( prices_eggs['price_organic'] / prices_eggs['price_free_range'] ) - 1 ) * 100

# Relative price premium for organic vs. barn eggs in percent:
prices_eggs['price_organic_barn'] = ( ( prices_eggs['price_organic'] / prices_eggs['price_barn'] ) - 1 ) * 100

# Show results:
prices_eggs

Unnamed: 0,begin_date,end_date,price_organic,country_code,country,price_free_range,price_barn,price_organic_free_range,price_organic_barn
0,2024-01-15,2024-01-21,515.040000,AT,Austria,334.490000,266.790000,53.977697,93.050714
1,2024-01-15,2024-01-21,327.150000,BE,Belgium,292.470000,255.780000,11.857626,27.902885
2,2024-01-15,2024-01-21,714.900000,CY,Cyprus,364.860000,264.500000,95.938168,170.283554
3,2024-01-15,2024-01-21,512.170000,EE,Estonia,345.350000,273.200000,48.304619,87.470717
4,2024-01-15,2024-01-21,374.930000,FI,Finland,,222.640000,,68.401904
...,...,...,...,...,...,...,...,...,...
2611,2023-12-18,2023-12-24,287.285031,EU,European Union,233.514373,203.524783,23.026702,41.154815
2612,2023-12-25,2023-12-31,286.455523,EU,European Union,227.863173,201.812977,25.713830,41.941082
2613,2024-01-01,2024-01-07,321.879906,EU,European Union,227.439473,202.410913,41.523326,59.023000
2614,2024-01-08,2024-01-14,322.525781,EU,European Union,233.068679,198.496147,38.382292,62.484656


In [2641]:
# We now want to upload the dataframe to the database on the server.

In [2642]:
# Import sql_functions.py because we need some functions from that module:
import sql_functions as sqlf

# We need to restart the kernel and rerun at this point if we changed the module since we first imported it.

In [2643]:
# Create a variable called engine using the get_engine function:
engine = sqlf.get_engine()

In [2644]:
# We set the schema to our course name:
schema = 'capstone_organicfood'

# We set table_name to our group name + the name of the dataframe:
table_name = 'prices_eggs'

In [2645]:
# We need psycopg2 for raising possible error message:
import psycopg2

In [2646]:
# Write records stored in dataframe to SQL database:
if engine!=None:
    try:
        prices_eggs.to_sql(name=table_name, # name of SQL table variable
                        con=engine, # engine or connection
                        schema=schema, # our class schema variable
                        if_exists='replace', # Drop the table before inserting new values
                        index=False, # Write DataFrame index as a column
                        chunksize=5000, # Specify the number of rows in each batch to be written at a time
                        method='multi') # Pass multiple values in a single INSERT clause
        print(f"The {table_name} table was imported successfully.")
    # Error handling
    except (Exception, psycopg2.DatabaseError) as error:
        print(error)
        engine = None
else:
    print('No engine')

The prices_eggs table was imported successfully.


In [2647]:
# Test: query the newly created table to count the rows (we know from above that the dataframe has 2,616 cases):
sqlf.get_dataframe(f'SELECT COUNT(*) FROM {schema}.prices_eggs;')

Unnamed: 0,count
0,2616


In [2648]:
# Worked!