In [379]:
# Import requests package:
import requests

In [380]:
# Set URL as url:
url = 'https://www.ec.europa.eu/agrifood/api/poultry/egg/prices?'

In [381]:
# Create parameter dictionary:
parameters = {
    'beginDate': '01/01/2021',
    'endDate': '22/01/2024'}

In [382]:
# Send get request and save in variable r:
r = requests.get(url, parameters)

# Print result:
print(r)

<Response [200]>


In [383]:
# Print headers:
r.headers

{'Date': 'Mon, 12 Feb 2024 13:59:01 GMT', 'Content-Type': 'application/json', 'Server': 'Europa', 'Connection': 'close', 'Content-Encoding': 'gzip'}

In [384]:
# Print cookies:
r.cookies

<RequestsCookieJar[]>

In [385]:
# Print encoding:
r.encoding

'utf-8'

In [386]:
# Apply JSON decoder and save in prices_eggs_prelim:
prices_eggs_prelim = r.json()

# Print prices_eggs_prelim:
prices_eggs_prelim

[{'beginDate': '15/01/2024',
  'endDate': '21/01/2024',
  'price': '€334.49',
  'unit': '€/100Kg',
  'farmingMethod': 'Free range',
  'marketingYear': 2024,
  'memberStateCode': 'AT',
  'memberStateName': 'Austria'},
 {'beginDate': '15/01/2024',
  'endDate': '21/01/2024',
  'price': '€515.04',
  'unit': '€/100Kg',
  'farmingMethod': 'Organic',
  'marketingYear': 2024,
  'memberStateCode': 'AT',
  'memberStateName': 'Austria'},
 {'beginDate': '15/01/2024',
  'endDate': '21/01/2024',
  'price': '€266.79',
  'unit': '€/100Kg',
  'farmingMethod': 'Barn',
  'marketingYear': 2024,
  'memberStateCode': 'AT',
  'memberStateName': 'Austria'},
 {'beginDate': '15/01/2024',
  'endDate': '21/01/2024',
  'price': '€292.47',
  'unit': '€/100Kg',
  'farmingMethod': 'Free range',
  'marketingYear': 2024,
  'memberStateCode': 'BE',
  'memberStateName': 'Belgium'},
 {'beginDate': '15/01/2024',
  'endDate': '21/01/2024',
  'price': '€327.15',
  'unit': '€/100Kg',
  'farmingMethod': 'Organic',
  'marketing

In [387]:
# Print type of prices_eggs_prelim:
type(prices_eggs_prelim)

list

In [388]:
# Import pandas package:
import pandas as pd

# Transform list to dataframe:
prices_eggs = pd.DataFrame(prices_eggs_prelim)

In [389]:
# Show dataframe:
prices_eggs

Unnamed: 0,beginDate,endDate,price,unit,farmingMethod,marketingYear,memberStateCode,memberStateName
0,15/01/2024,21/01/2024,€334.49,€/100Kg,Free range,2024,AT,Austria
1,15/01/2024,21/01/2024,€515.04,€/100Kg,Organic,2024,AT,Austria
2,15/01/2024,21/01/2024,€266.79,€/100Kg,Barn,2024,AT,Austria
3,15/01/2024,21/01/2024,€292.47,€/100Kg,Free range,2024,BE,Belgium
4,15/01/2024,21/01/2024,€327.15,€/100Kg,Organic,2024,BE,Belgium
...,...,...,...,...,...,...,...,...
11757,04/01/2021,10/01/2021,€154.31,€/100Kg,Cage,2021,SI,Slovenia
11758,04/01/2021,10/01/2021,€88.64,€/100Kg,Cage,2021,ES,Spain
11759,04/01/2021,10/01/2021,€310.10,€/100Kg,Organic,2021,SE,Sweden
11760,04/01/2021,10/01/2021,€218.24,€/100Kg,Free range,2021,SE,Sweden


In [390]:
# Show dataframe info:
prices_eggs.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11762 entries, 0 to 11761
Data columns (total 8 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   beginDate        11762 non-null  object
 1   endDate          11762 non-null  object
 2   price            11762 non-null  object
 3   unit             11762 non-null  object
 4   farmingMethod    11762 non-null  object
 5   marketingYear    11762 non-null  int64 
 6   memberStateCode  11762 non-null  object
 7   memberStateName  11762 non-null  object
dtypes: int64(1), object(7)
memory usage: 735.2+ KB


In [391]:
# It appears that there are no null values.
# However, it is possible that there are null values that are not specified as such (and that are instead indicated with
# certain characters such as "." or similar).
# In order to check this, we first list the unique values of the variables that have only a limited number of values:

In [392]:
prices_eggs.unit.unique()

array(['€/100Kg'], dtype=object)

In [393]:
prices_eggs.farmingMethod.unique()

array(['Free range', 'Organic', 'Barn', 'Cage'], dtype=object)

In [394]:
prices_eggs.marketingYear.unique()

array([2024, 2023, 2022, 2021])

In [395]:
prices_eggs.memberStateCode.unique()

array(['AT', 'BE', 'BG', 'HR', 'CY', 'CZ', 'EE', 'FI', 'FR', 'DE', 'EL',
       'HU', 'IE', 'IT', 'LV', 'LT', 'MT', 'NL', 'PL', 'PT', 'RO', 'SK',
       'SI', 'ES', 'SE', 'DK'], dtype=object)

In [396]:
prices_eggs.memberStateName.unique()

array(['Austria', 'Belgium', 'Bulgaria', 'Croatia', 'Cyprus', 'Czechia',
       'Estonia', 'Finland', 'France', 'Germany', 'Greece', 'Hungary',
       'Ireland', 'Italy', 'Latvia', 'Lithuania', 'Malta', 'Netherlands',
       'Poland', 'Portugal', 'Romania', 'Slovakia', 'Slovenia', 'Spain',
       'Sweden', 'Denmark'], dtype=object)

In [397]:
# For beginDate, endDate, and price, we sort the data by the respective variable and show the first and last
# cases in order to find out whether there are null values that are not specified as such:

In [398]:
prices_eggs[['beginDate']].sort_values(by='beginDate',ascending=True).head()

Unnamed: 0,beginDate
188,01/01/2024
210,01/01/2024
209,01/01/2024
208,01/01/2024
207,01/01/2024


In [399]:
prices_eggs[['beginDate']].sort_values(by='beginDate',ascending=True).tail()

Unnamed: 0,beginDate
4971,31/10/2022
4970,31/10/2022
4969,31/10/2022
4938,31/10/2022
4917,31/10/2022


In [400]:
prices_eggs[['endDate']].sort_values(by='endDate',ascending=True).head()

Unnamed: 0,endDate
4353,01/01/2023
4361,01/01/2023
4362,01/01/2023
4363,01/01/2023
4364,01/01/2023


In [401]:
prices_eggs[['endDate']].sort_values(by='endDate',ascending=True).tail()

Unnamed: 0,endDate
284,31/12/2023
285,31/12/2023
286,31/12/2023
279,31/12/2023
231,31/12/2023


In [402]:
prices_eggs[['price']].sort_values(by='price',ascending=True).head()

Unnamed: 0,price
9648,€100.23
10827,€100.24
11535,€100.25
9281,€100.43
9753,€100.53


In [403]:
prices_eggs[['price']].sort_values(by='price',ascending=True).tail()

Unnamed: 0,price
11491,€99.22
10104,€99.31
10670,€99.36
10209,€99.55
9935,€99.58


In [404]:
# We conclude that there are no null values in the dataframe.

In [405]:
# Show variables:
prices_eggs.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11762 entries, 0 to 11761
Data columns (total 8 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   beginDate        11762 non-null  object
 1   endDate          11762 non-null  object
 2   price            11762 non-null  object
 3   unit             11762 non-null  object
 4   farmingMethod    11762 non-null  object
 5   marketingYear    11762 non-null  int64 
 6   memberStateCode  11762 non-null  object
 7   memberStateName  11762 non-null  object
dtypes: int64(1), object(7)
memory usage: 735.2+ KB


In [406]:
# We do not need the marketingYear variable, especially as we do not know its exact relationship with beginDate and endDate:
prices_eggs = prices_eggs.drop(['marketingYear'], axis=1)

# We do not need the unit variable, as we know that prices are measured in €/100kg:
prices_eggs = prices_eggs.drop(['unit'], axis=1)

# Rename variable names to standard format and to names we can easily work with:
prices_eggs = prices_eggs.rename(columns={'beginDate': 'begin_date'})
prices_eggs = prices_eggs.rename(columns={'endDate': 'end_date'})
prices_eggs = prices_eggs.rename(columns={'farmingMethod': 'farming_method'})
prices_eggs = prices_eggs.rename(columns={'memberStateCode': 'country_code'})
prices_eggs = prices_eggs.rename(columns={'memberStateName': 'country'})

# Show variables again:
prices_eggs.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11762 entries, 0 to 11761
Data columns (total 6 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   begin_date      11762 non-null  object
 1   end_date        11762 non-null  object
 2   price           11762 non-null  object
 3   farming_method  11762 non-null  object
 4   country_code    11762 non-null  object
 5   country         11762 non-null  object
dtypes: object(6)
memory usage: 551.5+ KB


In [407]:
# Remove the € symbol in the price values:
prices_eggs['price'] = prices_eggs['price'].str.replace('€', '')

# Check results:
prices_eggs

Unnamed: 0,begin_date,end_date,price,farming_method,country_code,country
0,15/01/2024,21/01/2024,334.49,Free range,AT,Austria
1,15/01/2024,21/01/2024,515.04,Organic,AT,Austria
2,15/01/2024,21/01/2024,266.79,Barn,AT,Austria
3,15/01/2024,21/01/2024,292.47,Free range,BE,Belgium
4,15/01/2024,21/01/2024,327.15,Organic,BE,Belgium
...,...,...,...,...,...,...
11757,04/01/2021,10/01/2021,154.31,Cage,SI,Slovenia
11758,04/01/2021,10/01/2021,88.64,Cage,ES,Spain
11759,04/01/2021,10/01/2021,310.10,Organic,SE,Sweden
11760,04/01/2021,10/01/2021,218.24,Free range,SE,Sweden


In [408]:
# Transform begin_date and end_date to datetime format:
prices_eggs['begin_date'] = pd.to_datetime(prices_eggs['begin_date'], format='%d/%m/%Y')
prices_eggs['end_date'] = pd.to_datetime(prices_eggs['end_date'], format='%d/%m/%Y')

# Transform price to float:
prices_eggs['price'] = prices_eggs['price'].astype(float)

# Check results:
prices_eggs.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11762 entries, 0 to 11761
Data columns (total 6 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   begin_date      11762 non-null  datetime64[ns]
 1   end_date        11762 non-null  datetime64[ns]
 2   price           11762 non-null  float64       
 3   farming_method  11762 non-null  object        
 4   country_code    11762 non-null  object        
 5   country         11762 non-null  object        
dtypes: datetime64[ns](2), float64(1), object(3)
memory usage: 551.5+ KB


In [409]:
# In order to calculate weighted averages of prices in the EU,
# we need to merge data about the population share of each country in each year.

In [410]:
# Read in csv file containing data about populations:
population = pd.read_csv('data/population_eu_countries.csv')

In [411]:
# Show dataframe:
population

Unnamed: 0,country_code,country,year,population,population_share
0,BE,Belgium,2021,11554767,0.0260
1,BE,Belgium,2022,11617623,0.0261
2,BE,Belgium,2023,11754004,0.0263
3,BE,Belgium,2024,11754004,0.0263
4,BG,Bulgaria,2021,6916548,0.0155
...,...,...,...,...,...
103,SE,Sweden,2024,10521556,0.0236
104,sum,total,2021,445094922,1.0000
105,sum,total,2022,444806786,1.0000
106,sum,total,2023,446410586,1.0000


In [412]:
# Create a variable for the year, based on begin_date:
prices_eggs['year'] = prices_eggs['begin_date'].dt.year

In [413]:
# Check results:
prices_eggs[['begin_date','year']]

Unnamed: 0,begin_date,year
0,2024-01-15,2024
1,2024-01-15,2024
2,2024-01-15,2024
3,2024-01-15,2024
4,2024-01-15,2024
...,...,...
11757,2021-01-04,2021
11758,2021-01-04,2021
11759,2021-01-04,2021
11760,2021-01-04,2021


In [414]:
# We merge the population share of each country (identified by country_code) in each year
# to our prices_eggs dataframe:
prices_eggs = pd.merge(prices_eggs, population[['country_code','year','population_share']], on=['country_code','year'], how='left')

In [415]:
# Check results:
prices_eggs

Unnamed: 0,begin_date,end_date,price,farming_method,country_code,country,year,population_share
0,2024-01-15,2024-01-21,334.49,Free range,AT,Austria,2024,0.0204
1,2024-01-15,2024-01-21,515.04,Organic,AT,Austria,2024,0.0204
2,2024-01-15,2024-01-21,266.79,Barn,AT,Austria,2024,0.0204
3,2024-01-15,2024-01-21,292.47,Free range,BE,Belgium,2024,0.0263
4,2024-01-15,2024-01-21,327.15,Organic,BE,Belgium,2024,0.0263
...,...,...,...,...,...,...,...,...
11757,2021-01-04,2021-01-10,154.31,Cage,SI,Slovenia,2021,0.0047
11758,2021-01-04,2021-01-10,88.64,Cage,ES,Spain,2021,0.1065
11759,2021-01-04,2021-01-10,310.10,Organic,SE,Sweden,2021,0.0233
11760,2021-01-04,2021-01-10,218.24,Free range,SE,Sweden,2021,0.0233


In [416]:
# Check results with random samples of cases:
prices_eggs.sample(10)

Unnamed: 0,begin_date,end_date,price,farming_method,country_code,country,year,population_share
11337,2021-02-22,2021-02-28,241.38,Organic,SI,Slovenia,2021,0.0047
10177,2021-06-21,2021-06-27,123.0,Barn,NL,Netherlands,2021,0.0393
10753,2021-04-19,2021-04-25,190.48,Barn,AT,Austria,2021,0.0201
7242,2022-03-28,2022-04-03,332.96,Organic,LV,Latvia,2022,0.0042
11678,2021-01-11,2021-01-17,132.67,Organic,EL,Greece,2021,0.024
8805,2021-11-01,2021-11-07,203.15,Free range,EE,Estonia,2021,0.003
9327,2021-09-13,2021-09-19,129.96,Barn,LT,Lithuania,2021,0.0063
5995,2022-07-25,2022-07-31,247.87,Free range,LV,Latvia,2022,0.0042
1396,2023-09-11,2023-09-17,172.91,Cage,CY,Cyprus,2023,0.0002
7516,2022-02-28,2022-03-06,153.79,Cage,EE,Estonia,2022,0.003


In [417]:
# We do not need the "year" variable anymore:
prices_eggs = prices_eggs.drop(['year'], axis=1)

In [418]:
# The weighted average of prices is the sum of the products of weight (i.e., population share) and price.
# Therefore, as the first part of the weighted average of prices,
# we need the products of the population share and the price:
prices_eggs['weight_x_price'] = prices_eggs['population_share'] * prices_eggs['price']

In [419]:
# Check results:
prices_eggs[['begin_date','end_date','country_code','country','population_share','price','weight_x_price']].sample(10)

Unnamed: 0,begin_date,end_date,country_code,country,population_share,price,weight_x_price
11274,2021-03-01,2021-03-07,SI,Slovenia,0.0047,153.0,0.7191
2074,2023-07-17,2023-07-23,SE,Sweden,0.0236,242.22,5.716392
6310,2022-06-27,2022-07-03,PT,Portugal,0.0233,181.22,4.222426
4098,2023-01-09,2023-01-15,FR,France,0.1525,250.98,38.27445
6902,2022-05-02,2022-05-08,SI,Slovenia,0.0047,402.93,1.893771
6065,2022-07-18,2022-07-24,HU,Hungary,0.0218,162.19,3.535742
2517,2023-06-05,2023-06-11,PT,Portugal,0.0234,469.58,10.988172
10785,2021-04-19,2021-04-25,IE,Ireland,0.0112,143.85,1.61112
11620,2021-01-18,2021-01-24,HU,Hungary,0.0219,122.26,2.677494
5360,2022-09-19,2022-09-25,BE,Belgium,0.0261,293.06,7.648866


In [420]:
# For each combination of date and farming_method,
# build the weighted average of EU prices as the sum of the products of weight and price
# (with "reset_index()", we make a dataframe from the results):
prices_eggs_eu = prices_eggs.groupby(['begin_date', 'end_date', 'farming_method'])['weight_x_price'].sum().reset_index()

# Check results:
prices_eggs_eu

Unnamed: 0,begin_date,end_date,farming_method,weight_x_price
0,2021-01-04,2021-01-10,Barn,63.419317
1,2021-01-04,2021-01-10,Cage,87.260217
2,2021-01-04,2021-01-10,Free range,130.828734
3,2021-01-04,2021-01-10,Organic,116.811479
4,2021-01-11,2021-01-17,Barn,66.583818
...,...,...,...,...
631,2024-01-08,2024-01-14,Organic,322.525781
632,2024-01-15,2024-01-21,Barn,198.781428
633,2024-01-15,2024-01-21,Cage,159.969558
634,2024-01-15,2024-01-21,Free range,231.765704


In [421]:
# The variable weight_x_price now captures the price in the EU and should therefore be called "price":
prices_eggs_eu = prices_eggs_eu.rename(columns={'weight_x_price': 'price'})

# We add a "country_code" and "country" for the EU values:
prices_eggs_eu['country_code'] = "EU"
prices_eggs_eu['country'] = "European Union"

# Check results:
prices_eggs_eu

Unnamed: 0,begin_date,end_date,farming_method,price,country_code,country
0,2021-01-04,2021-01-10,Barn,63.419317,EU,European Union
1,2021-01-04,2021-01-10,Cage,87.260217,EU,European Union
2,2021-01-04,2021-01-10,Free range,130.828734,EU,European Union
3,2021-01-04,2021-01-10,Organic,116.811479,EU,European Union
4,2021-01-11,2021-01-17,Barn,66.583818,EU,European Union
...,...,...,...,...,...,...
631,2024-01-08,2024-01-14,Organic,322.525781,EU,European Union
632,2024-01-15,2024-01-21,Barn,198.781428,EU,European Union
633,2024-01-15,2024-01-21,Cage,159.969558,EU,European Union
634,2024-01-15,2024-01-21,Free range,231.765704,EU,European Union


In [422]:
# Append the EU data to the country data:
prices_eggs = pd.concat([prices_eggs, prices_eggs_eu])

# Show result:
prices_eggs

Unnamed: 0,begin_date,end_date,price,farming_method,country_code,country,population_share,weight_x_price
0,2024-01-15,2024-01-21,334.490000,Free range,AT,Austria,0.0204,6.823596
1,2024-01-15,2024-01-21,515.040000,Organic,AT,Austria,0.0204,10.506816
2,2024-01-15,2024-01-21,266.790000,Barn,AT,Austria,0.0204,5.442516
3,2024-01-15,2024-01-21,292.470000,Free range,BE,Belgium,0.0263,7.691961
4,2024-01-15,2024-01-21,327.150000,Organic,BE,Belgium,0.0263,8.604045
...,...,...,...,...,...,...,...,...
631,2024-01-08,2024-01-14,322.525781,Organic,EU,European Union,,
632,2024-01-15,2024-01-21,198.781428,Barn,EU,European Union,,
633,2024-01-15,2024-01-21,159.969558,Cage,EU,European Union,,
634,2024-01-15,2024-01-21,231.765704,Free range,EU,European Union,,


In [423]:
# We do not need the population_share and weight_x_price variables anymore:
prices_eggs = prices_eggs.drop(['population_share','weight_x_price'], axis=1)

# Show variables:
prices_eggs.info()

<class 'pandas.core.frame.DataFrame'>
Index: 12398 entries, 0 to 635
Data columns (total 6 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   begin_date      12398 non-null  datetime64[ns]
 1   end_date        12398 non-null  datetime64[ns]
 2   price           12398 non-null  float64       
 3   farming_method  12398 non-null  object        
 4   country_code    12398 non-null  object        
 5   country         12398 non-null  object        
dtypes: datetime64[ns](2), float64(1), object(3)
memory usage: 678.0+ KB


In [424]:
# We now want to calculate relative prices of organic vs. conventional eggs.

In [425]:
# Show the farming methods:
prices_eggs.farming_method.unique()

array(['Free range', 'Organic', 'Barn', 'Cage'], dtype=object)

In [426]:
# In the following, we will not consider the farming method "Cage",
# because this farming method does not exist in the data for some countries in the last years
# (e.g., in Germany it does not exist after December 2020).

In [427]:
# In order to calculate relative prices, we need a separate variable for
# the price of organic eggs, so that we can relate this price to the prices of conventional eggs.

In [428]:
# Create dataframe specifically for farming method "Organic":
prices_eggs_organic = prices_eggs[prices_eggs['farming_method'] == "Organic"]

# Rename price variable to specify the farming method:
prices_eggs_organic = prices_eggs_organic.rename(columns={'price': 'price_organic'})

# Show dataframe:
prices_eggs_organic

Unnamed: 0,begin_date,end_date,price_organic,farming_method,country_code,country
1,2024-01-15,2024-01-21,515.040000,Organic,AT,Austria
4,2024-01-15,2024-01-21,327.150000,Organic,BE,Belgium
14,2024-01-15,2024-01-21,714.900000,Organic,CY,Cyprus
21,2024-01-15,2024-01-21,512.170000,Organic,EE,Estonia
23,2024-01-15,2024-01-21,374.930000,Organic,FI,Finland
...,...,...,...,...,...,...
619,2023-12-18,2023-12-24,287.285031,Organic,EU,European Union
623,2023-12-25,2023-12-31,286.455523,Organic,EU,European Union
627,2024-01-01,2024-01-07,321.879906,Organic,EU,European Union
631,2024-01-08,2024-01-14,322.525781,Organic,EU,European Union


In [429]:
# Now we merge the price for organic eggs (for each date and country_code) to the prices_eggs dataframe:
prices_eggs = pd.merge(prices_eggs, prices_eggs_organic[['begin_date','end_date','country_code','price_organic']], on=['begin_date','end_date','country_code'], how='left')

# Show dataframe:
prices_eggs

Unnamed: 0,begin_date,end_date,price,farming_method,country_code,country,price_organic
0,2024-01-15,2024-01-21,334.490000,Free range,AT,Austria,515.040000
1,2024-01-15,2024-01-21,515.040000,Organic,AT,Austria,515.040000
2,2024-01-15,2024-01-21,266.790000,Barn,AT,Austria,515.040000
3,2024-01-15,2024-01-21,292.470000,Free range,BE,Belgium,327.150000
4,2024-01-15,2024-01-21,327.150000,Organic,BE,Belgium,327.150000
...,...,...,...,...,...,...,...
12599,2024-01-08,2024-01-14,322.525781,Organic,EU,European Union,322.525781
12600,2024-01-15,2024-01-21,198.781428,Barn,EU,European Union,322.521458
12601,2024-01-15,2024-01-21,159.969558,Cage,EU,European Union,322.521458
12602,2024-01-15,2024-01-21,231.765704,Free range,EU,European Union,322.521458


In [430]:
# Relative price of organic eggs in comparison to the farming method shown in the respective row (result in percent):
prices_eggs['price_organic_rel'] = ( prices_eggs['price_organic'] / prices_eggs['price'] ) * 100

# Show results:
prices_eggs

Unnamed: 0,begin_date,end_date,price,farming_method,country_code,country,price_organic,price_organic_rel
0,2024-01-15,2024-01-21,334.490000,Free range,AT,Austria,515.040000,153.977697
1,2024-01-15,2024-01-21,515.040000,Organic,AT,Austria,515.040000,100.000000
2,2024-01-15,2024-01-21,266.790000,Barn,AT,Austria,515.040000,193.050714
3,2024-01-15,2024-01-21,292.470000,Free range,BE,Belgium,327.150000,111.857626
4,2024-01-15,2024-01-21,327.150000,Organic,BE,Belgium,327.150000,100.000000
...,...,...,...,...,...,...,...,...
12599,2024-01-08,2024-01-14,322.525781,Organic,EU,European Union,322.525781,100.000000
12600,2024-01-15,2024-01-21,198.781428,Barn,EU,European Union,322.521458,162.249291
12601,2024-01-15,2024-01-21,159.969558,Cage,EU,European Union,322.521458,201.614271
12602,2024-01-15,2024-01-21,231.765704,Free range,EU,European Union,322.521458,139.158405


In [431]:
# We now want to upload the dataframe to the database on the server.

In [432]:
# Import sql_functions.py because we need some functions from that module:
import sql_functions as sqlf

# We need to restart the kernel and rerun at this point if we changed the module since we first imported it.

In [433]:
# Create a variable called engine using the get_engine function:
engine = sqlf.get_engine()

In [434]:
# We set the schema to our course name:
schema = 'capstone_organicfood'

# We set table_name to our group name + the name of the dataframe:
table_name = 'prices_eggs'

In [435]:
# We need psycopg2 for raising possible error message:
import psycopg2

In [436]:
# Write records stored in dataframe to SQL database:
if engine!=None:
    try:
        prices_eggs.to_sql(name=table_name, # name of SQL table variable
                        con=engine, # engine or connection
                        schema=schema, # our class schema variable
                        if_exists='replace', # Drop the table before inserting new values
                        index=False, # Write DataFrame index as a column
                        chunksize=5000, # Specify the number of rows in each batch to be written at a time
                        method='multi') # Pass multiple values in a single INSERT clause
        print(f"The {table_name} table was imported successfully.")
    # Error handling
    except (Exception, psycopg2.DatabaseError) as error:
        print(error)
        engine = None
else:
    print('No engine')

The prices_eggs table was imported successfully.


In [437]:
# Test: query the newly created table to count the rows (we know from above that the dataframe has 12,604 cases):
sqlf.get_dataframe(f'SELECT COUNT(*) FROM {schema}.prices_eggs;')

Unnamed: 0,count
0,12604


In [438]:
# Worked!