# Prerequisites for executing notebook

In [None]:
!pip install kora -q
!pip install openpyxl

[K     |████████████████████████████████| 57 kB 5.1 MB/s 
[K     |████████████████████████████████| 59 kB 7.4 MB/s 


# Required execution for notebook

The cells below ask whether the notebook is being executed in a local environment (e.g. Jupyter via Anaconda) or hosted (Google Colab with Google Drive mounted), and set the path variables accordingly. `cwd` holds the path of the project's `Datasets/` directory, which is resolved relative to the parent of the notebook's working directory; the notebook is therefore assumed to be executed from a folder one level below the project root (e.g. `Notebooks/`).

In [None]:
import os

# Default dataset folder: "<parent of the current working directory>/Datasets/".
cwd = f"{os.path.abspath(os.path.join(os.getcwd(), os.pardir))}/Datasets/"

In [None]:
# Red pill or blue pill: pick the runtime (local Jupyter vs. Google Colab) and
# resolve the dataset directory accordingly.
#
# Sets:
#   is_drive -- True only when Google Drive was mounted (Colab run).
#   cwd      -- "<parent of the working directory>/Datasets/".

import os

is_drive = False
# Offline default; recomputed below if the working directory changes on Colab.
cwd = os.path.abspath(os.path.join(os.getcwd(), os.pardir)) + "/Datasets/"

while True:
  offon = input("Is this being run offline? (Y = offline (i.e. Jupyter notebook), N = online (i.e. Google Colab notebook)): ")
  answer = offon.strip().lower()
  if answer not in ["y", "n"]:
    # Invalid answer: explain and ask again.
    print("Error! Please only type one of the following: Y, y, N, n")
    continue
  if answer == "n":
    # Imported only on the online path: these packages are not available in a
    # plain local Jupyter install, so importing them up front would break the
    # offline option.
    from google.colab import drive
    from kora import drive as drives

    drive.mount('/content/drive')
    is_drive = True
    # Called for its side effect: presumably moves the working directory to
    # the notebook's folder on Drive (verify against kora) -- the path below
    # is derived from os.getcwd() afterwards.
    drives.chdir_notebook()
    cwd = os.path.abspath(os.path.join(os.getcwd(), os.pardir)) + "/Datasets/"
  break

Is this being run offline? (Y = offline (i.e. Jupyter notebook), N = online (i.e. Google Colab notebook)): n
Mounted at /content/drive


In [None]:
cwd

'/content/drive/My Drive/CCT/Assignments/Assignment 2/Datasets/'

In [None]:
# Sanity check: show the working directory and its parent (the project folder).
path = os.getcwd()
print(f"Current Directory {path}")

print(
    os.path.abspath(
        os.path.join(path, os.pardir)
    )
)

Current Directory /content/drive/My Drive/CCT/Assignments/Assignment 2/Notebooks
/content/drive/My Drive/CCT/Assignments/Assignment 2


# Import datasets

In [None]:
import pandas as pd
import numpy as np
from datetime import datetime

In [None]:
# Open the workbook once so every sheet below is read from the same handle.
xlsx = pd.ExcelFile(f"{cwd}milk_dataset V2.xlsx")

In [None]:
# One DataFrame per worksheet, in this order:
#   df1 = "Output", df2 = "Price of heifer cows",
#   df3 = "Price of meal", df4 = "Average value of raw milk".
df1, df2, df3, df4 = (
    pd.read_excel(xlsx, sheet_name=sheet)
    for sheet in (
        "Output",
        "Price of heifer cows",
        "Price of meal",
        "Average value of raw milk",
    )
)

# Dictionaries

In [None]:
# Three-letter month abbreviation -> month number (1-12), in calendar order.
months = {
    abbr: number
    for number, abbr in enumerate(
        ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
         "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"],
        start=1,
    )
}

In [None]:
# Full English month name -> its three-letter abbreviation (first 3 letters).
month_names = {
    full_name: full_name[:3]
    for full_name in (
        "January", "February", "March", "April", "May", "June",
        "July", "August", "September", "October", "November", "December",
    )
}

In [None]:
# Long category label (including its unit) -> short display name.
translations = {
    "Average price of raw milk from Ireland (Euro per 100kg)": "Raw milk price",
    "Butter (Thousand tonnes)": "Butter",
    "Cheese (Thousand tonnes)": "Cheese",
    "Calf nuts and cubes (16-18% protein) (Euro per Tonne)": "Calf nuts value",
    "Dairy meal (16-18% protein) (Euro per Tonne)": "Dairy meal value",
    "Dairy nuts and cubes (16-18% protein) (Euro per Tonne)": "Dairy nuts value",
    "Domestic milk intake (Million litres)": "Domestic milk intake",
    "Imported milk intake (Million litres)": "Imported milk intake",
    "Maize meal (Euro per Tonne)": "Maize meal value",
    "Skimmed & semi-skimmed milk sales (Million litres)": "Skimmed milk sales",
    # Both spellings of the skimmed milk powder label map to the same name.
    "Skimmed milk powder (Thousand tonnes)": "Skimmed milk powder",
    "Whole milk sales (Million litres)": "Whole milk sales",
    "Skimmed-milk powder (Thousand tonnes)": "Skimmed milk powder",
    "Volume (Thousand tonnes)": "Milk production volume",
    # NOTE(review): "Concentrated milk" shares its short name with
    # "Whole milk sales" above -- looks like a copy/paste slip; confirm.
    "Concentrated milk (Thousand tonnes)": "Whole milk sales",
}

In [None]:
# Reverse lookup of `months`: month number (1-12) -> three-letter abbreviation.
inv_months = dict(zip(months.values(), months.keys()))

# Processing of datasets

## Price of raw milk dataset

In [None]:
df4

Unnamed: 0,Year,Belgium,Bulgaria,Czech Rep.,Denmark,Germany,Estonia,Ireland,Greece,Spain,...,Poland,Portugal,Romania,Slovenia,Slovakia,Finland,Sweden,EU\n\n(without UK),% EU (without UK) compared to previous month\n(*),Unnamed: 30
0,2002m01,33.14,,25.05,34.85,33.80,,28.39,33.61,30.73,...,,34.08,,,,36.47,32.74,,,2002-01-01
1,2002m02,30.29,,25.21,34.32,32.57,,27.35,33.79,29.60,...,,33.84,,,,36.15,32.57,,,2002-02-01
2,2002m03,28.20,,25.47,34.44,32.17,,25.99,33.87,29.26,...,,33.86,,,,33.07,32.90,,,2002-03-01
3,2002m04,26.50,,26.32,34.44,30.95,,25.47,33.80,28.89,...,,33.54,,,,33.00,32.63,,,2002-04-01
4,2002m05,25.47,,25.97,33.89,29.71,,25.57,33.91,28.65,...,,33.27,,,,32.68,32.33,,,2002-05-01
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
235,2021m08,35.73,32.65,34.10,36.44,36.55,31.00,39.23,38.92,32.82,...,33.39,29.87,31.34,32.18,31.81,37.83,38.75,36.41,0.011479,2021-08-01
236,2021m09,36.46,33.40,34.79,38.06,37.51,31.78,42.44,39.56,33.20,...,34.52,30.07,32.30,33.00,32.75,40.26,39.44,37.29,0.024344,2021-09-01
237,2021m10,38.64,34.61,35.42,39.52,39.28,32.71,46.52,40.75,34.27,...,36.10,31.49,33.49,33.90,33.57,40.60,40.96,38.68,0.037317,2021-10-01
238,2021m11,40.67,35.67,36.93,39.53,41.07,34.10,48.65,41.34,35.15,...,38.17,31.74,35.39,34.79,34.53,42.03,41.63,40.03,0.034880,2021-11-01


### Extract target values

In [None]:
# Keep only the period column and the two countries of interest.
tar_cols = ["Year", "Ireland", "Netherlands"]
df4 = df4.loc[:, tar_cols]

In [None]:
df4

Unnamed: 0,Year,Ireland,Netherlands
0,2002m01,28.39,33.49
1,2002m02,27.35,32.99
2,2002m03,25.99,31.04
3,2002m04,25.47,30.42
4,2002m05,25.57,30.25
...,...,...,...
235,2021m08,39.23,37.75
236,2021m09,42.44,38.00
237,2021m10,46.52,39.50
238,2021m11,48.65,41.25


### Rename columns to be more descriptive

In [None]:
# Replace the bare country names with descriptive price labels.
df4 = df4.rename(
    columns={
        "Ireland": "Average price of raw milk from Ireland",
        "Netherlands": "Average price of raw milk from Netherlands",
    }
)

### Split Year column into Year and Month

In [None]:
# "2002m01" -> ["2002", "01"]; the two parts are separated into columns below.
df4["Year"] = df4["Year"].map(lambda period: period.split("m"))

In [None]:
df4

Unnamed: 0,Year,Average price of raw milk from Ireland,Average price of raw milk from Netherlands
0,"[2002, 01]",28.39,33.49
1,"[2002, 02]",27.35,32.99
2,"[2002, 03]",25.99,31.04
3,"[2002, 04]",25.47,30.42
4,"[2002, 05]",25.57,30.25
...,...,...,...
235,"[2021, 08]",39.23,37.75
236,"[2021, 09]",42.44,38.00
237,"[2021, 10]",46.52,39.50
238,"[2021, 11]",48.65,41.25


In [None]:
# Month must be extracted first: the second statement overwrites the
# [year, month] pairs held in "Year" with the year part only.
df4["Month"] = df4["Year"].map(lambda parts: parts[1])
df4["Year"] = df4["Year"].map(lambda parts: parts[0])

In [None]:
df4

Unnamed: 0,Year,Average price of raw milk from Ireland,Average price of raw milk from Netherlands,Month
0,2002,28.39,33.49,01
1,2002,27.35,32.99,02
2,2002,25.99,31.04,03
3,2002,25.47,30.42,04
4,2002,25.57,30.25,05
...,...,...,...,...
235,2021,39.23,37.75,08
236,2021,42.44,38.00,09
237,2021,46.52,39.50,10
238,2021,48.65,41.25,11


In [None]:
# Zero-padded month string ("01") -> abbreviation ("Jan") via inv_months.
df4["Month"] = df4["Month"].map(lambda month: inv_months[int(month)])

In [None]:
df4

Unnamed: 0,Year,Average price of raw milk from Ireland,Average price of raw milk from Netherlands,Month
0,2002,28.39,33.49,Jan
1,2002,27.35,32.99,Feb
2,2002,25.99,31.04,Mar
3,2002,25.47,30.42,Apr
4,2002,25.57,30.25,May
...,...,...,...,...
235,2021,39.23,37.75,Aug
236,2021,42.44,38.00,Sep
237,2021,46.52,39.50,Oct
238,2021,48.65,41.25,Nov


### Melt dataset by Raw values

In [None]:
# Wide -> long: one row per (Year, Month, price series).
df4_melt = pd.melt(
    df4,
    id_vars=["Year", "Month"],
    value_vars=[
        "Average price of raw milk from Ireland",
        "Average price of raw milk from Netherlands",
    ],
)

In [None]:
df4_melt

Unnamed: 0,Year,Month,variable,value
0,2002,Jan,Average price of raw milk from Ireland,28.39
1,2002,Feb,Average price of raw milk from Ireland,27.35
2,2002,Mar,Average price of raw milk from Ireland,25.99
3,2002,Apr,Average price of raw milk from Ireland,25.47
4,2002,May,Average price of raw milk from Ireland,25.57
...,...,...,...,...
475,2021,Aug,Average price of raw milk from Netherlands,37.75
476,2021,Sep,Average price of raw milk from Netherlands,38.00
477,2021,Oct,Average price of raw milk from Netherlands,39.50
478,2021,Nov,Average price of raw milk from Netherlands,41.25


### Rename columns

In [None]:
# melt() names the label column "variable"; call it "Country" instead.
df4_melt = df4_melt.rename(columns={"variable": "Country"})

In [None]:
df4_melt

Unnamed: 0,Year,Month,Country,value
0,2002,Jan,Average price of raw milk from Ireland,28.39
1,2002,Feb,Average price of raw milk from Ireland,27.35
2,2002,Mar,Average price of raw milk from Ireland,25.99
3,2002,Apr,Average price of raw milk from Ireland,25.47
4,2002,May,Average price of raw milk from Ireland,25.57
...,...,...,...,...
475,2021,Aug,Average price of raw milk from Netherlands,37.75
476,2021,Sep,Average price of raw milk from Netherlands,38.00
477,2021,Oct,Average price of raw milk from Netherlands,39.50
478,2021,Nov,Average price of raw milk from Netherlands,41.25


### Pivot dataset to represent Raw Value by Year and Month

In [None]:
# One row per (Year, Country) with a column per month abbreviation.
df4_pivot = pd.pivot(
    df4_melt,
    index=["Year", "Country"],
    columns="Month",
    values="value",
)

In [None]:
# Turn the (Year, Country) index levels back into ordinary columns.
df4_pivot = df4_pivot.reset_index(drop=False)

In [None]:
df4_pivot

Month,Year,Country,Apr,Aug,Dec,Feb,Jan,Jul,Jun,Mar,May,Nov,Oct,Sep
0,2002,Average price of raw milk from Ireland,25.47,25.85,28.46,27.35,28.39,25.38,25.45,25.99,25.57,29.46,28.79,27.22
1,2002,Average price of raw milk from Netherlands,30.42,35.02,33.52,32.99,33.49,30.34,30.27,31.04,30.25,35.83,35.49,35.12
2,2003,Average price of raw milk from Ireland,25.01,28.01,29.86,26.55,28.1,25.13,25.13,25.42,25.22,31.92,30.89,29.35
3,2003,Average price of raw milk from Netherlands,28.82,34.58,32.91,30.04,30.61,29.6,29.14,28.82,29.14,35.19,34.85,34.58
4,2004,Average price of raw milk from Ireland,27.29,28.3201,30.1701,28.01,29.3501,27.8,27.49,27.7,27.49,31.5001,31.3001,30.0701
5,2004,Average price of raw milk from Netherlands,26.0401,31.8401,31.7101,28.1401,29.6301,30.4701,26.0401,26.5701,26.0401,32.3601,32.1401,31.9501
6,2005,Average price of raw milk from Ireland,27.39,27.39,29.66,28.83,29.45,26.88,26.98,27.8,27.18,30.48,30.69,28.32
7,2005,Average price of raw milk from Netherlands,26.27,31.8,29.27,27.97,28.31,26.44,26.27,26.23,26.27,31.81,31.8,31.8
8,2006,Average price of raw milk from Ireland,26.46,27.7,29.1,27.7,28.11,25.3,25.74,26.98,26.46,27.7,27.7,27.7
9,2006,Average price of raw milk from Netherlands,25.44,30.92,27.66,27.64,27.64,25.49,25.29,25.73,25.44,30.78,30.78,30.92


### Add Unit feature to match Milk Statistics dataset

In [None]:
# Every row of this dataset is quoted in the same unit; assigning a scalar
# broadcasts it to all rows, so no hand-built list of repeated values is needed.
df4_pivot["Unit"] = "Euro per 100kg"

In [None]:
df4_pivot

Month,Year,Country,Apr,Aug,Dec,Feb,Jan,Jul,Jun,Mar,May,Nov,Oct,Sep,Unit
0,2002,Average price of raw milk from Ireland,25.47,25.85,28.46,27.35,28.39,25.38,25.45,25.99,25.57,29.46,28.79,27.22,Euro per 100kg
1,2002,Average price of raw milk from Netherlands,30.42,35.02,33.52,32.99,33.49,30.34,30.27,31.04,30.25,35.83,35.49,35.12,Euro per 100kg
2,2003,Average price of raw milk from Ireland,25.01,28.01,29.86,26.55,28.1,25.13,25.13,25.42,25.22,31.92,30.89,29.35,Euro per 100kg
3,2003,Average price of raw milk from Netherlands,28.82,34.58,32.91,30.04,30.61,29.6,29.14,28.82,29.14,35.19,34.85,34.58,Euro per 100kg
4,2004,Average price of raw milk from Ireland,27.29,28.3201,30.1701,28.01,29.3501,27.8,27.49,27.7,27.49,31.5001,31.3001,30.0701,Euro per 100kg
5,2004,Average price of raw milk from Netherlands,26.0401,31.8401,31.7101,28.1401,29.6301,30.4701,26.0401,26.5701,26.0401,32.3601,32.1401,31.9501,Euro per 100kg
6,2005,Average price of raw milk from Ireland,27.39,27.39,29.66,28.83,29.45,26.88,26.98,27.8,27.18,30.48,30.69,28.32,Euro per 100kg
7,2005,Average price of raw milk from Netherlands,26.27,31.8,29.27,27.97,28.31,26.44,26.27,26.23,26.27,31.81,31.8,31.8,Euro per 100kg
8,2006,Average price of raw milk from Ireland,26.46,27.7,29.1,27.7,28.11,25.3,25.74,26.98,26.46,27.7,27.7,27.7,Euro per 100kg
9,2006,Average price of raw milk from Netherlands,25.44,30.92,27.66,27.64,27.64,25.49,25.29,25.73,25.44,30.78,30.78,30.92,Euro per 100kg


### Reorder columns

In [None]:
# New order: the first two columns, then the last column (Unit), then
# everything that sat in between (the month columns).
cols = [
    *df4_pivot.columns[:2],
    df4_pivot.columns[-1],
    *df4_pivot.columns[2:-1],
]
cols

['Year',
 'Country',
 'Unit',
 'Apr',
 'Aug',
 'Dec',
 'Feb',
 'Jan',
 'Jul',
 'Jun',
 'Mar',
 'May',
 'Nov',
 'Oct',
 'Sep']

In [None]:
# Apply the column order computed in `cols`.
df4_pivot = df4_pivot.loc[:, cols]

In [None]:
df4_pivot

Month,Year,Country,Unit,Apr,Aug,Dec,Feb,Jan,Jul,Jun,Mar,May,Nov,Oct,Sep
0,2002,Average price of raw milk from Ireland,Euro per 100kg,25.47,25.85,28.46,27.35,28.39,25.38,25.45,25.99,25.57,29.46,28.79,27.22
1,2002,Average price of raw milk from Netherlands,Euro per 100kg,30.42,35.02,33.52,32.99,33.49,30.34,30.27,31.04,30.25,35.83,35.49,35.12
2,2003,Average price of raw milk from Ireland,Euro per 100kg,25.01,28.01,29.86,26.55,28.1,25.13,25.13,25.42,25.22,31.92,30.89,29.35
3,2003,Average price of raw milk from Netherlands,Euro per 100kg,28.82,34.58,32.91,30.04,30.61,29.6,29.14,28.82,29.14,35.19,34.85,34.58
4,2004,Average price of raw milk from Ireland,Euro per 100kg,27.29,28.3201,30.1701,28.01,29.3501,27.8,27.49,27.7,27.49,31.5001,31.3001,30.0701
5,2004,Average price of raw milk from Netherlands,Euro per 100kg,26.0401,31.8401,31.7101,28.1401,29.6301,30.4701,26.0401,26.5701,26.0401,32.3601,32.1401,31.9501
6,2005,Average price of raw milk from Ireland,Euro per 100kg,27.39,27.39,29.66,28.83,29.45,26.88,26.98,27.8,27.18,30.48,30.69,28.32
7,2005,Average price of raw milk from Netherlands,Euro per 100kg,26.27,31.8,29.27,27.97,28.31,26.44,26.27,26.23,26.27,31.81,31.8,31.8
8,2006,Average price of raw milk from Ireland,Euro per 100kg,26.46,27.7,29.1,27.7,28.11,25.3,25.74,26.98,26.46,27.7,27.7,27.7
9,2006,Average price of raw milk from Netherlands,Euro per 100kg,25.44,30.92,27.66,27.64,27.64,25.49,25.29,25.73,25.44,30.78,30.78,30.92


### Rename Country to Category to match Milk Statistics

In [None]:
# Use the same column name ("Category") as the other processed datasets.
df4_pivot = df4_pivot.rename(columns={"Country": "Category"})

In [None]:
df4_pivot

Month,Year,Category,Unit,Apr,Aug,Dec,Feb,Jan,Jul,Jun,Mar,May,Nov,Oct,Sep
0,2002,Average price of raw milk from Ireland,Euro per 100kg,25.47,25.85,28.46,27.35,28.39,25.38,25.45,25.99,25.57,29.46,28.79,27.22
1,2002,Average price of raw milk from Netherlands,Euro per 100kg,30.42,35.02,33.52,32.99,33.49,30.34,30.27,31.04,30.25,35.83,35.49,35.12
2,2003,Average price of raw milk from Ireland,Euro per 100kg,25.01,28.01,29.86,26.55,28.1,25.13,25.13,25.42,25.22,31.92,30.89,29.35
3,2003,Average price of raw milk from Netherlands,Euro per 100kg,28.82,34.58,32.91,30.04,30.61,29.6,29.14,28.82,29.14,35.19,34.85,34.58
4,2004,Average price of raw milk from Ireland,Euro per 100kg,27.29,28.3201,30.1701,28.01,29.3501,27.8,27.49,27.7,27.49,31.5001,31.3001,30.0701
5,2004,Average price of raw milk from Netherlands,Euro per 100kg,26.0401,31.8401,31.7101,28.1401,29.6301,30.4701,26.0401,26.5701,26.0401,32.3601,32.1401,31.9501
6,2005,Average price of raw milk from Ireland,Euro per 100kg,27.39,27.39,29.66,28.83,29.45,26.88,26.98,27.8,27.18,30.48,30.69,28.32
7,2005,Average price of raw milk from Netherlands,Euro per 100kg,26.27,31.8,29.27,27.97,28.31,26.44,26.27,26.23,26.27,31.81,31.8,31.8
8,2006,Average price of raw milk from Ireland,Euro per 100kg,26.46,27.7,29.1,27.7,28.11,25.3,25.74,26.98,26.46,27.7,27.7,27.7
9,2006,Average price of raw milk from Netherlands,Euro per 100kg,25.44,30.92,27.66,27.64,27.64,25.49,25.29,25.73,25.44,30.78,30.78,30.92


## Price of cattle meal dataset

In [None]:
df3

Unnamed: 0,Statistic,Month,Type of Feedstuff,UNIT,VALUE
0,Feed Stuff Price,2014M01,Maize meal,Euro per Tonne,252
1,Feed Stuff Price,2014M01,Calf nuts and cubes (16-18% protein),Euro per Tonne,329
2,Feed Stuff Price,2014M01,Calf meal (16-18% protein),Euro per Tonne,.
3,Feed Stuff Price,2014M01,Dairy nuts and cubes (16-18% protein),Euro per Tonne,302
4,Feed Stuff Price,2014M01,Dairy meal (16-18% protein),Euro per Tonne,300
...,...,...,...,...,...
480,Feed Stuff Price,2022M01,Maize meal,Euro per Tonne,326
481,Feed Stuff Price,2022M01,Calf nuts and cubes (16-18% protein),Euro per Tonne,386
482,Feed Stuff Price,2022M01,Calf meal (16-18% protein),Euro per Tonne,.
483,Feed Stuff Price,2022M01,Dairy nuts and cubes (16-18% protein),Euro per Tonne,362


### Rename columns to match Milk Statistics

In [None]:
# "Type of Feedstuff" becomes the shared "Category" column.
df3 = df3.rename(columns={"Type of Feedstuff": "Category"})

In [None]:
# Append the unit to every category label, e.g. "Maize meal (Euro per Tonne)".
df3["Category"] = [
    name + " (" + unit + ")"
    for name, unit in zip(df3["Category"], df3["UNIT"])
]

In [None]:
df3

Unnamed: 0,Statistic,Month,Category,UNIT,VALUE
0,Feed Stuff Price,2014M01,Maize meal (Euro per Tonne),Euro per Tonne,252
1,Feed Stuff Price,2014M01,Calf nuts and cubes (16-18% protein) (Euro per...,Euro per Tonne,329
2,Feed Stuff Price,2014M01,Calf meal (16-18% protein) (Euro per Tonne),Euro per Tonne,.
3,Feed Stuff Price,2014M01,Dairy nuts and cubes (16-18% protein) (Euro pe...,Euro per Tonne,302
4,Feed Stuff Price,2014M01,Dairy meal (16-18% protein) (Euro per Tonne),Euro per Tonne,300
...,...,...,...,...,...
480,Feed Stuff Price,2022M01,Maize meal (Euro per Tonne),Euro per Tonne,326
481,Feed Stuff Price,2022M01,Calf nuts and cubes (16-18% protein) (Euro per...,Euro per Tonne,386
482,Feed Stuff Price,2022M01,Calf meal (16-18% protein) (Euro per Tonne),Euro per Tonne,.
483,Feed Stuff Price,2022M01,Dairy nuts and cubes (16-18% protein) (Euro pe...,Euro per Tonne,362


### Drop unused columns

In [None]:
# "Statistic" is not used by the steps below.
df3 = df3.drop(columns="Statistic")

In [None]:
df3

Unnamed: 0,Month,Category,UNIT,VALUE
0,2014M01,Maize meal (Euro per Tonne),Euro per Tonne,252
1,2014M01,Calf nuts and cubes (16-18% protein) (Euro per...,Euro per Tonne,329
2,2014M01,Calf meal (16-18% protein) (Euro per Tonne),Euro per Tonne,.
3,2014M01,Dairy nuts and cubes (16-18% protein) (Euro pe...,Euro per Tonne,302
4,2014M01,Dairy meal (16-18% protein) (Euro per Tonne),Euro per Tonne,300
...,...,...,...,...
480,2022M01,Maize meal (Euro per Tonne),Euro per Tonne,326
481,2022M01,Calf nuts and cubes (16-18% protein) (Euro per...,Euro per Tonne,386
482,2022M01,Calf meal (16-18% protein) (Euro per Tonne),Euro per Tonne,.
483,2022M01,Dairy nuts and cubes (16-18% protein) (Euro pe...,Euro per Tonne,362


### Create Year and Month features by splitting Month Feature

In [None]:
# "2014M01" -> ["2014", "01"]; the parts are split into Year/Month below.
df3["Month"] = df3["Month"].map(lambda period: period.split("M"))

In [None]:
df3

Unnamed: 0,Month,Category,UNIT,VALUE
0,"[2014, 01]",Maize meal (Euro per Tonne),Euro per Tonne,252
1,"[2014, 01]",Calf nuts and cubes (16-18% protein) (Euro per...,Euro per Tonne,329
2,"[2014, 01]",Calf meal (16-18% protein) (Euro per Tonne),Euro per Tonne,.
3,"[2014, 01]",Dairy nuts and cubes (16-18% protein) (Euro pe...,Euro per Tonne,302
4,"[2014, 01]",Dairy meal (16-18% protein) (Euro per Tonne),Euro per Tonne,300
...,...,...,...,...
480,"[2022, 01]",Maize meal (Euro per Tonne),Euro per Tonne,326
481,"[2022, 01]",Calf nuts and cubes (16-18% protein) (Euro per...,Euro per Tonne,386
482,"[2022, 01]",Calf meal (16-18% protein) (Euro per Tonne),Euro per Tonne,.
483,"[2022, 01]",Dairy nuts and cubes (16-18% protein) (Euro pe...,Euro per Tonne,362


In [None]:
# First element of each [year, month] pair, as an integer.
df3["Year"] = df3["Month"].map(lambda parts: int(parts[0]))

In [None]:
# Second element of each [year, month] pair, as an integer (replaces the pair).
df3["Month"] = df3["Month"].map(lambda parts: int(parts[1]))

In [None]:
df3

Unnamed: 0,Month,Category,UNIT,VALUE,Year
0,1,Maize meal (Euro per Tonne),Euro per Tonne,252,2014
1,1,Calf nuts and cubes (16-18% protein) (Euro per...,Euro per Tonne,329,2014
2,1,Calf meal (16-18% protein) (Euro per Tonne),Euro per Tonne,.,2014
3,1,Dairy nuts and cubes (16-18% protein) (Euro pe...,Euro per Tonne,302,2014
4,1,Dairy meal (16-18% protein) (Euro per Tonne),Euro per Tonne,300,2014
...,...,...,...,...,...
480,1,Maize meal (Euro per Tonne),Euro per Tonne,326,2022
481,1,Calf nuts and cubes (16-18% protein) (Euro per...,Euro per Tonne,386,2022
482,1,Calf meal (16-18% protein) (Euro per Tonne),Euro per Tonne,.,2022
483,1,Dairy nuts and cubes (16-18% protein) (Euro pe...,Euro per Tonne,362,2022


In [None]:
# Month number -> abbreviation ("Jan", "Feb", ...) via inv_months.
df3["Month"] = df3["Month"].map(lambda number: inv_months[number])

### Pivot dataset to display data by Year, with all Month values

In [None]:
# One row per (Year, Category, UNIT) with a column per month abbreviation.
df3_pivot = pd.pivot(
    df3,
    index=["Year", "Category", "UNIT"],
    columns="Month",
    values="VALUE",
)

In [None]:
df3_pivot.head(20)

Unnamed: 0_level_0,Unnamed: 1_level_0,Month,Apr,Aug,Dec,Feb,Jan,Jul,Jun,Mar,May,Nov,Oct,Sep
Year,Category,UNIT,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2014,Calf meal (16-18% protein) (Euro per Tonne),Euro per Tonne,.,.,.,.,.,.,.,.,.,.,.,.
2014,Calf nuts and cubes (16-18% protein) (Euro per Tonne),Euro per Tonne,313,311,289,329,329,311,312,305,315,287,279,297
2014,Dairy meal (16-18% protein) (Euro per Tonne),Euro per Tonne,276,278,257,303,300,279,278,290,284,256,250,257
2014,Dairy nuts and cubes (16-18% protein) (Euro per Tonne),Euro per Tonne,289,287,269,303,302,289,290,288,292,267,270,273
2014,Maize meal (Euro per Tonne),Euro per Tonne,233,228,222,248,252,234,240,244,240,224,227,229
2015,Calf meal (16-18% protein) (Euro per Tonne),Euro per Tonne,.,.,.,.,.,.,.,.,.,.,.,.
2015,Calf nuts and cubes (16-18% protein) (Euro per Tonne),Euro per Tonne,297,296,293,289,292,296,293,292,295,294,292,294
2015,Dairy meal (16-18% protein) (Euro per Tonne),Euro per Tonne,264,269,267,262,259,269,267,266,266,265,269,265
2015,Dairy nuts and cubes (16-18% protein) (Euro per Tonne),Euro per Tonne,272,273,271,269,269,273,273,272,273,272,272,273
2015,Maize meal (Euro per Tonne),Euro per Tonne,221,223,220,220,223,223,224,223,222,219,220,221


In [None]:
# Turn the (Year, Category, UNIT) index levels back into ordinary columns.
df3_pivot = df3_pivot.reset_index(drop=False)

In [None]:
df3_pivot

Month,Year,Category,UNIT,Apr,Aug,Dec,Feb,Jan,Jul,Jun,Mar,May,Nov,Oct,Sep
0,2014,Calf meal (16-18% protein) (Euro per Tonne),Euro per Tonne,.,.,.,.,.,.,.,.,.,.,.,.
1,2014,Calf nuts and cubes (16-18% protein) (Euro per...,Euro per Tonne,313,311,289,329,329,311,312,305,315,287,279,297
2,2014,Dairy meal (16-18% protein) (Euro per Tonne),Euro per Tonne,276,278,257,303,300,279,278,290,284,256,250,257
3,2014,Dairy nuts and cubes (16-18% protein) (Euro pe...,Euro per Tonne,289,287,269,303,302,289,290,288,292,267,270,273
4,2014,Maize meal (Euro per Tonne),Euro per Tonne,233,228,222,248,252,234,240,244,240,224,227,229
5,2015,Calf meal (16-18% protein) (Euro per Tonne),Euro per Tonne,.,.,.,.,.,.,.,.,.,.,.,.
6,2015,Calf nuts and cubes (16-18% protein) (Euro per...,Euro per Tonne,297,296,293,289,292,296,293,292,295,294,292,294
7,2015,Dairy meal (16-18% protein) (Euro per Tonne),Euro per Tonne,264,269,267,262,259,269,267,266,266,265,269,265
8,2015,Dairy nuts and cubes (16-18% protein) (Euro pe...,Euro per Tonne,272,273,271,269,269,273,273,272,273,272,272,273
9,2015,Maize meal (Euro per Tonne),Euro per Tonne,221,223,220,220,223,223,224,223,222,219,220,221


### Rename Month columns to match Milk Statistics

In [None]:
# Relabel columns keyed by month number with their abbreviation.
# NOTE(review): "Month" was already mapped to abbreviations before the pivot,
# so no column label is an integer here and this appears to be a no-op.
df3_pivot = df3_pivot.rename(columns=inv_months)

In [None]:
df3_pivot

Month,Year,Category,UNIT,Apr,Aug,Dec,Feb,Jan,Jul,Jun,Mar,May,Nov,Oct,Sep
0,2014,Calf meal (16-18% protein) (Euro per Tonne),Euro per Tonne,.,.,.,.,.,.,.,.,.,.,.,.
1,2014,Calf nuts and cubes (16-18% protein) (Euro per...,Euro per Tonne,313,311,289,329,329,311,312,305,315,287,279,297
2,2014,Dairy meal (16-18% protein) (Euro per Tonne),Euro per Tonne,276,278,257,303,300,279,278,290,284,256,250,257
3,2014,Dairy nuts and cubes (16-18% protein) (Euro pe...,Euro per Tonne,289,287,269,303,302,289,290,288,292,267,270,273
4,2014,Maize meal (Euro per Tonne),Euro per Tonne,233,228,222,248,252,234,240,244,240,224,227,229
5,2015,Calf meal (16-18% protein) (Euro per Tonne),Euro per Tonne,.,.,.,.,.,.,.,.,.,.,.,.
6,2015,Calf nuts and cubes (16-18% protein) (Euro per...,Euro per Tonne,297,296,293,289,292,296,293,292,295,294,292,294
7,2015,Dairy meal (16-18% protein) (Euro per Tonne),Euro per Tonne,264,269,267,262,259,269,267,266,266,265,269,265
8,2015,Dairy nuts and cubes (16-18% protein) (Euro pe...,Euro per Tonne,272,273,271,269,269,273,273,272,273,272,272,273
9,2015,Maize meal (Euro per Tonne),Euro per Tonne,221,223,220,220,223,223,224,223,222,219,220,221


### Rename column to match Milk Statistics

In [None]:
# Match the "Unit" spelling used by the raw milk price frame.
df3_pivot = df3_pivot.rename(columns={"UNIT": "Unit"})

In [None]:
df3_pivot

Month,Year,Category,Unit,Apr,Aug,Dec,Feb,Jan,Jul,Jun,Mar,May,Nov,Oct,Sep
0,2014,Calf meal (16-18% protein) (Euro per Tonne),Euro per Tonne,.,.,.,.,.,.,.,.,.,.,.,.
1,2014,Calf nuts and cubes (16-18% protein) (Euro per...,Euro per Tonne,313,311,289,329,329,311,312,305,315,287,279,297
2,2014,Dairy meal (16-18% protein) (Euro per Tonne),Euro per Tonne,276,278,257,303,300,279,278,290,284,256,250,257
3,2014,Dairy nuts and cubes (16-18% protein) (Euro pe...,Euro per Tonne,289,287,269,303,302,289,290,288,292,267,270,273
4,2014,Maize meal (Euro per Tonne),Euro per Tonne,233,228,222,248,252,234,240,244,240,224,227,229
5,2015,Calf meal (16-18% protein) (Euro per Tonne),Euro per Tonne,.,.,.,.,.,.,.,.,.,.,.,.
6,2015,Calf nuts and cubes (16-18% protein) (Euro per...,Euro per Tonne,297,296,293,289,292,296,293,292,295,294,292,294
7,2015,Dairy meal (16-18% protein) (Euro per Tonne),Euro per Tonne,264,269,267,262,259,269,267,266,266,265,269,265
8,2015,Dairy nuts and cubes (16-18% protein) (Euro pe...,Euro per Tonne,272,273,271,269,269,273,273,272,273,272,272,273
9,2015,Maize meal (Euro per Tonne),Euro per Tonne,221,223,220,220,223,223,224,223,222,219,220,221


### Remove rows outside target range

In [None]:
# Keep only the rows for years before 2022.
df3_pivot = df3_pivot.loc[df3_pivot["Year"] < 2022]

In [None]:
df3_pivot

Month,Year,Category,Unit,Apr,Aug,Dec,Feb,Jan,Jul,Jun,Mar,May,Nov,Oct,Sep
0,2014,Calf meal (16-18% protein) (Euro per Tonne),Euro per Tonne,.,.,.,.,.,.,.,.,.,.,.,.
1,2014,Calf nuts and cubes (16-18% protein) (Euro per...,Euro per Tonne,313,311,289,329,329,311,312,305,315,287,279,297
2,2014,Dairy meal (16-18% protein) (Euro per Tonne),Euro per Tonne,276,278,257,303,300,279,278,290,284,256,250,257
3,2014,Dairy nuts and cubes (16-18% protein) (Euro pe...,Euro per Tonne,289,287,269,303,302,289,290,288,292,267,270,273
4,2014,Maize meal (Euro per Tonne),Euro per Tonne,233,228,222,248,252,234,240,244,240,224,227,229
5,2015,Calf meal (16-18% protein) (Euro per Tonne),Euro per Tonne,.,.,.,.,.,.,.,.,.,.,.,.
6,2015,Calf nuts and cubes (16-18% protein) (Euro per...,Euro per Tonne,297,296,293,289,292,296,293,292,295,294,292,294
7,2015,Dairy meal (16-18% protein) (Euro per Tonne),Euro per Tonne,264,269,267,262,259,269,267,266,266,265,269,265
8,2015,Dairy nuts and cubes (16-18% protein) (Euro pe...,Euro per Tonne,272,273,271,269,269,273,273,272,273,272,272,273
9,2015,Maize meal (Euro per Tonne),Euro per Tonne,221,223,220,220,223,223,224,223,222,219,220,221


## Price of heifer cattle dataset

In [None]:
df2

Unnamed: 0,Statistic,Month,Type of Cattle,UNIT,VALUE
0,Cattle Price per Head,2002M01,Heifers 200-249kg,Euro,274.71
1,Cattle Price per Head,2002M01,Heifers 250-299kg,Euro,325.25
2,Cattle Price per Head,2002M01,Heifers 300-349kg,Euro,390.32
3,Cattle Price per Head,2002M01,Heifers 350-399kg,Euro,467.19
4,Cattle Price per Head,2002M01,Heifers 400-449kg,Euro,559.87
...,...,...,...,...,...
1200,Cattle Price per Head,2022M01,Heifers 200-249kg,Euro,553.25
1201,Cattle Price per Head,2022M01,Heifers 250-299kg,Euro,640.07
1202,Cattle Price per Head,2022M01,Heifers 300-349kg,Euro,765.68
1203,Cattle Price per Head,2022M01,Heifers 350-399kg,Euro,857.22


### Rename columns to match Milk Statistics

In [None]:
# "Type of Cattle" becomes the shared "Category" column.
df2 = df2.rename(columns={"Type of Cattle": "Category"})

In [None]:
# Build a unit-suffixed label, e.g. "Heifers 200-249kg (Euro)".
# NOTE(review): unlike the feed dataset, the label is stored in a new
# "Type of Cattle" column and "Category" keeps the bare name; the pivot
# further down only uses Category/UNIT/VALUE -- confirm this is intended.
df2["Type of Cattle"] = [
    name + " (" + unit + ")"
    for name, unit in zip(df2["Category"], df2["UNIT"])
]

In [None]:
df2

Unnamed: 0,Statistic,Month,Category,UNIT,VALUE,Type of Cattle
0,Cattle Price per Head,2002M01,Heifers 200-249kg,Euro,274.71,Heifers 200-249kg (Euro)
1,Cattle Price per Head,2002M01,Heifers 250-299kg,Euro,325.25,Heifers 250-299kg (Euro)
2,Cattle Price per Head,2002M01,Heifers 300-349kg,Euro,390.32,Heifers 300-349kg (Euro)
3,Cattle Price per Head,2002M01,Heifers 350-399kg,Euro,467.19,Heifers 350-399kg (Euro)
4,Cattle Price per Head,2002M01,Heifers 400-449kg,Euro,559.87,Heifers 400-449kg (Euro)
...,...,...,...,...,...,...
1200,Cattle Price per Head,2022M01,Heifers 200-249kg,Euro,553.25,Heifers 200-249kg (Euro)
1201,Cattle Price per Head,2022M01,Heifers 250-299kg,Euro,640.07,Heifers 250-299kg (Euro)
1202,Cattle Price per Head,2022M01,Heifers 300-349kg,Euro,765.68,Heifers 300-349kg (Euro)
1203,Cattle Price per Head,2022M01,Heifers 350-399kg,Euro,857.22,Heifers 350-399kg (Euro)


### Drop unused columns

In [None]:
# "Statistic" is not used by the steps below.
df2 = df2.drop(columns="Statistic")

In [None]:
df2

Unnamed: 0,Month,Category,UNIT,VALUE,Type of Cattle
0,2002M01,Heifers 200-249kg,Euro,274.71,Heifers 200-249kg (Euro)
1,2002M01,Heifers 250-299kg,Euro,325.25,Heifers 250-299kg (Euro)
2,2002M01,Heifers 300-349kg,Euro,390.32,Heifers 300-349kg (Euro)
3,2002M01,Heifers 350-399kg,Euro,467.19,Heifers 350-399kg (Euro)
4,2002M01,Heifers 400-449kg,Euro,559.87,Heifers 400-449kg (Euro)
...,...,...,...,...,...
1200,2022M01,Heifers 200-249kg,Euro,553.25,Heifers 200-249kg (Euro)
1201,2022M01,Heifers 250-299kg,Euro,640.07,Heifers 250-299kg (Euro)
1202,2022M01,Heifers 300-349kg,Euro,765.68,Heifers 300-349kg (Euro)
1203,2022M01,Heifers 350-399kg,Euro,857.22,Heifers 350-399kg (Euro)


### Split Month into Year and Month values

In [None]:
# "2002M01" -> ["2002", "01"]; the parts are split into Year/Month below.
df2["Month"] = df2["Month"].map(lambda period: period.split("M"))

In [None]:
df2

Unnamed: 0,Month,Category,UNIT,VALUE,Type of Cattle
0,"[2002, 01]",Heifers 200-249kg,Euro,274.71,Heifers 200-249kg (Euro)
1,"[2002, 01]",Heifers 250-299kg,Euro,325.25,Heifers 250-299kg (Euro)
2,"[2002, 01]",Heifers 300-349kg,Euro,390.32,Heifers 300-349kg (Euro)
3,"[2002, 01]",Heifers 350-399kg,Euro,467.19,Heifers 350-399kg (Euro)
4,"[2002, 01]",Heifers 400-449kg,Euro,559.87,Heifers 400-449kg (Euro)
...,...,...,...,...,...
1200,"[2022, 01]",Heifers 200-249kg,Euro,553.25,Heifers 200-249kg (Euro)
1201,"[2022, 01]",Heifers 250-299kg,Euro,640.07,Heifers 250-299kg (Euro)
1202,"[2022, 01]",Heifers 300-349kg,Euro,765.68,Heifers 300-349kg (Euro)
1203,"[2022, 01]",Heifers 350-399kg,Euro,857.22,Heifers 350-399kg (Euro)


In [None]:
# First element of each [year, month] pair, as an integer.
df2["Year"] = df2["Month"].map(lambda parts: int(parts[0]))

In [None]:
# Second element of each [year, month] pair, as an integer (replaces the pair).
df2["Month"] = df2["Month"].map(lambda parts: int(parts[1]))

In [None]:
df2

Unnamed: 0,Month,Category,UNIT,VALUE,Type of Cattle,Year
0,1,Heifers 200-249kg,Euro,274.71,Heifers 200-249kg (Euro),2002
1,1,Heifers 250-299kg,Euro,325.25,Heifers 250-299kg (Euro),2002
2,1,Heifers 300-349kg,Euro,390.32,Heifers 300-349kg (Euro),2002
3,1,Heifers 350-399kg,Euro,467.19,Heifers 350-399kg (Euro),2002
4,1,Heifers 400-449kg,Euro,559.87,Heifers 400-449kg (Euro),2002
...,...,...,...,...,...,...
1200,1,Heifers 200-249kg,Euro,553.25,Heifers 200-249kg (Euro),2022
1201,1,Heifers 250-299kg,Euro,640.07,Heifers 250-299kg (Euro),2022
1202,1,Heifers 300-349kg,Euro,765.68,Heifers 300-349kg (Euro),2022
1203,1,Heifers 350-399kg,Euro,857.22,Heifers 350-399kg (Euro),2022


In [None]:
# Month number -> abbreviation ("Jan", "Feb", ...) via inv_months.
df2["Month"] = df2["Month"].map(lambda number: inv_months[number])

### Pivot dataset by Year, to display all Month values per year

In [None]:
# One row per (Year, Category, UNIT) with a column per month abbreviation.
df2_pivot = pd.pivot(
    df2,
    index=["Year", "Category", "UNIT"],
    columns="Month",
    values="VALUE",
)

In [None]:
df2_pivot.head(20)

Unnamed: 0_level_0,Unnamed: 1_level_0,Month,Apr,Aug,Dec,Feb,Jan,Jul,Jun,Mar,May,Nov,Oct,Sep
Year,Category,UNIT,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2002,Heifers 200-249kg,Euro,279.94,327.99,339.24,286.94,274.71,313.37,333.42,283.65,310.74,357.43,340.46,325.42
2002,Heifers 250-299kg,Euro,380.13,385.37,365.64,328.66,325.25,397.76,373.85,332.73,388.89,387.97,391.7,386.36
2002,Heifers 300-349kg,Euro,455.14,448.89,418.16,398.31,390.32,442.41,467.65,437.81,440.45,423.91,449.08,452.85
2002,Heifers 350-399kg,Euro,510.17,520.52,486.0,522.77,467.19,509.89,507.63,519.85,528.76,488.88,484.95,512.38
2002,Heifers 400-449kg,Euro,589.39,576.71,565.87,576.65,559.87,579.56,577.66,566.68,569.65,548.61,559.21,545.53
2003,Heifers 200-249kg,Euro,334.13,392.52,375.06,303.01,317.5,370.01,373.41,307.66,351.18,366.32,412.86,402.6
2003,Heifers 250-299kg,Euro,394.79,479.31,418.97,354.3,357.65,467.66,443.41,387.2,450.38,432.5,438.88,475.43
2003,Heifers 300-349kg,Euro,503.82,543.73,442.12,447.59,419.68,536.07,530.23,480.28,516.9,472.55,490.4,500.83
2003,Heifers 350-399kg,Euro,580.69,566.31,520.18,534.17,510.14,614.4,606.35,574.71,605.27,505.74,534.09,553.46
2003,Heifers 400-449kg,Euro,675.02,623.59,575.51,631.46,591.18,637.3,672.48,641.57,672.44,581.71,574.58,597.72


In [None]:
# Turn the pivot's index levels back into ordinary columns.
df2_pivot = df2_pivot.reset_index(drop=False)

In [None]:
df2_pivot

Month,Year,Category,UNIT,Apr,Aug,Dec,Feb,Jan,Jul,Jun,Mar,May,Nov,Oct,Sep
0,2002,Heifers 200-249kg,Euro,279.94,327.99,339.24,286.94,274.71,313.37,333.42,283.65,310.74,357.43,340.46,325.42
1,2002,Heifers 250-299kg,Euro,380.13,385.37,365.64,328.66,325.25,397.76,373.85,332.73,388.89,387.97,391.70,386.36
2,2002,Heifers 300-349kg,Euro,455.14,448.89,418.16,398.31,390.32,442.41,467.65,437.81,440.45,423.91,449.08,452.85
3,2002,Heifers 350-399kg,Euro,510.17,520.52,486.00,522.77,467.19,509.89,507.63,519.85,528.76,488.88,484.95,512.38
4,2002,Heifers 400-449kg,Euro,589.39,576.71,565.87,576.65,559.87,579.56,577.66,566.68,569.65,548.61,559.21,545.53
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
100,2022,Heifers 200-249kg,Euro,,,,,553.25,,,,,,,
101,2022,Heifers 250-299kg,Euro,,,,,640.07,,,,,,,
102,2022,Heifers 300-349kg,Euro,,,,,765.68,,,,,,,
103,2022,Heifers 350-399kg,Euro,,,,,857.22,,,,,,,


### Rename Month columns to match Milk Statistics format

In [None]:
# Rename any column whose label is a key of inv_months.
# NOTE(review): the frame printed before and after this cell is identical, so this
# looks like a no-op here (columns are already month abbreviations) - confirm.
df2_pivot = df2_pivot.rename(columns=inv_months)

In [None]:
df2_pivot

Month,Year,Category,UNIT,Apr,Aug,Dec,Feb,Jan,Jul,Jun,Mar,May,Nov,Oct,Sep
0,2002,Heifers 200-249kg,Euro,279.94,327.99,339.24,286.94,274.71,313.37,333.42,283.65,310.74,357.43,340.46,325.42
1,2002,Heifers 250-299kg,Euro,380.13,385.37,365.64,328.66,325.25,397.76,373.85,332.73,388.89,387.97,391.70,386.36
2,2002,Heifers 300-349kg,Euro,455.14,448.89,418.16,398.31,390.32,442.41,467.65,437.81,440.45,423.91,449.08,452.85
3,2002,Heifers 350-399kg,Euro,510.17,520.52,486.00,522.77,467.19,509.89,507.63,519.85,528.76,488.88,484.95,512.38
4,2002,Heifers 400-449kg,Euro,589.39,576.71,565.87,576.65,559.87,579.56,577.66,566.68,569.65,548.61,559.21,545.53
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
100,2022,Heifers 200-249kg,Euro,,,,,553.25,,,,,,,
101,2022,Heifers 250-299kg,Euro,,,,,640.07,,,,,,,
102,2022,Heifers 300-349kg,Euro,,,,,765.68,,,,,,,
103,2022,Heifers 350-399kg,Euro,,,,,857.22,,,,,,,


### Rename columns to match Milk Statistics

In [None]:
# Use the same "Unit" spelling as the Milk Statistics frame.
df2_pivot = df2_pivot.rename(columns={"UNIT": "Unit"})

In [None]:
df2_pivot

Month,Year,Category,Unit,Apr,Aug,Dec,Feb,Jan,Jul,Jun,Mar,May,Nov,Oct,Sep
0,2002,Heifers 200-249kg,Euro,279.94,327.99,339.24,286.94,274.71,313.37,333.42,283.65,310.74,357.43,340.46,325.42
1,2002,Heifers 250-299kg,Euro,380.13,385.37,365.64,328.66,325.25,397.76,373.85,332.73,388.89,387.97,391.70,386.36
2,2002,Heifers 300-349kg,Euro,455.14,448.89,418.16,398.31,390.32,442.41,467.65,437.81,440.45,423.91,449.08,452.85
3,2002,Heifers 350-399kg,Euro,510.17,520.52,486.00,522.77,467.19,509.89,507.63,519.85,528.76,488.88,484.95,512.38
4,2002,Heifers 400-449kg,Euro,589.39,576.71,565.87,576.65,559.87,579.56,577.66,566.68,569.65,548.61,559.21,545.53
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
100,2022,Heifers 200-249kg,Euro,,,,,553.25,,,,,,,
101,2022,Heifers 250-299kg,Euro,,,,,640.07,,,,,,,
102,2022,Heifers 300-349kg,Euro,,,,,765.68,,,,,,,
103,2022,Heifers 350-399kg,Euro,,,,,857.22,,,,,,,


### Remove data outside of range for project

In [None]:
# Keep only years before 2022 (the 2022 rows shown above only have a January value).
df2_pivot = df2_pivot.loc[df2_pivot["Year"] < 2022]

In [None]:
df2_pivot

Month,Year,Category,Unit,Apr,Aug,Dec,Feb,Jan,Jul,Jun,Mar,May,Nov,Oct,Sep
0,2002,Heifers 200-249kg,Euro,279.94,327.99,339.24,286.94,274.71,313.37,333.42,283.65,310.74,357.43,340.46,325.42
1,2002,Heifers 250-299kg,Euro,380.13,385.37,365.64,328.66,325.25,397.76,373.85,332.73,388.89,387.97,391.70,386.36
2,2002,Heifers 300-349kg,Euro,455.14,448.89,418.16,398.31,390.32,442.41,467.65,437.81,440.45,423.91,449.08,452.85
3,2002,Heifers 350-399kg,Euro,510.17,520.52,486.00,522.77,467.19,509.89,507.63,519.85,528.76,488.88,484.95,512.38
4,2002,Heifers 400-449kg,Euro,589.39,576.71,565.87,576.65,559.87,579.56,577.66,566.68,569.65,548.61,559.21,545.53
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,2021,Heifers 200-249kg,Euro,526.39,480.00,490.37,498.79,540.79,515.19,485.51,525.12,499.10,419.66,516.00,450.74
96,2021,Heifers 250-299kg,Euro,613.50,598.07,623.57,601.33,614.47,623.33,608.60,610.85,601.35,547.42,529.67,570.54
97,2021,Heifers 300-349kg,Euro,720.86,689.20,693.47,690.26,691.65,723.04,730.25,713.21,689.16,641.39,656.53,694.88
98,2021,Heifers 350-399kg,Euro,845.05,801.61,806.00,814.00,798.03,837.85,844.54,827.74,796.61,762.83,777.37,804.71


### Drop data with missing values

In [None]:
# Drop the calf meal category.
# NOTE(review): the visible rows of df2_pivot are all heifer categories and the frame
# printed below is unchanged, so this filter may have been intended for a different
# frame - confirm.
df2_pivot = df2_pivot.loc[
    df2_pivot["Category"] != "Calf meal (16-18% protein) (Euro per Tonne)"
]

In [None]:
df2_pivot

Month,Year,Category,Unit,Apr,Aug,Dec,Feb,Jan,Jul,Jun,Mar,May,Nov,Oct,Sep
0,2002,Heifers 200-249kg,Euro,279.94,327.99,339.24,286.94,274.71,313.37,333.42,283.65,310.74,357.43,340.46,325.42
1,2002,Heifers 250-299kg,Euro,380.13,385.37,365.64,328.66,325.25,397.76,373.85,332.73,388.89,387.97,391.70,386.36
2,2002,Heifers 300-349kg,Euro,455.14,448.89,418.16,398.31,390.32,442.41,467.65,437.81,440.45,423.91,449.08,452.85
3,2002,Heifers 350-399kg,Euro,510.17,520.52,486.00,522.77,467.19,509.89,507.63,519.85,528.76,488.88,484.95,512.38
4,2002,Heifers 400-449kg,Euro,589.39,576.71,565.87,576.65,559.87,579.56,577.66,566.68,569.65,548.61,559.21,545.53
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,2021,Heifers 200-249kg,Euro,526.39,480.00,490.37,498.79,540.79,515.19,485.51,525.12,499.10,419.66,516.00,450.74
96,2021,Heifers 250-299kg,Euro,613.50,598.07,623.57,601.33,614.47,623.33,608.60,610.85,601.35,547.42,529.67,570.54
97,2021,Heifers 300-349kg,Euro,720.86,689.20,693.47,690.26,691.65,723.04,730.25,713.21,689.16,641.39,656.53,694.88
98,2021,Heifers 350-399kg,Euro,845.05,801.61,806.00,814.00,798.03,837.85,844.54,827.74,796.61,762.83,777.37,804.71


## Milk Statistics Dataset

In [None]:
# Append the unit to each category label, e.g. "Butter" -> "Butter (Thousand tonnes)".
# The cell rewrites the column it reads, so running it twice appends the unit twice.
df1["Category"] = [
    str(category + " (" + unit + ")")
    for category, unit in zip(df1["Category"], df1["Unit"])
]

In [None]:
df1

Unnamed: 0,Year,Category,Unit,Jan,Feb,Mar,Apr,May,Jun,Jul,Aug,Sep,Oct,Nov,Dec,Year.1
0,2002,Butter (Thousand tonnes),Thousand tonnes,2.3,3.8,9.8,16.6,20.1,18.2,17.1,14.6,12.2,11.5,5.9,3.3,135.400
1,2003,Butter (Thousand tonnes),Thousand tonnes,2.7,4.5,11.1,15.0,19.5,18.0,17.4,14.8,13.1,12.5,7,4.5,140.100
2,2004,Butter (Thousand tonnes),Thousand tonnes,2.9,4.1,8.0,14.8,18.2,17.7,18.7,15.8,14.3,11.9,6.9,3.5,136.800
3,2005,Butter (Thousand tonnes),Thousand tonnes,3.4,5.6,12.8,15.8,19.0,17.6,16.0,16.5,13.9,11.4,6.7,4.3,143.000
4,2006,Butter (Thousand tonnes),Thousand tonnes,3.9,5.2,11.0,14.9,19.8,17.2,15.8,14.7,13.8,10.5,17.3,5.0,149.100
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
166,2017,Whole milk sales (Million litres),Million litres,26.2,24.2,27.7,26.4,28.7,30.0,27.1,27.6,27.1,27.3,27,27.1,326.400
167,2018,Whole milk sales (Million litres),Million litres,27.7,26.6,28.5,27.2,29.1,26.8,26.6,27.2,26.4,27.3,26.3,25.8,325.500
168,2019,Whole milk sales (Million litres),Million litres,26.9,25.3,29.0,28.5,28.2,29.4,29.1,28.6,26.8,28.9,26.8,25.8,27.775
169,2020,Whole milk sales (Million litres),Million litres,26.1,25.8,29.1,32.0,31.6,30.6,31.5,28.9,26.7,26.3,25,25.1,338.700


# Merging datasets

## Dataset 1

2014-2021, incorporating the data from all 3 datasets

### Keep only the rows for the Ireland raw milk price

In [None]:
# Keep only the Irish raw-milk price rows. The explicit .copy() makes df4_ireland an
# independent frame, so the column assignment below no longer raises
# SettingWithCopyWarning and can never write through to df4_pivot.
df4_ireland = df4_pivot[df4_pivot.Category == "Average price of raw milk from Ireland"].copy()
# Append the unit to the category label, matching the Milk Statistics naming.
df4_ireland["Category"] = [str(x + " (" + y + ")") for (x,y) in zip(df4_ireland["Category"],
                                                        df4_ireland["Unit"])]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [None]:
df4_ireland

Month,Year,Category,Unit,Apr,Aug,Dec,Feb,Jan,Jul,Jun,Mar,May,Nov,Oct,Sep
0,2002,Average price of raw milk from Ireland (Euro p...,Euro per 100kg,25.47,25.85,28.46,27.35,28.39,25.38,25.45,25.99,25.57,29.46,28.79,27.22
2,2003,Average price of raw milk from Ireland (Euro p...,Euro per 100kg,25.01,28.01,29.86,26.55,28.1,25.13,25.13,25.42,25.22,31.92,30.89,29.35
4,2004,Average price of raw milk from Ireland (Euro p...,Euro per 100kg,27.29,28.3201,30.1701,28.01,29.3501,27.8,27.49,27.7,27.49,31.5001,31.3001,30.0701
6,2005,Average price of raw milk from Ireland (Euro p...,Euro per 100kg,27.39,27.39,29.66,28.83,29.45,26.88,26.98,27.8,27.18,30.48,30.69,28.32
8,2006,Average price of raw milk from Ireland (Euro p...,Euro per 100kg,26.46,27.7,29.1,27.7,28.11,25.3,25.74,26.98,26.46,27.7,27.7,27.7
10,2007,Average price of raw milk from Ireland (Euro p...,Euro per 100kg,27.29,38.5,43.25,27.18,28.3,34.5,32.64,25.64,29.76,45.4,44.6,42.3
12,2008,Average price of raw milk from Ireland (Euro p...,Euro per 100kg,35.52,33.88,33.714691,40.67,41.09,33.57,32.85,39.64,34.29,33.98,33.98,33.26
14,2009,Average price of raw milk from Ireland (Euro p...,Euro per 100kg,22.34,22.86,27.49,26.15,27.6,22.04,21.83,22.45,22.14,28.63,28.21,25.02
16,2010,Average price of raw milk from Ireland (Euro p...,Euro per 100kg,27.8,31.92,32.73,27.18,27.08,31.2,30.79,26.98,30.58,34.18,35.83,33.67
18,2011,Average price of raw milk from Ireland (Euro p...,Euro per 100kg,32.15,34.57,35.84,32.92,31.76,33.7,33.31,32.44,32.05,37.97,38.26,37.0


### Concatenate datasets

In [None]:
# Stack the Irish raw-milk price rows under the Milk Statistics rows.
# Row labels are kept as-is, so they are not unique in the result.
df1_concat = pd.concat(objs=[df1, df4_ireland], axis=0)

In [None]:
df1_concat

Unnamed: 0,Year,Category,Unit,Jan,Feb,Mar,Apr,May,Jun,Jul,Aug,Sep,Oct,Nov,Dec,Year.1
0,2002,Butter (Thousand tonnes),Thousand tonnes,2.30,3.80,9.80,16.60,20.10,18.20,17.10,14.60,12.20,11.50,5.9,3.30,135.4
1,2003,Butter (Thousand tonnes),Thousand tonnes,2.70,4.50,11.10,15.00,19.50,18.00,17.40,14.80,13.10,12.50,7,4.50,140.1
2,2004,Butter (Thousand tonnes),Thousand tonnes,2.90,4.10,8.00,14.80,18.20,17.70,18.70,15.80,14.30,11.90,6.9,3.50,136.8
3,2005,Butter (Thousand tonnes),Thousand tonnes,3.40,5.60,12.80,15.80,19.00,17.60,16.00,16.50,13.90,11.40,6.7,4.30,143.0
4,2006,Butter (Thousand tonnes),Thousand tonnes,3.90,5.20,11.00,14.90,19.80,17.20,15.80,14.70,13.80,10.50,17.3,5.00,149.1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
30,2017,Average price of raw milk from Ireland (Euro p...,Euro per 100kg,35.45,34.18,31.66,31.66,32.63,33.60,34.96,37.68,40.79,41.95,41.86,40.59,
32,2018,Average price of raw milk from Ireland (Euro p...,Euro per 100kg,39.43,37.00,33.31,31.08,31.17,31.66,31.76,33.41,37.00,38.75,39.04,36.90,
34,2019,Average price of raw milk from Ireland (Euro p...,Euro per 100kg,34.28,34.38,32.63,31.66,31.47,31.95,31.08,31.95,34.48,36.61,37.29,36.52,
36,2020,Average price of raw milk from Ireland (Euro p...,Euro per 100kg,34.67,34.86,31.66,30.01,30.59,31.76,32.83,33.89,36.61,39.14,39.14,37.49,


In [None]:
# Append the rows of df3_pivot (built earlier in the notebook).
df1_concat = pd.concat(objs=[df1_concat, df3_pivot], axis=0)

In [None]:
# Append the heifer price rows.
df1_concat = pd.concat(objs=[df1_concat, df2_pivot], axis=0)

In [None]:
df1_concat

Unnamed: 0,Year,Category,Unit,Jan,Feb,Mar,Apr,May,Jun,Jul,Aug,Sep,Oct,Nov,Dec,Year.1
0,2002,Butter (Thousand tonnes),Thousand tonnes,2.3,3.8,9.8,16.6,20.1,18.2,17.1,14.6,12.2,11.5,5.9,3.3,135.4
1,2003,Butter (Thousand tonnes),Thousand tonnes,2.7,4.5,11.1,15.0,19.5,18.0,17.4,14.8,13.1,12.5,7,4.5,140.1
2,2004,Butter (Thousand tonnes),Thousand tonnes,2.9,4.1,8.0,14.8,18.2,17.7,18.7,15.8,14.3,11.9,6.9,3.5,136.8
3,2005,Butter (Thousand tonnes),Thousand tonnes,3.4,5.6,12.8,15.8,19.0,17.6,16.0,16.5,13.9,11.4,6.7,4.3,143.0
4,2006,Butter (Thousand tonnes),Thousand tonnes,3.9,5.2,11.0,14.9,19.8,17.2,15.8,14.7,13.8,10.5,17.3,5.0,149.1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,2021,Heifers 200-249kg,Euro,540.79,498.79,525.12,526.39,499.1,485.51,515.19,480.0,450.74,516.0,419.66,490.37,
96,2021,Heifers 250-299kg,Euro,614.47,601.33,610.85,613.5,601.35,608.6,623.33,598.07,570.54,529.67,547.42,623.57,
97,2021,Heifers 300-349kg,Euro,691.65,690.26,713.21,720.86,689.16,730.25,723.04,689.2,694.88,656.53,641.39,693.47,
98,2021,Heifers 350-399kg,Euro,798.03,814.0,827.74,845.05,796.61,844.54,837.85,801.61,804.71,777.37,762.83,806.0,


In [None]:
df1_concat

Unnamed: 0,Year,Category,Unit,Jan,Feb,Mar,Apr,May,Jun,Jul,Aug,Sep,Oct,Nov,Dec,Year.1
0,2002,Butter (Thousand tonnes),Thousand tonnes,2.3,3.8,9.8,16.6,20.1,18.2,17.1,14.6,12.2,11.5,5.9,3.3,135.4
1,2003,Butter (Thousand tonnes),Thousand tonnes,2.7,4.5,11.1,15.0,19.5,18.0,17.4,14.8,13.1,12.5,7,4.5,140.1
2,2004,Butter (Thousand tonnes),Thousand tonnes,2.9,4.1,8.0,14.8,18.2,17.7,18.7,15.8,14.3,11.9,6.9,3.5,136.8
3,2005,Butter (Thousand tonnes),Thousand tonnes,3.4,5.6,12.8,15.8,19.0,17.6,16.0,16.5,13.9,11.4,6.7,4.3,143.0
4,2006,Butter (Thousand tonnes),Thousand tonnes,3.9,5.2,11.0,14.9,19.8,17.2,15.8,14.7,13.8,10.5,17.3,5.0,149.1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,2021,Heifers 200-249kg,Euro,540.79,498.79,525.12,526.39,499.1,485.51,515.19,480.0,450.74,516.0,419.66,490.37,
96,2021,Heifers 250-299kg,Euro,614.47,601.33,610.85,613.5,601.35,608.6,623.33,598.07,570.54,529.67,547.42,623.57,
97,2021,Heifers 300-349kg,Euro,691.65,690.26,713.21,720.86,689.16,730.25,723.04,689.2,694.88,656.53,641.39,693.47,
98,2021,Heifers 350-399kg,Euro,798.03,814.0,827.74,845.05,796.61,844.54,837.85,801.61,804.71,777.37,762.83,806.0,


### Drop unused columns

In [None]:
# "Year.1" is only populated for the Milk Statistics rows (NaN for the appended frames).
df1_concat = df1_concat.drop(columns=["Year.1"])

### Melt dataset to sort data by month and year 

In [None]:
# Everything after the three identifier columns is melted into (Month, Amount) pairs.
# This relies on Year, Category, Unit being the first three columns of df1_concat.
month_columns = df1_concat.columns[3:]
df1_melt = df1_concat.melt(
    id_vars=["Year", "Category", "Unit"],
    value_vars=month_columns,
    var_name="Month",
    value_name="Amount",
)

In [None]:
df1_melt

Unnamed: 0,Year,Category,Unit,Month,Amount
0,2002,Butter (Thousand tonnes),Thousand tonnes,Jan,2.3
1,2003,Butter (Thousand tonnes),Thousand tonnes,Jan,2.7
2,2004,Butter (Thousand tonnes),Thousand tonnes,Jan,2.9
3,2005,Butter (Thousand tonnes),Thousand tonnes,Jan,3.4
4,2006,Butter (Thousand tonnes),Thousand tonnes,Jan,3.9
...,...,...,...,...,...
3967,2021,Heifers 200-249kg,Euro,Dec,490.37
3968,2021,Heifers 250-299kg,Euro,Dec,623.57
3969,2021,Heifers 300-349kg,Euro,Dec,693.47
3970,2021,Heifers 350-399kg,Euro,Dec,806.0


### Remove duplicate data

In [None]:
# Remove rows that are exact repeats of an earlier row, keeping the first occurrence.
df1_melt = df1_melt.drop_duplicates(keep="first")

In [None]:
df1_melt

Unnamed: 0,Year,Category,Unit,Month,Amount
0,2002,Butter (Thousand tonnes),Thousand tonnes,Jan,2.3
1,2003,Butter (Thousand tonnes),Thousand tonnes,Jan,2.7
2,2004,Butter (Thousand tonnes),Thousand tonnes,Jan,2.9
3,2005,Butter (Thousand tonnes),Thousand tonnes,Jan,3.4
4,2006,Butter (Thousand tonnes),Thousand tonnes,Jan,3.9
...,...,...,...,...,...
3967,2021,Heifers 200-249kg,Euro,Dec,490.37
3968,2021,Heifers 250-299kg,Euro,Dec,623.57
3969,2021,Heifers 300-349kg,Euro,Dec,693.47
3970,2021,Heifers 350-399kg,Euro,Dec,806.0


In [None]:
# Renumber the rows 0..n-1 and discard the old labels.
df1_melt = df1_melt.reset_index(drop=True)

In [None]:
df1_melt

Unnamed: 0,Year,Category,Unit,Month,Amount
0,2002,Butter (Thousand tonnes),Thousand tonnes,Jan,2.3
1,2003,Butter (Thousand tonnes),Thousand tonnes,Jan,2.7
2,2004,Butter (Thousand tonnes),Thousand tonnes,Jan,2.9
3,2005,Butter (Thousand tonnes),Thousand tonnes,Jan,3.4
4,2006,Butter (Thousand tonnes),Thousand tonnes,Jan,3.9
...,...,...,...,...,...
3967,2021,Heifers 200-249kg,Euro,Dec,490.37
3968,2021,Heifers 250-299kg,Euro,Dec,623.57
3969,2021,Heifers 300-349kg,Euro,Dec,693.47
3970,2021,Heifers 350-399kg,Euro,Dec,806.0


### Convert Month value to numeric

In [None]:
# Look each month label up in `months` (defined earlier in the notebook) to get its number.
df1_melt["Month"] = df1_melt["Month"].map(lambda abbr: months[abbr])

### Prepare data for pivoting by creating a custom index

In [None]:
# Composite row key "<Year>-<Month>-<Category>", unique per observation.
df1_melt["Index"] = [
    "-".join(map(str, key))
    for key in zip(df1_melt["Year"], df1_melt["Month"], df1_melt["Category"])
]

In [None]:
df1_melt["Index"]

0       2002-1-Butter (Thousand tonnes)
1       2003-1-Butter (Thousand tonnes)
2       2004-1-Butter (Thousand tonnes)
3       2005-1-Butter (Thousand tonnes)
4       2006-1-Butter (Thousand tonnes)
                     ...               
3967          2021-12-Heifers 200-249kg
3968          2021-12-Heifers 250-299kg
3969          2021-12-Heifers 300-349kg
3970          2021-12-Heifers 350-399kg
3971          2021-12-Heifers 400-449kg
Name: Index, Length: 3972, dtype: object

In [None]:
df1_melt[df1_melt.Index == "2021-12-Maize meal (Euro per Tonne)"]

Unnamed: 0,Year,Category,Unit,Month,Amount,Index
3871,2021,Maize meal (Euro per Tonne),Euro per Tonne,12,323,2021-12-Maize meal (Euro per Tonne)


### Pivot dataset, using the custom index as the row key and Category as the new columns

In [None]:
# One row per composite key, one column per category; each row therefore has a single
# non-missing Amount. The key is then moved back into an ordinary "Index" column.
df1_pivot = (
    df1_melt
    .pivot(index="Index", columns=["Category"], values="Amount")
    .reset_index()
)

In [None]:
df1_pivot

Category,Index,Average price of raw milk from Ireland (Euro per 100kg),Butter (Thousand tonnes),Calf meal (16-18% protein) (Euro per Tonne),Calf nuts and cubes (16-18% protein) (Euro per Tonne),Cheese (Thousand tonnes),Cow slaughterings (Thousand tonnes),Dairy meal (16-18% protein) (Euro per Tonne),Dairy nuts and cubes (16-18% protein) (Euro per Tonne),Domestic milk intake (Million litres),...,Heifers 200-249kg,Heifers 250-299kg,Heifers 300-349kg,Heifers 350-399kg,Heifers 400-449kg,Imported milk intake (Million litres),Maize meal (Euro per Tonne),Skimmed & semi-skimmed milk sales (Million litres),Skimmed milk powder (Thousand tonnes),Whole milk sales (Million litres)
0,2002-1-Average price of raw milk from Ireland ...,28.39,,,,,,,,,...,,,,,,,,,,
1,2002-1-Butter (Thousand tonnes),,2.3,,,,,,,,...,,,,,,,,,,
2,2002-1-Cheese (Thousand tonnes),,,,,1.2,,,,,...,,,,,,,,,,
3,2002-1-Domestic milk intake (Million litres),,,,,,,,,117.3,...,,,,,,,,,,
4,2002-1-Fat content (Percent),,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3967,2021-9-Imported milk intake (Million litres),,,,,,,,,,...,,,,,,0.0,,,,
3968,2021-9-Maize meal (Euro per Tonne),,,,,,,,,,...,,,,,,,299,,,
3969,2021-9-Skimmed & semi-skimmed milk sales (Mill...,,,,,,,,,,...,,,,,,,,15.1,,
3970,2021-9-Skimmed milk powder (Thousand tonnes),,,,,,,,,,...,,,,,,,,,11.1,


### Reset index and divide up into relevant columns

In [None]:
# Renumber the rows 0..n-1 without keeping the old labels as a column.
df1_pivot = df1_pivot.reset_index(drop=True)

In [None]:
df1_pivot

Category,Index,Average price of raw milk from Ireland (Euro per 100kg),Butter (Thousand tonnes),Calf meal (16-18% protein) (Euro per Tonne),Calf nuts and cubes (16-18% protein) (Euro per Tonne),Cheese (Thousand tonnes),Cow slaughterings (Thousand tonnes),Dairy meal (16-18% protein) (Euro per Tonne),Dairy nuts and cubes (16-18% protein) (Euro per Tonne),Domestic milk intake (Million litres),...,Heifers 200-249kg,Heifers 250-299kg,Heifers 300-349kg,Heifers 350-399kg,Heifers 400-449kg,Imported milk intake (Million litres),Maize meal (Euro per Tonne),Skimmed & semi-skimmed milk sales (Million litres),Skimmed milk powder (Thousand tonnes),Whole milk sales (Million litres)
0,2002-1-Average price of raw milk from Ireland ...,28.39,,,,,,,,,...,,,,,,,,,,
1,2002-1-Butter (Thousand tonnes),,2.3,,,,,,,,...,,,,,,,,,,
2,2002-1-Cheese (Thousand tonnes),,,,,1.2,,,,,...,,,,,,,,,,
3,2002-1-Domestic milk intake (Million litres),,,,,,,,,117.3,...,,,,,,,,,,
4,2002-1-Fat content (Percent),,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3967,2021-9-Imported milk intake (Million litres),,,,,,,,,,...,,,,,,0.0,,,,
3968,2021-9-Maize meal (Euro per Tonne),,,,,,,,,,...,,,,,,,299,,,
3969,2021-9-Skimmed & semi-skimmed milk sales (Mill...,,,,,,,,,,...,,,,,,,,15.1,,
3970,2021-9-Skimmed milk powder (Thousand tonnes),,,,,,,,,,...,,,,,,,,,11.1,


In [None]:
# Split the "<Year>-<Month>-<Category>" key back into its three parts.
# maxsplit=2 stops after the month, so hyphens inside the category name
# (e.g. "Skimmed & semi-skimmed ...", "Heifers 200-249kg") no longer break it apart.
# Elements 0 and 1 (year, month) are unchanged from a plain split("-").
df1_pivot["Index"] = [key.split("-", 2) for key in df1_pivot.Index]

In [None]:
df1_pivot

Category,Index,Average price of raw milk from Ireland (Euro per 100kg),Butter (Thousand tonnes),Calf meal (16-18% protein) (Euro per Tonne),Calf nuts and cubes (16-18% protein) (Euro per Tonne),Cheese (Thousand tonnes),Cow slaughterings (Thousand tonnes),Dairy meal (16-18% protein) (Euro per Tonne),Dairy nuts and cubes (16-18% protein) (Euro per Tonne),Domestic milk intake (Million litres),...,Heifers 200-249kg,Heifers 250-299kg,Heifers 300-349kg,Heifers 350-399kg,Heifers 400-449kg,Imported milk intake (Million litres),Maize meal (Euro per Tonne),Skimmed & semi-skimmed milk sales (Million litres),Skimmed milk powder (Thousand tonnes),Whole milk sales (Million litres)
0,"[2002, 1, Average price of raw milk from Irela...",28.39,,,,,,,,,...,,,,,,,,,,
1,"[2002, 1, Butter (Thousand tonnes)]",,2.3,,,,,,,,...,,,,,,,,,,
2,"[2002, 1, Cheese (Thousand tonnes)]",,,,,1.2,,,,,...,,,,,,,,,,
3,"[2002, 1, Domestic milk intake (Million litres)]",,,,,,,,,117.3,...,,,,,,,,,,
4,"[2002, 1, Fat content (Percent)]",,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3967,"[2021, 9, Imported milk intake (Million litres)]",,,,,,,,,,...,,,,,,0.0,,,,
3968,"[2021, 9, Maize meal (Euro per Tonne)]",,,,,,,,,,...,,,,,,,299,,,
3969,"[2021, 9, Skimmed & semi, skimmed milk sales (...",,,,,,,,,,...,,,,,,,,15.1,,
3970,"[2021, 9, Skimmed milk powder (Thousand tonnes)]",,,,,,,,,,...,,,,,,,,,11.1,


In [None]:
# Year and month are the first two parts of the split key.
# The year goes through float() first, so a key such as "2002.0" is also accepted.
df1_pivot["Year"] = df1_pivot["Index"].map(lambda parts: int(float(parts[0])))
df1_pivot["Month"] = df1_pivot["Index"].map(lambda parts: int(parts[1]))

In [None]:
df1_pivot

Category,Index,Average price of raw milk from Ireland (Euro per 100kg),Butter (Thousand tonnes),Calf meal (16-18% protein) (Euro per Tonne),Calf nuts and cubes (16-18% protein) (Euro per Tonne),Cheese (Thousand tonnes),Cow slaughterings (Thousand tonnes),Dairy meal (16-18% protein) (Euro per Tonne),Dairy nuts and cubes (16-18% protein) (Euro per Tonne),Domestic milk intake (Million litres),...,Heifers 300-349kg,Heifers 350-399kg,Heifers 400-449kg,Imported milk intake (Million litres),Maize meal (Euro per Tonne),Skimmed & semi-skimmed milk sales (Million litres),Skimmed milk powder (Thousand tonnes),Whole milk sales (Million litres),Year,Month
0,"[2002, 1, Average price of raw milk from Irela...",28.39,,,,,,,,,...,,,,,,,,,2002,1
1,"[2002, 1, Butter (Thousand tonnes)]",,2.3,,,,,,,,...,,,,,,,,,2002,1
2,"[2002, 1, Cheese (Thousand tonnes)]",,,,,1.2,,,,,...,,,,,,,,,2002,1
3,"[2002, 1, Domestic milk intake (Million litres)]",,,,,,,,,117.3,...,,,,,,,,,2002,1
4,"[2002, 1, Fat content (Percent)]",,,,,,,,,,...,,,,,,,,,2002,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3967,"[2021, 9, Imported milk intake (Million litres)]",,,,,,,,,,...,,,,0.0,,,,,2021,9
3968,"[2021, 9, Maize meal (Euro per Tonne)]",,,,,,,,,,...,,,,,299,,,,2021,9
3969,"[2021, 9, Skimmed & semi, skimmed milk sales (...",,,,,,,,,,...,,,,,,15.1,,,2021,9
3970,"[2021, 9, Skimmed milk powder (Thousand tonnes)]",,,,,,,,,,...,,,,,,,11.1,,2021,9


### Sort columns

In [None]:
# Move the last two columns (Year, Month) to the front and leave out column 0 ("Index").
cols_sorted = [*df1_pivot.columns[-2:], *df1_pivot.columns[1:-2]]
cols_sorted

['Year',
 'Month',
 'Average price of raw milk from Ireland (Euro per 100kg)',
 'Butter (Thousand tonnes)',
 'Calf meal (16-18% protein) (Euro per Tonne)',
 'Calf nuts and cubes (16-18% protein) (Euro per Tonne)',
 'Cheese (Thousand tonnes)',
 'Cow slaughterings (Thousand tonnes)',
 'Dairy meal (16-18% protein) (Euro per Tonne)',
 'Dairy nuts and cubes (16-18% protein) (Euro per Tonne)',
 'Domestic milk intake (Million litres)',
 'Fat content (Percent)',
 'Heifers 200-249kg',
 'Heifers 250-299kg',
 'Heifers 300-349kg',
 'Heifers 350-399kg',
 'Heifers 400-449kg',
 'Imported milk intake (Million litres)',
 'Maize meal (Euro per Tonne)',
 'Skimmed & semi-skimmed milk sales (Million litres)',
 'Skimmed milk powder (Thousand tonnes)',
 'Whole milk sales (Million litres)']

In [None]:
# Apply the new column order.
df1_pivot = df1_pivot.loc[:, cols_sorted]

In [None]:
df1_pivot

Category,Year,Month,Average price of raw milk from Ireland (Euro per 100kg),Butter (Thousand tonnes),Calf meal (16-18% protein) (Euro per Tonne),Calf nuts and cubes (16-18% protein) (Euro per Tonne),Cheese (Thousand tonnes),Cow slaughterings (Thousand tonnes),Dairy meal (16-18% protein) (Euro per Tonne),Dairy nuts and cubes (16-18% protein) (Euro per Tonne),...,Heifers 200-249kg,Heifers 250-299kg,Heifers 300-349kg,Heifers 350-399kg,Heifers 400-449kg,Imported milk intake (Million litres),Maize meal (Euro per Tonne),Skimmed & semi-skimmed milk sales (Million litres),Skimmed milk powder (Thousand tonnes),Whole milk sales (Million litres)
0,2002,1,28.39,,,,,,,,...,,,,,,,,,,
1,2002,1,,2.3,,,,,,,...,,,,,,,,,,
2,2002,1,,,,,1.2,,,,...,,,,,,,,,,
3,2002,1,,,,,,,,,...,,,,,,,,,,
4,2002,1,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3967,2021,9,,,,,,,,,...,,,,,,0.0,,,,
3968,2021,9,,,,,,,,,...,,,,,,,299,,,
3969,2021,9,,,,,,,,,...,,,,,,,,15.1,,
3970,2021,9,,,,,,,,,...,,,,,,,,,11.1,


### Group by Year and month to allow dataset to represent all data per year and month in one row

In [None]:
# Collapse the one-value-per-row frame into a single row per (Year, Month).
# sum() skips NaN, and a category with no value at all for a month comes out as 0
# (see the 0 entries in the output below), so 0 here can mean "no data".
df_pivot = df1_pivot.groupby(by=["Year", "Month"]).sum()

In [None]:
# Bring the Year and Month group keys back as ordinary columns.
df_pivot = df_pivot.reset_index(drop=False)

In [None]:
# Chronological order: by year, then by month within the year.
df_pivot = df_pivot.sort_values(by=["Year", "Month"])

In [None]:
df_pivot

Category,Year,Month,Average price of raw milk from Ireland (Euro per 100kg),Butter (Thousand tonnes),Calf meal (16-18% protein) (Euro per Tonne),Calf nuts and cubes (16-18% protein) (Euro per Tonne),Cheese (Thousand tonnes),Cow slaughterings (Thousand tonnes),Dairy meal (16-18% protein) (Euro per Tonne),Dairy nuts and cubes (16-18% protein) (Euro per Tonne),...,Heifers 200-249kg,Heifers 250-299kg,Heifers 300-349kg,Heifers 350-399kg,Heifers 400-449kg,Imported milk intake (Million litres),Maize meal (Euro per Tonne),Skimmed & semi-skimmed milk sales (Million litres),Skimmed milk powder (Thousand tonnes),Whole milk sales (Million litres)
0,2002,1,28.39,2.3,0,0,1.2,0,0,0,...,274.71,325.25,390.32,467.19,559.87,16.8,0,9.9,3.1,36.1
1,2002,2,27.35,3.8,0,0,1.7,0,0,0,...,286.94,328.66,398.31,522.77,576.65,21.9,0,9.3,2.9,32.9
2,2002,3,25.99,9.8,0,0,5.2,0,0,0,...,283.65,332.73,437.81,519.85,566.68,29.2,0,10.1,5.3,34.6
3,2002,4,25.47,16.6,0,0,13.7,0,0,0,...,279.94,380.13,455.14,510.17,589.39,31.2,0,10.0,8.9,34.7
4,2002,5,25.57,20.1,0,0,14.8,0,0,0,...,310.74,388.89,440.45,528.76,569.65,31.6,0,10.3,11.8,35.9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
235,2021,8,39.23,28.8,.,355,29.3,53.3,316,324,...,480.00,598.07,689.20,801.61,913.59,0.0,293,15.8,16.4,29.0
236,2021,9,42.44,26.5,.,360,33.2,55.7,323,332,...,450.74,570.54,694.88,804.71,915.68,0.0,299,15.1,11.1,26.6
237,2021,10,46.52,21.6,.,365,27.5,53.5,328,339,...,516.00,529.67,656.53,777.37,890.95,0.0,309,15.8,5.5,26.1
238,2021,11,48.65,17.8,.,370,20.9,55.9,333,344,...,419.66,547.42,641.39,762.83,888.34,0.0,314,15.4,0.0,25.5


### Remove data from before 2014

In [None]:
# Keep 2014 onwards and renumber the rows from 0.
# Without the reset the surviving rows keep their old labels, which do not line up
# with the fresh 0..n-1 index of the frame rebuilt from the imputer output later on.
df_pivot = df_pivot[df_pivot.Year > 2013].reset_index(drop=True)

In [None]:
df_pivot

Category,Year,Month,Average price of raw milk from Ireland (Euro per 100kg),Butter (Thousand tonnes),Calf meal (16-18% protein) (Euro per Tonne),Calf nuts and cubes (16-18% protein) (Euro per Tonne),Cheese (Thousand tonnes),Cow slaughterings (Thousand tonnes),Dairy meal (16-18% protein) (Euro per Tonne),Dairy nuts and cubes (16-18% protein) (Euro per Tonne),...,Heifers 200-249kg,Heifers 250-299kg,Heifers 300-349kg,Heifers 350-399kg,Heifers 400-449kg,Imported milk intake (Million litres),Maize meal (Euro per Tonne),Skimmed & semi-skimmed milk sales (Million litres),Skimmed milk powder (Thousand tonnes),Whole milk sales (Million litres)
0,2014,1,42.34,4.4,.,329,2.3,49.4,300,302,...,498.38,597.40,704.50,789.13,867.04,38.8,252,16.9,0.0,23.5
1,2014,2,41.76,6.2,.,329,6.5,45.4,303,303,...,506.55,612.72,704.99,790.58,884.23,37.6,248,15.4,0.8,21.3
2,2014,3,39.04,14.4,.,305,18.4,48.6,290,288,...,497.04,588.64,682.82,789.94,906.31,40.8,244,18.7,3.9,25.4
3,2014,4,38.55,17.3,.,313,22.8,49.3,276,289,...,472.67,589.84,689.85,806.49,908.97,43.6,233,15.0,6.5,21.9
4,2014,5,37.10,21.7,.,315,24.8,48.8,284,292,...,509.21,595.29,691.55,808.22,893.82,51.1,240,17.4,11.9,24.9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
91,2021,8,39.23,28.8,.,355,29.3,53.3,316,324,...,480.00,598.07,689.20,801.61,913.59,0.0,293,15.8,16.4,29.0
92,2021,9,42.44,26.5,.,360,33.2,55.7,323,332,...,450.74,570.54,694.88,804.71,915.68,0.0,299,15.1,11.1,26.6
93,2021,10,46.52,21.6,.,365,27.5,53.5,328,339,...,516.00,529.67,656.53,777.37,890.95,0.0,309,15.8,5.5,26.1
94,2021,11,48.65,17.8,.,370,20.9,55.9,333,344,...,419.66,547.42,641.39,762.83,888.34,0.0,314,15.4,0.0,25.5


### Impute values

In [None]:
from sklearn.impute import KNNImputer

# K-nearest-neighbours imputer configured with 8 neighbours; it is fitted and
# applied to the zero-masked feature table in a later cell.
imputer = KNNImputer(n_neighbors=8)

In [None]:
len(df_pivot.columns)

21

In [None]:
# Every column label except the one at position 16, which is kept out of the
# table handed to the imputer.
targets = [*df_pivot.columns[:16], *df_pivot.columns[17:]]

In [None]:
targets

['Year',
 'Month',
 'Average price of raw milk from Ireland (Euro per 100kg)',
 'Butter (Thousand tonnes)',
 'Calf nuts and cubes (16-18% protein) (Euro per Tonne)',
 'Cheese (Thousand tonnes)',
 'Cow slaughterings (Thousand tonnes)',
 'Dairy meal (16-18% protein) (Euro per Tonne)',
 'Dairy nuts and cubes (16-18% protein) (Euro per Tonne)',
 'Domestic milk intake (Million litres)',
 'Fat content (Percent)',
 'Heifers 200-249kg',
 'Heifers 250-299kg',
 'Heifers 300-349kg',
 'Heifers 350-399kg',
 'Heifers 400-449kg',
 'Maize meal (Euro per Tonne)',
 'Skimmed & semi-skimmed milk sales (Million litres)',
 'Skimmed milk powder (Thousand tonnes)',
 'Whole milk sales (Million litres)']

In [None]:
# Treat zeros in the feature columns as missing values for the imputer.
# Column 16 ("Imported milk intake (Million litres)") is not part of `targets`,
# presumably because its zeros are genuine readings rather than gaps.
df_imputed = df_pivot[targets].replace(0, np.nan)

# Save the held-out column so it can be re-attached after imputation.
# This must be column 16, the one skipped when building `targets`; column 17
# is "Maize meal" and would end up stored under the wrong label.
# The index is reset so the series lines up with the fresh 0..n-1 index of the
# DataFrame that is rebuilt from the imputer's NumPy output.
y = df_pivot[df_pivot.columns[16]].reset_index(drop=True)

# fit_transform returns a plain NumPy array whose columns follow `targets`.
y2 = imputer.fit_transform(df_imputed)

In [None]:
df_pivot.columns[16]

'Imported milk intake (Million litres)'

In [None]:
y2

array([[2.0140e+03, 1.0000e+00, 4.2340e+01, ..., 1.6900e+01, 4.6125e+00,
        2.3500e+01],
       [2.0140e+03, 2.0000e+00, 4.1760e+01, ..., 1.5400e+01, 8.0000e-01,
        2.1300e+01],
       [2.0140e+03, 3.0000e+00, 3.9040e+01, ..., 1.8700e+01, 3.9000e+00,
        2.5400e+01],
       ...,
       [2.0210e+03, 1.0000e+01, 4.6520e+01, ..., 1.5800e+01, 5.5000e+00,
        2.6100e+01],
       [2.0210e+03, 1.1000e+01, 4.8650e+01, ..., 1.5400e+01, 6.1250e+00,
        2.5500e+01],
       [2.0210e+03, 1.2000e+01, 4.8460e+01, ..., 1.5000e+01, 7.4000e+00,
        2.5200e+01]])

In [None]:
# Wrap the imputed NumPy array back into a DataFrame labelled with `targets`.
df_imputed = pd.DataFrame(y2, columns=targets)

In [None]:
# Re-attach the series saved in `y` under the label of column 16
# ("Imported milk intake (Million litres)"). pandas aligns `y` to df_imputed
# by index label, not by position.
df_imputed[df_pivot.columns[16]] = y

In [None]:
df_imputed

Unnamed: 0,Year,Month,Average price of raw milk from Ireland (Euro per 100kg),Butter (Thousand tonnes),Calf nuts and cubes (16-18% protein) (Euro per Tonne),Cheese (Thousand tonnes),Cow slaughterings (Thousand tonnes),Dairy meal (16-18% protein) (Euro per Tonne),Dairy nuts and cubes (16-18% protein) (Euro per Tonne),Domestic milk intake (Million litres),...,Heifers 200-249kg,Heifers 250-299kg,Heifers 300-349kg,Heifers 350-399kg,Heifers 400-449kg,Maize meal (Euro per Tonne),Skimmed & semi-skimmed milk sales (Million litres),Skimmed milk powder (Thousand tonnes),Whole milk sales (Million litres),Imported milk intake (Million litres)
0,2014.0,1.0,42.34,4.4,329.0,2.3,49.4,300.0,302.0,132.0,...,498.38,597.40,704.50,789.13,867.04,252.0,16.9,4.6125,23.5,252
1,2014.0,2.0,41.76,6.2,329.0,6.5,45.4,303.0,303.0,214.0,...,506.55,612.72,704.99,790.58,884.23,248.0,15.4,0.8000,21.3,248
2,2014.0,3.0,39.04,14.4,305.0,18.4,48.6,290.0,288.0,470.7,...,497.04,588.64,682.82,789.94,906.31,244.0,18.7,3.9000,25.4,244
3,2014.0,4.0,38.55,17.3,313.0,22.8,49.3,276.0,289.0,697.0,...,472.67,589.84,689.85,806.49,908.97,233.0,15.0,6.5000,21.9,233
4,2014.0,5.0,37.10,21.7,315.0,24.8,48.8,284.0,292.0,785.5,...,509.21,595.29,691.55,808.22,893.82,240.0,17.4,11.9000,24.9,240
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
91,2021.0,8.0,39.23,28.8,355.0,29.3,53.3,316.0,324.0,917.4,...,480.00,598.07,689.20,801.61,913.59,293.0,15.8,16.4000,29.0,293
92,2021.0,9.0,42.44,26.5,360.0,33.2,55.7,323.0,332.0,776.7,...,450.74,570.54,694.88,804.71,915.68,299.0,15.1,11.1000,26.6,299
93,2021.0,10.0,46.52,21.6,365.0,27.5,53.5,328.0,339.0,652.8,...,516.00,529.67,656.53,777.37,890.95,309.0,15.8,5.5000,26.1,309
94,2021.0,11.0,48.65,17.8,370.0,20.9,55.9,333.0,344.0,460.6,...,419.66,547.42,641.39,762.83,888.34,314.0,15.4,6.1250,25.5,314


In [None]:
# Swap the long source column labels for the short names in `translations`.
df_imputed = df_imputed.rename(columns=translations)

In [None]:
df_imputed

Unnamed: 0,Year,Month,Raw milk price,Butter,Calf nuts value,Cheese,Cow slaughterings (Thousand tonnes),Dairy meal value,Dairy nuts value,Domestic milk intake,...,Heifers 200-249kg,Heifers 250-299kg,Heifers 300-349kg,Heifers 350-399kg,Heifers 400-449kg,Maize meal value,Skimmed milk sales,Skimmed milk powder,Whole milk sales,Imported milk intake
0,2014.0,1.0,42.34,4.4,329.0,2.3,49.4,300.0,302.0,132.0,...,498.38,597.40,704.50,789.13,867.04,252.0,16.9,4.6125,23.5,252
1,2014.0,2.0,41.76,6.2,329.0,6.5,45.4,303.0,303.0,214.0,...,506.55,612.72,704.99,790.58,884.23,248.0,15.4,0.8000,21.3,248
2,2014.0,3.0,39.04,14.4,305.0,18.4,48.6,290.0,288.0,470.7,...,497.04,588.64,682.82,789.94,906.31,244.0,18.7,3.9000,25.4,244
3,2014.0,4.0,38.55,17.3,313.0,22.8,49.3,276.0,289.0,697.0,...,472.67,589.84,689.85,806.49,908.97,233.0,15.0,6.5000,21.9,233
4,2014.0,5.0,37.10,21.7,315.0,24.8,48.8,284.0,292.0,785.5,...,509.21,595.29,691.55,808.22,893.82,240.0,17.4,11.9000,24.9,240
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
91,2021.0,8.0,39.23,28.8,355.0,29.3,53.3,316.0,324.0,917.4,...,480.00,598.07,689.20,801.61,913.59,293.0,15.8,16.4000,29.0,293
92,2021.0,9.0,42.44,26.5,360.0,33.2,55.7,323.0,332.0,776.7,...,450.74,570.54,694.88,804.71,915.68,299.0,15.1,11.1000,26.6,299
93,2021.0,10.0,46.52,21.6,365.0,27.5,53.5,328.0,339.0,652.8,...,516.00,529.67,656.53,777.37,890.95,309.0,15.8,5.5000,26.1,309
94,2021.0,11.0,48.65,17.8,370.0,20.9,55.9,333.0,344.0,460.6,...,419.66,547.42,641.39,762.83,888.34,314.0,15.4,6.1250,25.5,314


In [None]:
# Write dataset 1 into the Datasets folder; `cwd` already ends with "/".
# The row index is written as the first, unnamed column (to_csv default).
df_imputed.to_csv(cwd+"milk dataset 1.csv")

## Dataset 2

2007-2021, with Milk Statistics & Heifer Cows

### Concatenate datasets

In [None]:
# Stack the two source tables row-wise; pd.concat matches columns by name.
df2_concat = pd.concat([df1, df4_ireland])

In [None]:
df2_concat

Unnamed: 0,Year,Category,Unit,Jan,Feb,Mar,Apr,May,Jun,Jul,Aug,Sep,Oct,Nov,Dec,Year.1
0,2002,Butter (Thousand tonnes),Thousand tonnes,2.30,3.80,9.80,16.60,20.10,18.20,17.10,14.60,12.20,11.50,5.9,3.30,135.4
1,2003,Butter (Thousand tonnes),Thousand tonnes,2.70,4.50,11.10,15.00,19.50,18.00,17.40,14.80,13.10,12.50,7,4.50,140.1
2,2004,Butter (Thousand tonnes),Thousand tonnes,2.90,4.10,8.00,14.80,18.20,17.70,18.70,15.80,14.30,11.90,6.9,3.50,136.8
3,2005,Butter (Thousand tonnes),Thousand tonnes,3.40,5.60,12.80,15.80,19.00,17.60,16.00,16.50,13.90,11.40,6.7,4.30,143.0
4,2006,Butter (Thousand tonnes),Thousand tonnes,3.90,5.20,11.00,14.90,19.80,17.20,15.80,14.70,13.80,10.50,17.3,5.00,149.1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
30,2017,Average price of raw milk from Ireland (Euro p...,Euro per 100kg,35.45,34.18,31.66,31.66,32.63,33.60,34.96,37.68,40.79,41.95,41.86,40.59,
32,2018,Average price of raw milk from Ireland (Euro p...,Euro per 100kg,39.43,37.00,33.31,31.08,31.17,31.66,31.76,33.41,37.00,38.75,39.04,36.90,
34,2019,Average price of raw milk from Ireland (Euro p...,Euro per 100kg,34.28,34.38,32.63,31.66,31.47,31.95,31.08,31.95,34.48,36.61,37.29,36.52,
36,2020,Average price of raw milk from Ireland (Euro p...,Euro per 100kg,34.67,34.86,31.66,30.01,30.59,31.76,32.83,33.89,36.61,39.14,39.14,37.49,


In [None]:
df2_pivot

Month,Year,Category,Unit,Apr,Aug,Dec,Feb,Jan,Jul,Jun,Mar,May,Nov,Oct,Sep
0,2002,Heifers 200-249kg,Euro,279.94,327.99,339.24,286.94,274.71,313.37,333.42,283.65,310.74,357.43,340.46,325.42
1,2002,Heifers 250-299kg,Euro,380.13,385.37,365.64,328.66,325.25,397.76,373.85,332.73,388.89,387.97,391.70,386.36
2,2002,Heifers 300-349kg,Euro,455.14,448.89,418.16,398.31,390.32,442.41,467.65,437.81,440.45,423.91,449.08,452.85
3,2002,Heifers 350-399kg,Euro,510.17,520.52,486.00,522.77,467.19,509.89,507.63,519.85,528.76,488.88,484.95,512.38
4,2002,Heifers 400-449kg,Euro,589.39,576.71,565.87,576.65,559.87,579.56,577.66,566.68,569.65,548.61,559.21,545.53
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,2021,Heifers 200-249kg,Euro,526.39,480.00,490.37,498.79,540.79,515.19,485.51,525.12,499.10,419.66,516.00,450.74
96,2021,Heifers 250-299kg,Euro,613.50,598.07,623.57,601.33,614.47,623.33,608.60,610.85,601.35,547.42,529.67,570.54
97,2021,Heifers 300-349kg,Euro,720.86,689.20,693.47,690.26,691.65,723.04,730.25,713.21,689.16,641.39,656.53,694.88
98,2021,Heifers 350-399kg,Euro,845.05,801.61,806.00,814.00,798.03,837.85,844.54,827.74,796.61,762.83,777.37,804.71


In [None]:
# Append the heifer price table. Columns are matched by name, so the different
# month ordering in df2_pivot does not matter.
# NOTE: this cell is not idempotent - running it again appends df2_pivot a
# second time. The drop_duplicates() call after the melt removes such repeats.
df2_concat = pd.concat([df2_concat, df2_pivot])

In [None]:
df2_concat

Unnamed: 0,Year,Category,Unit,Jan,Feb,Mar,Apr,May,Jun,Jul,Aug,Sep,Oct,Nov,Dec,Year.1
0,2002,Butter (Thousand tonnes),Thousand tonnes,2.30,3.80,9.80,16.60,20.10,18.20,17.10,14.60,12.20,11.50,5.9,3.30,135.4
1,2003,Butter (Thousand tonnes),Thousand tonnes,2.70,4.50,11.10,15.00,19.50,18.00,17.40,14.80,13.10,12.50,7,4.50,140.1
2,2004,Butter (Thousand tonnes),Thousand tonnes,2.90,4.10,8.00,14.80,18.20,17.70,18.70,15.80,14.30,11.90,6.9,3.50,136.8
3,2005,Butter (Thousand tonnes),Thousand tonnes,3.40,5.60,12.80,15.80,19.00,17.60,16.00,16.50,13.90,11.40,6.7,4.30,143.0
4,2006,Butter (Thousand tonnes),Thousand tonnes,3.90,5.20,11.00,14.90,19.80,17.20,15.80,14.70,13.80,10.50,17.3,5.00,149.1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,2021,Heifers 200-249kg,Euro,540.79,498.79,525.12,526.39,499.10,485.51,515.19,480.00,450.74,516.00,419.66,490.37,
96,2021,Heifers 250-299kg,Euro,614.47,601.33,610.85,613.50,601.35,608.60,623.33,598.07,570.54,529.67,547.42,623.57,
97,2021,Heifers 300-349kg,Euro,691.65,690.26,713.21,720.86,689.16,730.25,723.04,689.20,694.88,656.53,641.39,693.47,
98,2021,Heifers 350-399kg,Euro,798.03,814.00,827.74,845.05,796.61,844.54,837.85,801.61,804.71,777.37,762.83,806.00,


In [None]:
df2_concat

Unnamed: 0,Year,Category,Unit,Jan,Feb,Mar,Apr,May,Jun,Jul,Aug,Sep,Oct,Nov,Dec,Year.1
0,2002,Butter (Thousand tonnes),Thousand tonnes,2.30,3.80,9.80,16.60,20.10,18.20,17.10,14.60,12.20,11.50,5.9,3.30,135.4
1,2003,Butter (Thousand tonnes),Thousand tonnes,2.70,4.50,11.10,15.00,19.50,18.00,17.40,14.80,13.10,12.50,7,4.50,140.1
2,2004,Butter (Thousand tonnes),Thousand tonnes,2.90,4.10,8.00,14.80,18.20,17.70,18.70,15.80,14.30,11.90,6.9,3.50,136.8
3,2005,Butter (Thousand tonnes),Thousand tonnes,3.40,5.60,12.80,15.80,19.00,17.60,16.00,16.50,13.90,11.40,6.7,4.30,143.0
4,2006,Butter (Thousand tonnes),Thousand tonnes,3.90,5.20,11.00,14.90,19.80,17.20,15.80,14.70,13.80,10.50,17.3,5.00,149.1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,2021,Heifers 200-249kg,Euro,540.79,498.79,525.12,526.39,499.10,485.51,515.19,480.00,450.74,516.00,419.66,490.37,
96,2021,Heifers 250-299kg,Euro,614.47,601.33,610.85,613.50,601.35,608.60,623.33,598.07,570.54,529.67,547.42,623.57,
97,2021,Heifers 300-349kg,Euro,691.65,690.26,713.21,720.86,689.16,730.25,723.04,689.20,694.88,656.53,641.39,693.47,
98,2021,Heifers 350-399kg,Euro,798.03,814.00,827.74,845.05,796.61,844.54,837.85,801.61,804.71,777.37,762.83,806.00,


### Drop unused columns

In [None]:
# The "Year.1" column is not used in the monthly dataset, so remove it.
df2_concat = df2_concat.drop(columns=["Year.1"])

In [None]:
df2_concat["Category"].value_counts()

Butter (Thousand tonnes)                                   20
Cheese (Thousand tonnes)                                   20
Domestic milk intake (Million litres)                      20
Fat content (Percent)                                      20
Imported milk intake (Million litres)                      20
Skimmed & semi-skimmed milk sales (Million litres)         20
Skimmed milk powder (Thousand tonnes)                      20
Whole milk sales (Million litres)                          20
Average price of raw milk from Ireland (Euro per 100kg)    20
Heifers 200-249kg                                          20
Heifers 250-299kg                                          20
Heifers 300-349kg                                          20
Heifers 350-399kg                                          20
Heifers 400-449kg                                          20
Cow slaughterings (Thousand tonnes)                        11
Name: Category, dtype: int64

### Melt dataset into long format (one row per year, category and month)

In [None]:
# Reshape wide -> long: every column from position 3 onwards becomes a
# (Month, Amount) pair attached to its Year / Category / Unit.
df_melt2 = pd.melt(
    df2_concat,
    id_vars=["Year", "Category", "Unit"],
    value_vars=df2_concat.columns[3:],
    var_name="Month",
    value_name="Amount",
)

In [None]:
df_melt2

Unnamed: 0,Year,Category,Unit,Month,Amount
0,2002,Butter (Thousand tonnes),Thousand tonnes,Jan,2.3
1,2003,Butter (Thousand tonnes),Thousand tonnes,Jan,2.7
2,2004,Butter (Thousand tonnes),Thousand tonnes,Jan,2.9
3,2005,Butter (Thousand tonnes),Thousand tonnes,Jan,3.4
4,2006,Butter (Thousand tonnes),Thousand tonnes,Jan,3.9
...,...,...,...,...,...
3487,2021,Heifers 200-249kg,Euro,Dec,490.37
3488,2021,Heifers 250-299kg,Euro,Dec,623.57
3489,2021,Heifers 300-349kg,Euro,Dec,693.47
3490,2021,Heifers 350-399kg,Euro,Dec,806.0


### Remove duplicate data

In [None]:
# Remove rows that are exact duplicates across every column.
df_melt2 = df_melt2.drop_duplicates()

In [None]:
df_melt2

Unnamed: 0,Year,Category,Unit,Month,Amount
0,2002,Butter (Thousand tonnes),Thousand tonnes,Jan,2.3
1,2003,Butter (Thousand tonnes),Thousand tonnes,Jan,2.7
2,2004,Butter (Thousand tonnes),Thousand tonnes,Jan,2.9
3,2005,Butter (Thousand tonnes),Thousand tonnes,Jan,3.4
4,2006,Butter (Thousand tonnes),Thousand tonnes,Jan,3.9
...,...,...,...,...,...
3487,2021,Heifers 200-249kg,Euro,Dec,490.37
3488,2021,Heifers 250-299kg,Euro,Dec,623.57
3489,2021,Heifers 300-349kg,Euro,Dec,693.47
3490,2021,Heifers 350-399kg,Euro,Dec,806.0


In [None]:
# Renumber the rows 0..n-1 after de-duplication. drop=True discards the old
# index directly instead of first materialising it as an "index" column and
# then dropping that column again.
df_melt2 = df_melt2.reset_index(drop=True)

In [None]:
df_melt2

Unnamed: 0,Year,Category,Unit,Month,Amount
0,2002,Butter (Thousand tonnes),Thousand tonnes,Jan,2.3
1,2003,Butter (Thousand tonnes),Thousand tonnes,Jan,2.7
2,2004,Butter (Thousand tonnes),Thousand tonnes,Jan,2.9
3,2005,Butter (Thousand tonnes),Thousand tonnes,Jan,3.4
4,2006,Butter (Thousand tonnes),Thousand tonnes,Jan,3.9
...,...,...,...,...,...
3487,2021,Heifers 200-249kg,Euro,Dec,490.37
3488,2021,Heifers 250-299kg,Euro,Dec,623.57
3489,2021,Heifers 300-349kg,Euro,Dec,693.47
3490,2021,Heifers 350-399kg,Euro,Dec,806.0


In [None]:
# Store Year as a 64-bit integer; all other columns keep their dtype.
df_melt2 = df_melt2.astype({"Year": "int64"})

In [None]:
df_melt2

Unnamed: 0,Year,Category,Unit,Month,Amount
0,2002,Butter (Thousand tonnes),Thousand tonnes,Jan,2.3
1,2003,Butter (Thousand tonnes),Thousand tonnes,Jan,2.7
2,2004,Butter (Thousand tonnes),Thousand tonnes,Jan,2.9
3,2005,Butter (Thousand tonnes),Thousand tonnes,Jan,3.4
4,2006,Butter (Thousand tonnes),Thousand tonnes,Jan,3.9
...,...,...,...,...,...
3487,2021,Heifers 200-249kg,Euro,Dec,490.37
3488,2021,Heifers 250-299kg,Euro,Dec,623.57
3489,2021,Heifers 300-349kg,Euro,Dec,693.47
3490,2021,Heifers 350-399kg,Euro,Dec,806.0


### Convert Month value to numeric

In [None]:
# Translate each month label through the `months` lookup; an unknown label
# raises KeyError rather than silently producing a missing value.
df_melt2["Month"] = df_melt2["Month"].map(lambda label: months[label])

### Prepare data for pivoting by creating custom index

In [None]:
# Composite "Year-Month-Category" key used as the row label for the pivot.
df_melt2["Index"] = [
    "-".join(map(str, key_parts))
    for key_parts in zip(df_melt2["Year"], df_melt2["Month"], df_melt2["Category"])
]

In [None]:
df_melt2["Index"]

0       2002-1-Butter (Thousand tonnes)
1       2003-1-Butter (Thousand tonnes)
2       2004-1-Butter (Thousand tonnes)
3       2005-1-Butter (Thousand tonnes)
4       2006-1-Butter (Thousand tonnes)
                     ...               
3487          2021-12-Heifers 200-249kg
3488          2021-12-Heifers 250-299kg
3489          2021-12-Heifers 300-349kg
3490          2021-12-Heifers 350-399kg
3491          2021-12-Heifers 400-449kg
Name: Index, Length: 3492, dtype: object

### Pivot dataset on the custom index, spreading each Category into its own column

In [None]:
# One row per composite key, one column per Category, cells taken from Amount;
# the key is then turned back into an ordinary "Index" column.
df_pivot2 = (
    df_melt2
    .pivot(index="Index", columns=["Category"], values="Amount")
    .reset_index()
)

In [None]:
df_pivot2

Category,Index,Average price of raw milk from Ireland (Euro per 100kg),Butter (Thousand tonnes),Cheese (Thousand tonnes),Cow slaughterings (Thousand tonnes),Domestic milk intake (Million litres),Fat content (Percent),Heifers 200-249kg,Heifers 250-299kg,Heifers 300-349kg,Heifers 350-399kg,Heifers 400-449kg,Imported milk intake (Million litres),Skimmed & semi-skimmed milk sales (Million litres),Skimmed milk powder (Thousand tonnes),Whole milk sales (Million litres)
0,2002-1-Average price of raw milk from Ireland ...,28.39,,,,,,,,,,,,,,
1,2002-1-Butter (Thousand tonnes),,2.3,,,,,,,,,,,,,
2,2002-1-Cheese (Thousand tonnes),,,1.2,,,,,,,,,,,,
3,2002-1-Domestic milk intake (Million litres),,,,,117.3,,,,,,,,,,
4,2002-1-Fat content (Percent),,,,,,3.82,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3487,2021-9-Heifers 400-449kg,,,,,,,,,,,915.68,,,,
3488,2021-9-Imported milk intake (Million litres),,,,,,,,,,,,0.0,,,
3489,2021-9-Skimmed & semi-skimmed milk sales (Mill...,,,,,,,,,,,,,15.1,,
3490,2021-9-Skimmed milk powder (Thousand tonnes),,,,,,,,,,,,,,11.1,


### Reset index and split the custom index into Year and Month columns

In [None]:
# Renumber the rows 0..n-1. drop=True discards the old index directly instead
# of adding it as an "index" column and immediately dropping that column.
df_pivot2 = df_pivot2.reset_index(drop=True)

In [None]:
df_pivot2

Category,Index,Average price of raw milk from Ireland (Euro per 100kg),Butter (Thousand tonnes),Cheese (Thousand tonnes),Cow slaughterings (Thousand tonnes),Domestic milk intake (Million litres),Fat content (Percent),Heifers 200-249kg,Heifers 250-299kg,Heifers 300-349kg,Heifers 350-399kg,Heifers 400-449kg,Imported milk intake (Million litres),Skimmed & semi-skimmed milk sales (Million litres),Skimmed milk powder (Thousand tonnes),Whole milk sales (Million litres)
0,2002-1-Average price of raw milk from Ireland ...,28.39,,,,,,,,,,,,,,
1,2002-1-Butter (Thousand tonnes),,2.3,,,,,,,,,,,,,
2,2002-1-Cheese (Thousand tonnes),,,1.2,,,,,,,,,,,,
3,2002-1-Domestic milk intake (Million litres),,,,,117.3,,,,,,,,,,
4,2002-1-Fat content (Percent),,,,,,3.82,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3487,2021-9-Heifers 400-449kg,,,,,,,,,,,915.68,,,,
3488,2021-9-Imported milk intake (Million litres),,,,,,,,,,,,0.0,,,
3489,2021-9-Skimmed & semi-skimmed milk sales (Mill...,,,,,,,,,,,,,15.1,,
3490,2021-9-Skimmed milk powder (Thousand tonnes),,,,,,,,,,,,,,11.1,


In [None]:
# Split the composite key back into [year, month, category]. maxsplit=2 stops
# after the first two hyphens, so category names that themselves contain "-"
# (e.g. "Heifers 400-449kg", "Skimmed & semi-skimmed ...") stay in one piece.
# Elements 0 and 1 (year, month) are the same as with an unbounded split.
df_pivot2["Index"] = [key.split("-", 2) for key in df_pivot2["Index"]]

In [None]:
df_pivot2

Category,Index,Average price of raw milk from Ireland (Euro per 100kg),Butter (Thousand tonnes),Cheese (Thousand tonnes),Cow slaughterings (Thousand tonnes),Domestic milk intake (Million litres),Fat content (Percent),Heifers 200-249kg,Heifers 250-299kg,Heifers 300-349kg,Heifers 350-399kg,Heifers 400-449kg,Imported milk intake (Million litres),Skimmed & semi-skimmed milk sales (Million litres),Skimmed milk powder (Thousand tonnes),Whole milk sales (Million litres)
0,"[2002, 1, Average price of raw milk from Irela...",28.39,,,,,,,,,,,,,,
1,"[2002, 1, Butter (Thousand tonnes)]",,2.3,,,,,,,,,,,,,
2,"[2002, 1, Cheese (Thousand tonnes)]",,,1.2,,,,,,,,,,,,
3,"[2002, 1, Domestic milk intake (Million litres)]",,,,,117.3,,,,,,,,,,
4,"[2002, 1, Fat content (Percent)]",,,,,,3.82,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3487,"[2021, 9, Heifers 400, 449kg]",,,,,,,,,,,915.68,,,,
3488,"[2021, 9, Imported milk intake (Million litres)]",,,,,,,,,,,,0.0,,,
3489,"[2021, 9, Skimmed & semi, skimmed milk sales (...",,,,,,,,,,,,,15.1,,
3490,"[2021, 9, Skimmed milk powder (Thousand tonnes)]",,,,,,,,,,,,,,11.1,


In [None]:
# Pull year and month out of the split key. The year goes through float()
# first, so a value rendered like "2002.0" is still accepted.
df_pivot2["Year"] = df_pivot2["Index"].map(lambda parts: int(float(parts[0])))
df_pivot2["Month"] = df_pivot2["Index"].map(lambda parts: int(parts[1]))

In [None]:
df_pivot2

Category,Index,Average price of raw milk from Ireland (Euro per 100kg),Butter (Thousand tonnes),Cheese (Thousand tonnes),Cow slaughterings (Thousand tonnes),Domestic milk intake (Million litres),Fat content (Percent),Heifers 200-249kg,Heifers 250-299kg,Heifers 300-349kg,Heifers 350-399kg,Heifers 400-449kg,Imported milk intake (Million litres),Skimmed & semi-skimmed milk sales (Million litres),Skimmed milk powder (Thousand tonnes),Whole milk sales (Million litres),Year,Month
0,"[2002, 1, Average price of raw milk from Irela...",28.39,,,,,,,,,,,,,,,2002,1
1,"[2002, 1, Butter (Thousand tonnes)]",,2.3,,,,,,,,,,,,,,2002,1
2,"[2002, 1, Cheese (Thousand tonnes)]",,,1.2,,,,,,,,,,,,,2002,1
3,"[2002, 1, Domestic milk intake (Million litres)]",,,,,117.3,,,,,,,,,,,2002,1
4,"[2002, 1, Fat content (Percent)]",,,,,,3.82,,,,,,,,,,2002,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3487,"[2021, 9, Heifers 400, 449kg]",,,,,,,,,,,915.68,,,,,2021,9
3488,"[2021, 9, Imported milk intake (Million litres)]",,,,,,,,,,,,0.0,,,,2021,9
3489,"[2021, 9, Skimmed & semi, skimmed milk sales (...",,,,,,,,,,,,,15.1,,,2021,9
3490,"[2021, 9, Skimmed milk powder (Thousand tonnes)]",,,,,,,,,,,,,,11.1,,2021,9


### Sort columns

In [None]:
# Move the two trailing columns (Year, Month) to the front. The remainder
# starts at position 1, so the leading "Index" column is left out.
cols_sorted = [*df_pivot2.columns[-2:], *df_pivot2.columns[1:-2]]
cols_sorted

['Year',
 'Month',
 'Average price of raw milk from Ireland (Euro per 100kg)',
 'Butter (Thousand tonnes)',
 'Cheese (Thousand tonnes)',
 'Cow slaughterings (Thousand tonnes)',
 'Domestic milk intake (Million litres)',
 'Fat content (Percent)',
 'Heifers 200-249kg',
 'Heifers 250-299kg',
 'Heifers 300-349kg',
 'Heifers 350-399kg',
 'Heifers 400-449kg',
 'Imported milk intake (Million litres)',
 'Skimmed & semi-skimmed milk sales (Million litres)',
 'Skimmed milk powder (Thousand tonnes)',
 'Whole milk sales (Million litres)']

In [None]:
# Keep only the columns listed in cols_sorted, in that order.
df_pivot2 = df_pivot2.loc[:, cols_sorted]

In [None]:
df_pivot2

Category,Year,Month,Average price of raw milk from Ireland (Euro per 100kg),Butter (Thousand tonnes),Cheese (Thousand tonnes),Cow slaughterings (Thousand tonnes),Domestic milk intake (Million litres),Fat content (Percent),Heifers 200-249kg,Heifers 250-299kg,Heifers 300-349kg,Heifers 350-399kg,Heifers 400-449kg,Imported milk intake (Million litres),Skimmed & semi-skimmed milk sales (Million litres),Skimmed milk powder (Thousand tonnes),Whole milk sales (Million litres)
0,2002,1,28.39,,,,,,,,,,,,,,
1,2002,1,,2.3,,,,,,,,,,,,,
2,2002,1,,,1.2,,,,,,,,,,,,
3,2002,1,,,,,117.3,,,,,,,,,,
4,2002,1,,,,,,3.82,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3487,2021,9,,,,,,,,,,,915.68,,,,
3488,2021,9,,,,,,,,,,,,0.0,,,
3489,2021,9,,,,,,,,,,,,,15.1,,
3490,2021,9,,,,,,,,,,,,,,11.1,


In [None]:
# rename(..., axis=1) maps COLUMN LABELS: a column literally named "*" would
# be relabelled 0.
# NOTE(review): none of the labels in cols_sorted is "*", so this looks like a
# no-op. Possibly `replace` (turning "*" cell values into 0) was intended -
# confirm against the raw data before changing it.
df_pivot2 = df_pivot2.rename({"*": 0}, axis=1)

In [None]:
# Each pivoted row holds a value for one Category only; fill every other cell
# with 0 so the per-(Year, Month) sum below collapses them into one full row.
df_pivot2 = df_pivot2.fillna(0)

In [None]:
df_pivot2

Category,Year,Month,Average price of raw milk from Ireland (Euro per 100kg),Butter (Thousand tonnes),Cheese (Thousand tonnes),Cow slaughterings (Thousand tonnes),Domestic milk intake (Million litres),Fat content (Percent),Heifers 200-249kg,Heifers 250-299kg,Heifers 300-349kg,Heifers 350-399kg,Heifers 400-449kg,Imported milk intake (Million litres),Skimmed & semi-skimmed milk sales (Million litres),Skimmed milk powder (Thousand tonnes),Whole milk sales (Million litres)
0,2002,1,28.39,0.0,0.0,0,0.0,0.00,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,0.0
1,2002,1,0.00,2.3,0.0,0,0.0,0.00,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,0.0
2,2002,1,0.00,0.0,1.2,0,0.0,0.00,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,0.0
3,2002,1,0.00,0.0,0.0,0,117.3,0.00,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,0.0
4,2002,1,0.00,0.0,0.0,0,0.0,3.82,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3487,2021,9,0.00,0.0,0.0,0,0.0,0.00,0.0,0.0,0.0,0.0,915.68,0.0,0.0,0.0,0.0
3488,2021,9,0.00,0.0,0.0,0,0.0,0.00,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,0.0
3489,2021,9,0.00,0.0,0.0,0,0.0,0.00,0.0,0.0,0.0,0.0,0.00,0.0,15.1,0.0,0.0
3490,2021,9,0.00,0.0,0.0,0,0.0,0.00,0.0,0.0,0.0,0.0,0.00,0.0,0.0,11.1,0.0


### Group by Year and month to allow dataset to represent all data per year and month in one row

In [None]:
# Cast every column of df2_pivot from position 3 onwards to float64.
# NOTE(review): this acts on `df2_pivot`, not on `df_pivot2`, and `df2_pivot`
# is overwritten by the groupby result a few cells further down. The cast may
# have been meant for `df_pivot2` - confirm.
df2_pivot[df2_pivot.columns[3:]] = df2_pivot[df2_pivot.columns[3:]].astype("float64")

In [None]:
# Year, Month and the skimmed-milk-powder column only.
df_nuisance = df_pivot2.loc[:, ["Year", "Month", "Skimmed milk powder (Thousand tonnes)"]]

In [None]:
df_nuisance

Category,Year,Month,Skimmed milk powder (Thousand tonnes)
0,2002,1,0.0
1,2002,1,0.0
2,2002,1,0.0
3,2002,1,0.0
4,2002,1,0.0
...,...,...,...
3487,2021,9,0.0
3488,2021,9,0.0
3489,2021,9,0.0
3490,2021,9,11.1


In [None]:
# Total skimmed milk powder per (Year, Month), with the group keys turned back
# into ordinary columns.
df_nuisance = (
    df_nuisance
    .groupby(["Year", "Month"])
    .aggregate({"Skimmed milk powder (Thousand tonnes)": "sum"})
    .reset_index()
)

In [None]:
df_nuisance

Category,Year,Month,Skimmed milk powder (Thousand tonnes)
0,2002,1,3.1
1,2002,2,2.9
2,2002,3,5.3
3,2002,4,8.9
4,2002,5,11.8
...,...,...,...
235,2021,8,16.4
236,2021,9,11.1
237,2021,10,5.5
238,2021,11,0.0


In [None]:
# Collapse the sparse rows: after fillna(0) each (Year, Month) group has one
# non-zero entry per category, so summing yields one complete row per month.
# This rebinds `df2_pivot`, replacing the earlier heifer table of that name.
# NOTE(review): the displayed result has no "Cow slaughterings" column even
# though cols_sorted lists it - presumably it is non-numeric and sum() dropped
# it; confirm, as this depends on the pandas version.
df2_pivot = df_pivot2.groupby(["Year", "Month"]).sum()

In [None]:
# Turn the (Year, Month) group keys back into ordinary columns.
df2_pivot = df2_pivot.reset_index()

In [None]:
df2_pivot

Category,Year,Month,Average price of raw milk from Ireland (Euro per 100kg),Butter (Thousand tonnes),Cheese (Thousand tonnes),Domestic milk intake (Million litres),Fat content (Percent),Heifers 200-249kg,Heifers 250-299kg,Heifers 300-349kg,Heifers 350-399kg,Heifers 400-449kg,Imported milk intake (Million litres),Skimmed & semi-skimmed milk sales (Million litres),Skimmed milk powder (Thousand tonnes),Whole milk sales (Million litres)
0,2002,1,28.39,2.3,1.2,117.3,3.82,274.71,325.25,390.32,467.19,559.87,16.8,9.9,3.1,36.1
1,2002,2,27.35,3.8,1.7,165.2,3.77,286.94,328.66,398.31,522.77,576.65,21.9,9.3,2.9,32.9
2,2002,3,25.99,9.8,5.2,337.5,3.73,283.65,332.73,437.81,519.85,566.68,29.2,10.1,5.3,34.6
3,2002,4,25.47,16.6,13.7,596.4,3.60,279.94,380.13,455.14,510.17,589.39,31.2,10.0,8.9,34.7
4,2002,5,25.57,20.1,14.8,709.0,3.59,310.74,388.89,440.45,528.76,569.65,31.6,10.3,11.8,35.9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
235,2021,8,39.23,28.8,29.3,917.4,4.19,480.00,598.07,689.20,801.61,913.59,0.0,15.8,16.4,29.0
236,2021,9,42.44,26.5,33.2,776.7,4.43,450.74,570.54,694.88,804.71,915.68,0.0,15.1,11.1,26.6
237,2021,10,46.52,21.6,27.5,652.8,4.77,516.00,529.67,656.53,777.37,890.95,0.0,15.8,5.5,26.1
238,2021,11,48.65,17.8,20.9,460.6,4.90,419.66,547.42,641.39,762.83,888.34,0.0,15.4,0.0,25.5


In [None]:
# Chronological order: by year first, then by month within each year.
df2_pivot = df2_pivot.sort_values(by=["Year", "Month"])

In [None]:
df2_pivot

Category,Year,Month,Average price of raw milk from Ireland (Euro per 100kg),Butter (Thousand tonnes),Cheese (Thousand tonnes),Domestic milk intake (Million litres),Fat content (Percent),Heifers 200-249kg,Heifers 250-299kg,Heifers 300-349kg,Heifers 350-399kg,Heifers 400-449kg,Imported milk intake (Million litres),Skimmed & semi-skimmed milk sales (Million litres),Skimmed milk powder (Thousand tonnes),Whole milk sales (Million litres)
0,2002,1,28.39,2.3,1.2,117.3,3.82,274.71,325.25,390.32,467.19,559.87,16.8,9.9,3.1,36.1
1,2002,2,27.35,3.8,1.7,165.2,3.77,286.94,328.66,398.31,522.77,576.65,21.9,9.3,2.9,32.9
2,2002,3,25.99,9.8,5.2,337.5,3.73,283.65,332.73,437.81,519.85,566.68,29.2,10.1,5.3,34.6
3,2002,4,25.47,16.6,13.7,596.4,3.60,279.94,380.13,455.14,510.17,589.39,31.2,10.0,8.9,34.7
4,2002,5,25.57,20.1,14.8,709.0,3.59,310.74,388.89,440.45,528.76,569.65,31.6,10.3,11.8,35.9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
235,2021,8,39.23,28.8,29.3,917.4,4.19,480.00,598.07,689.20,801.61,913.59,0.0,15.8,16.4,29.0
236,2021,9,42.44,26.5,33.2,776.7,4.43,450.74,570.54,694.88,804.71,915.68,0.0,15.1,11.1,26.6
237,2021,10,46.52,21.6,27.5,652.8,4.77,516.00,529.67,656.53,777.37,890.95,0.0,15.8,5.5,26.1
238,2021,11,48.65,17.8,20.9,460.6,4.90,419.66,547.42,641.39,762.83,888.34,0.0,15.4,0.0,25.5


In [None]:
df2_pivot

Category,Year,Month,Average price of raw milk from Ireland (Euro per 100kg),Butter (Thousand tonnes),Cheese (Thousand tonnes),Domestic milk intake (Million litres),Fat content (Percent),Heifers 200-249kg,Heifers 250-299kg,Heifers 300-349kg,Heifers 350-399kg,Heifers 400-449kg,Imported milk intake (Million litres),Skimmed & semi-skimmed milk sales (Million litres),Skimmed milk powder (Thousand tonnes),Whole milk sales (Million litres)
0,2002,1,28.39,2.3,1.2,117.3,3.82,274.71,325.25,390.32,467.19,559.87,16.8,9.9,3.1,36.1
1,2002,2,27.35,3.8,1.7,165.2,3.77,286.94,328.66,398.31,522.77,576.65,21.9,9.3,2.9,32.9
2,2002,3,25.99,9.8,5.2,337.5,3.73,283.65,332.73,437.81,519.85,566.68,29.2,10.1,5.3,34.6
3,2002,4,25.47,16.6,13.7,596.4,3.60,279.94,380.13,455.14,510.17,589.39,31.2,10.0,8.9,34.7
4,2002,5,25.57,20.1,14.8,709.0,3.59,310.74,388.89,440.45,528.76,569.65,31.6,10.3,11.8,35.9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
235,2021,8,39.23,28.8,29.3,917.4,4.19,480.00,598.07,689.20,801.61,913.59,0.0,15.8,16.4,29.0
236,2021,9,42.44,26.5,33.2,776.7,4.43,450.74,570.54,694.88,804.71,915.68,0.0,15.1,11.1,26.6
237,2021,10,46.52,21.6,27.5,652.8,4.77,516.00,529.67,656.53,777.37,890.95,0.0,15.8,5.5,26.1
238,2021,11,48.65,17.8,20.9,460.6,4.90,419.66,547.42,641.39,762.83,888.34,0.0,15.4,0.0,25.5


### Impute values

In [None]:
from sklearn.impute import KNNImputer

# KNN imputer used for the gaps in df2_pivot; configured to look at the 8 nearest rows.
imputer = KNNImputer(n_neighbors=8)

In [None]:
# Same frame as shown before the imputer was constructed; the imputer has not been fitted yet,
# so the values are unchanged.
df2_pivot

Category,Year,Month,Average price of raw milk from Ireland (Euro per 100kg),Butter (Thousand tonnes),Cheese (Thousand tonnes),Domestic milk intake (Million litres),Fat content (Percent),Heifers 200-249kg,Heifers 250-299kg,Heifers 300-349kg,Heifers 350-399kg,Heifers 400-449kg,Imported milk intake (Million litres),Skimmed & semi-skimmed milk sales (Million litres),Skimmed milk powder (Thousand tonnes),Whole milk sales (Million litres)
0,2002,1,28.39,2.3,1.2,117.3,3.82,274.71,325.25,390.32,467.19,559.87,16.8,9.9,3.1,36.1
1,2002,2,27.35,3.8,1.7,165.2,3.77,286.94,328.66,398.31,522.77,576.65,21.9,9.3,2.9,32.9
2,2002,3,25.99,9.8,5.2,337.5,3.73,283.65,332.73,437.81,519.85,566.68,29.2,10.1,5.3,34.6
3,2002,4,25.47,16.6,13.7,596.4,3.60,279.94,380.13,455.14,510.17,589.39,31.2,10.0,8.9,34.7
4,2002,5,25.57,20.1,14.8,709.0,3.59,310.74,388.89,440.45,528.76,569.65,31.6,10.3,11.8,35.9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
235,2021,8,39.23,28.8,29.3,917.4,4.19,480.00,598.07,689.20,801.61,913.59,0.0,15.8,16.4,29.0
236,2021,9,42.44,26.5,33.2,776.7,4.43,450.74,570.54,694.88,804.71,915.68,0.0,15.1,11.1,26.6
237,2021,10,46.52,21.6,27.5,652.8,4.77,516.00,529.67,656.53,777.37,890.95,0.0,15.8,5.5,26.1
238,2021,11,48.65,17.8,20.9,460.6,4.90,419.66,547.42,641.39,762.83,888.34,0.0,15.4,0.0,25.5


In [None]:
# Columns handed to the imputer: every df2_pivot column except the one at position 12,
# kept in their original order.
targets = [
    label
    for position, label in enumerate(df2_pivot.columns)
    if position != 12
]

In [None]:
# Show the selected column labels ('Imported milk intake (Million litres)' is the one left out).
targets

['Year',
 'Month',
 'Average price of raw milk from Ireland (Euro per 100kg)',
 'Butter (Thousand tonnes)',
 'Cheese (Thousand tonnes)',
 'Domestic milk intake (Million litres)',
 'Fat content (Percent)',
 'Heifers 200-249kg',
 'Heifers 250-299kg',
 'Heifers 300-349kg',
 'Heifers 350-399kg',
 'Heifers 400-449kg',
 'Skimmed & semi-skimmed milk sales (Million litres)',
 'Skimmed milk powder (Thousand tonnes)',
 'Whole milk sales (Million litres)']

In [None]:
# Column 12 is not part of `targets`; keep it aside untouched so it can be re-attached later.
y3 = df2_pivot.iloc[:, 12]

# Treat zeros in the selected columns as missing values, then let the KNN imputer fill them.
# The result `y4` is a plain NumPy array with one column per entry in `targets`.
df2_imputed = df2_pivot.loc[:, targets].replace(0, np.nan)
y4 = imputer.fit_transform(df2_imputed)

In [None]:
# Row count of the held-out column (240 in the recorded run).
len(y3)

240

In [None]:
# Inspect the imputer output: a NumPy array without column labels.
y4

array([[2.0020e+03, 1.0000e+00, 2.8390e+01, ..., 9.9000e+00, 3.1000e+00,
        3.6100e+01],
       [2.0020e+03, 2.0000e+00, 2.7350e+01, ..., 9.3000e+00, 2.9000e+00,
        3.2900e+01],
       [2.0020e+03, 3.0000e+00, 2.5990e+01, ..., 1.0100e+01, 5.3000e+00,
        3.4600e+01],
       ...,
       [2.0210e+03, 1.0000e+01, 4.6520e+01, ..., 1.5800e+01, 5.5000e+00,
        2.6100e+01],
       [2.0210e+03, 1.1000e+01, 4.8650e+01, ..., 1.5400e+01, 5.2625e+00,
        2.5500e+01],
       [2.0210e+03, 1.2000e+01, 4.8460e+01, ..., 1.5000e+01, 7.4000e+00,
        2.5200e+01]])

In [None]:
# fit_transform returns a bare NumPy array, so restore the column labels and reuse
# df2_pivot's row index. The held-out column (y3, taken from df2_pivot) is re-attached
# by index label afterwards, so both frames must share the same index to line up
# row-for-row instead of silently producing NaN.
df2_imputed = pd.DataFrame(y4, columns=targets, index=df2_pivot.index)

In [None]:
# Re-attach the column that was excluded from imputation, with its original values.
# It is appended as the last column.
held_out_label = df2_pivot.columns[12]
df2_imputed[held_out_label] = y3

In [None]:
# Inspect the imputed frame. In the recorded output Year and Month come back as floats
# (they passed through a NumPy float array), former zeros such as row 238's skimmed milk
# powder are filled in, and the untouched imported-intake column sits last.
df2_imputed

Unnamed: 0,Year,Month,Average price of raw milk from Ireland (Euro per 100kg),Butter (Thousand tonnes),Cheese (Thousand tonnes),Domestic milk intake (Million litres),Fat content (Percent),Heifers 200-249kg,Heifers 250-299kg,Heifers 300-349kg,Heifers 350-399kg,Heifers 400-449kg,Skimmed & semi-skimmed milk sales (Million litres),Skimmed milk powder (Thousand tonnes),Whole milk sales (Million litres),Imported milk intake (Million litres)
0,2002.0,1.0,28.39,2.3,1.2,117.3,3.82,274.71,325.25,390.32,467.19,559.87,9.9,3.1000,36.1,16.8
1,2002.0,2.0,27.35,3.8,1.7,165.2,3.77,286.94,328.66,398.31,522.77,576.65,9.3,2.9000,32.9,21.9
2,2002.0,3.0,25.99,9.8,5.2,337.5,3.73,283.65,332.73,437.81,519.85,566.68,10.1,5.3000,34.6,29.2
3,2002.0,4.0,25.47,16.6,13.7,596.4,3.60,279.94,380.13,455.14,510.17,589.39,10.0,8.9000,34.7,31.2
4,2002.0,5.0,25.57,20.1,14.8,709.0,3.59,310.74,388.89,440.45,528.76,569.65,10.3,11.8000,35.9,31.6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
235,2021.0,8.0,39.23,28.8,29.3,917.4,4.19,480.00,598.07,689.20,801.61,913.59,15.8,16.4000,29.0,0.0
236,2021.0,9.0,42.44,26.5,33.2,776.7,4.43,450.74,570.54,694.88,804.71,915.68,15.1,11.1000,26.6,0.0
237,2021.0,10.0,46.52,21.6,27.5,652.8,4.77,516.00,529.67,656.53,777.37,890.95,15.8,5.5000,26.1,0.0
238,2021.0,11.0,48.65,17.8,20.9,460.6,4.90,419.66,547.42,641.39,762.83,888.34,15.4,5.2625,25.5,0.0


In [None]:
# Shorten the verbose column labels using the `translations` lookup defined earlier in the notebook.
df2_imputed = df2_imputed.rename(columns=translations)

In [None]:
# Confirm the shortened column names (e.g. "Raw milk price", "Butter", "Imported milk intake").
df2_imputed

Unnamed: 0,Year,Month,Raw milk price,Butter,Cheese,Domestic milk intake,Fat content (Percent),Heifers 200-249kg,Heifers 250-299kg,Heifers 300-349kg,Heifers 350-399kg,Heifers 400-449kg,Skimmed milk sales,Skimmed milk powder,Whole milk sales,Imported milk intake
0,2002.0,1.0,28.39,2.3,1.2,117.3,3.82,274.71,325.25,390.32,467.19,559.87,9.9,3.1000,36.1,16.8
1,2002.0,2.0,27.35,3.8,1.7,165.2,3.77,286.94,328.66,398.31,522.77,576.65,9.3,2.9000,32.9,21.9
2,2002.0,3.0,25.99,9.8,5.2,337.5,3.73,283.65,332.73,437.81,519.85,566.68,10.1,5.3000,34.6,29.2
3,2002.0,4.0,25.47,16.6,13.7,596.4,3.60,279.94,380.13,455.14,510.17,589.39,10.0,8.9000,34.7,31.2
4,2002.0,5.0,25.57,20.1,14.8,709.0,3.59,310.74,388.89,440.45,528.76,569.65,10.3,11.8000,35.9,31.6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
235,2021.0,8.0,39.23,28.8,29.3,917.4,4.19,480.00,598.07,689.20,801.61,913.59,15.8,16.4000,29.0,0.0
236,2021.0,9.0,42.44,26.5,33.2,776.7,4.43,450.74,570.54,694.88,804.71,915.68,15.1,11.1000,26.6,0.0
237,2021.0,10.0,46.52,21.6,27.5,652.8,4.77,516.00,529.67,656.53,777.37,890.95,15.8,5.5000,26.1,0.0
238,2021.0,11.0,48.65,17.8,20.9,460.6,4.90,419.66,547.42,641.39,762.83,888.34,15.4,5.2625,25.5,0.0


In [None]:
# Persist the imputed frame in the datasets folder. to_csv writes the row index as the
# first, unnamed CSV column by default.
milk_output_path = cwd + "milk dataset 2.csv"
df2_imputed.to_csv(path_or_buf=milk_output_path)

## Netherlands dataset

In [None]:
# Load the Netherlands production dataset from the datasets folder.
netherlands_csv_path = cwd + "netherlands milk dataset.csv"
dfn = pd.read_csv(filepath_or_buffer=netherlands_csv_path)

In [None]:
# Inspect the raw frame. Note the row order in the recorded output: within each year the
# periods are sorted alphabetically by month name (April, August, December, ...), not
# chronologically.
dfn

Unnamed: 0,Periods,Volume,Fat content,Butter,Cheese,Skimmed-milk powder,Concentrated milk
0,2002 April,882.72,4.53,12.02,53.17,6.52,22.46
1,2002 August,873.31,4.15,8.12,53.05,4.23,22.73
2,2002 December,894.93,4.62,11.06,55.78,6.32,24.14
3,2002 February,775.81,4.54,10.28,46.14,6.42,23.13
4,2002 January,888.93,4.64,11.91,54.34,7.23,21.94
...,...,...,...,...,...,...,...
235,2021 March,1214.58,4.57,13.38,84.30,7.21,31.27
236,2021 May,1213.26,4.41,11.66,83.84,10.11,33.34
237,2021 November,1059.60,4.53,9.58,75.36,8.46,31.66
238,2021 October,1087.52,4.47,11.24,77.56,5.55,31.83


### Split Periods into Year and Month features

In [None]:
# Break each "YYYY MonthName" label into a [year, month-name] pair.
dfn["Periods"] = dfn["Periods"].map(lambda period: period.split(" "))

In [None]:
# Periods now holds two-element lists such as [2002, April].
dfn

Unnamed: 0,Periods,Volume,Fat content,Butter,Cheese,Skimmed-milk powder,Concentrated milk
0,"[2002, April]",882.72,4.53,12.02,53.17,6.52,22.46
1,"[2002, August]",873.31,4.15,8.12,53.05,4.23,22.73
2,"[2002, December]",894.93,4.62,11.06,55.78,6.32,24.14
3,"[2002, February]",775.81,4.54,10.28,46.14,6.42,23.13
4,"[2002, January]",888.93,4.64,11.91,54.34,7.23,21.94
...,...,...,...,...,...,...,...
235,"[2021, March]",1214.58,4.57,13.38,84.30,7.21,31.27
236,"[2021, May]",1213.26,4.41,11.66,83.84,10.11,33.34
237,"[2021, November]",1059.60,4.53,9.58,75.36,8.46,31.66
238,"[2021, October]",1087.52,4.47,11.24,77.56,5.55,31.83


In [None]:
# Spread the [year, month-name] pairs into two separate columns (both still strings here).
dfn["Year"] = dfn["Periods"].map(lambda parts: parts[0])
dfn["Month"] = dfn["Periods"].map(lambda parts: parts[1])

In [None]:
# Month still contains the full English month names at this point.
dfn["Month"]

0          April
1         August
2       December
3       February
4        January
         ...    
235        March
236          May
237     November
238      October
239    September
Name: Month, Length: 240, dtype: object

### Convert month names to month numbers

In [None]:
# Translate each month label through the `month_names` lookup (defined earlier in the notebook).
# NOTE(review): presumably maps full names to the abbreviations used as keys of `months` - confirm.
# A label missing from the lookup raises, exactly as a direct subscript would.
dfn["Month"] = dfn["Month"].map(lambda full_name: month_names[full_name])

In [None]:
# Second lookup: turn the month label into its month number via `months`
# (the recorded output shows 4 for April, 8 for August, ...).
dfn["Month"] = dfn["Month"].map(lambda label: months[label])

In [None]:
# Month is now numeric; the original Periods list column is still present and is dropped next.
dfn

Unnamed: 0,Periods,Volume,Fat content,Butter,Cheese,Skimmed-milk powder,Concentrated milk,Year,Month
0,"[2002, April]",882.72,4.53,12.02,53.17,6.52,22.46,2002,4
1,"[2002, August]",873.31,4.15,8.12,53.05,4.23,22.73,2002,8
2,"[2002, December]",894.93,4.62,11.06,55.78,6.32,24.14,2002,12
3,"[2002, February]",775.81,4.54,10.28,46.14,6.42,23.13,2002,2
4,"[2002, January]",888.93,4.64,11.91,54.34,7.23,21.94,2002,1
...,...,...,...,...,...,...,...,...,...
235,"[2021, March]",1214.58,4.57,13.38,84.30,7.21,31.27,2021,3
236,"[2021, May]",1213.26,4.41,11.66,83.84,10.11,33.34,2021,5
237,"[2021, November]",1059.60,4.53,9.58,75.36,8.46,31.66,2021,11
238,"[2021, October]",1087.52,4.47,11.24,77.56,5.55,31.83,2021,10


### Sort columns

In [None]:
# Put the Year/Month keys first and keep every remaining column except the raw "Periods"
# list. Selecting by name rather than by position yields the same order on a first run and
# stays correct if this cell is re-run after dfn has already been reordered (positional
# slicing would then silently drop "Year").
key_cols = ["Year", "Month"]
cols = key_cols + [
    col for col in dfn.columns if col not in key_cols and col != "Periods"
]
cols

['Year',
 'Month',
 'Volume',
 'Fat content',
 'Butter',
 'Cheese',
 'Skimmed-milk powder',
 'Concentrated milk']

In [None]:
# Repeat display of the column order computed in the previous cell.
cols

['Year',
 'Month',
 'Volume',
 'Fat content',
 'Butter',
 'Cheese',
 'Skimmed-milk powder',
 'Concentrated milk']

In [None]:
# Reorder/select the columns and take an explicit copy: the following cells assign new
# values into dfn's columns, and writing into a bare column slice is what triggers
# pandas' SettingWithCopyWarning.
dfn = dfn[cols].copy()

In [None]:
# Year and Month now lead the frame and the Periods column is gone.
dfn

Unnamed: 0,Year,Month,Volume,Fat content,Butter,Cheese,Skimmed-milk powder,Concentrated milk
0,2002,4,882.72,4.53,12.02,53.17,6.52,22.46
1,2002,8,873.31,4.15,8.12,53.05,4.23,22.73
2,2002,12,894.93,4.62,11.06,55.78,6.32,24.14
3,2002,2,775.81,4.54,10.28,46.14,6.42,23.13
4,2002,1,888.93,4.64,11.91,54.34,7.23,21.94
...,...,...,...,...,...,...,...,...
235,2021,3,1214.58,4.57,13.38,84.30,7.21,31.27
236,2021,5,1213.26,4.41,11.66,83.84,10.11,33.34
237,2021,11,1059.60,4.53,9.58,75.36,8.46,31.66
238,2021,10,1087.52,4.47,11.24,77.56,5.55,31.83


### Convert volume into Million litres (using formula 1000 tonnes == 971164 litres)

In [None]:
# Conversion factor from the section heading: 1000 tonnes == 971164 litres, so a value
# multiplied by 0.971164 is expressed in million litres.
# NOTE(review): this assumes Volume is recorded in thousand tonnes - confirm against the source.
MILLION_LITRES_PER_THOUSAND_TONNES = 0.971164

# Build a new frame instead of assigning into dfn in place, which avoids the
# SettingWithCopyWarning raised when dfn is a column slice of an earlier frame.
# Running this cell twice applies the factor twice.
dfn = dfn.assign(
    Volume=dfn["Volume"].astype(float) * MILLION_LITRES_PER_THOUSAND_TONNES
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [None]:
# Year was produced by splitting a string and Month by a dictionary lookup; make both
# proper integer columns. astype returns a new frame, so nothing is written into a
# column slice and no SettingWithCopyWarning is raised.
dfn = dfn.astype({"Year": "int64", "Month": "int64"})

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [None]:
# Volume is now scaled by 0.971164 (882.72 -> 857.265886 in the recorded output).
dfn

Unnamed: 0,Year,Month,Volume,Fat content,Butter,Cheese,Skimmed-milk powder,Concentrated milk
0,2002,4,857.265886,4.53,12.02,53.17,6.52,22.46
1,2002,8,848.127233,4.15,8.12,53.05,4.23,22.73
2,2002,12,869.123799,4.62,11.06,55.78,6.32,24.14
3,2002,2,753.438743,4.54,10.28,46.14,6.42,23.13
4,2002,1,863.296815,4.64,11.91,54.34,7.23,21.94
...,...,...,...,...,...,...,...,...
235,2021,3,1179.556371,4.57,13.38,84.30,7.21,31.27
236,2021,5,1178.274435,4.41,11.66,83.84,10.11,33.34
237,2021,11,1029.045374,4.53,9.58,75.36,8.46,31.66
238,2021,10,1056.160273,4.47,11.24,77.56,5.55,31.83


### Extract Netherlands Raw values from Average price dataset

In [None]:
# Keep only the rows of the wide price table that describe the Netherlands raw-milk price
# (one row per year, one column per month abbreviation).
is_netherlands_price = df4_pivot["Category"].eq("Average price of raw milk from Netherlands")
df4_netherlands = df4_pivot.loc[is_netherlands_price]
df4_netherlands

Month,Year,Category,Unit,Apr,Aug,Dec,Feb,Jan,Jul,Jun,Mar,May,Nov,Oct,Sep
1,2002,Average price of raw milk from Netherlands,Euro per 100kg,30.42,35.02,33.52,32.99,33.49,30.34,30.27,31.04,30.25,35.83,35.49,35.12
3,2003,Average price of raw milk from Netherlands,Euro per 100kg,28.82,34.58,32.91,30.04,30.61,29.6,29.14,28.82,29.14,35.19,34.85,34.58
5,2004,Average price of raw milk from Netherlands,Euro per 100kg,26.0401,31.8401,31.7101,28.1401,29.6301,30.4701,26.0401,26.5701,26.0401,32.3601,32.1401,31.9501
7,2005,Average price of raw milk from Netherlands,Euro per 100kg,26.27,31.8,29.27,27.97,28.31,26.44,26.27,26.23,26.27,31.81,31.8,31.8
9,2006,Average price of raw milk from Netherlands,Euro per 100kg,25.44,30.92,27.66,27.64,27.64,25.49,25.29,25.73,25.44,30.78,30.78,30.92
11,2007,Average price of raw milk from Netherlands,Euro per 100kg,26.34,37.36,42.73,27.93,27.73,25.6,27.4,26.03,26.34,40.0,45.09,41.32
13,2008,Average price of raw milk from Netherlands,Euro per 100kg,35.78,37.99,32.95,38.83,38.97,32.52,32.21,36.29,34.5,36.44,37.13,37.45
15,2009,Average price of raw milk from Netherlands,Euro per 100kg,22.6,27.02,33.61,27.34,28.64,23.19,23.2,22.59,21.56,33.36,30.12,28.06
17,2010,Average price of raw milk from Netherlands,Euro per 100kg,26.55,33.75,35.65,28.18,29.66,33.75,31.83936,25.27,28.84,35.65,34.65,33.75
19,2011,Average price of raw milk from Netherlands,Euro per 100kg,36.0,37.25,37.5,35.65,35.65,38.0,38.0,36.0,38.0,37.25,37.0,36.25


In [None]:
# Relabel the month-abbreviation columns with their month numbers via the `months` lookup.
df4_netherlands = df4_netherlands.rename(columns=months)

In [None]:
# Renumber the rows 0..n-1 and discard the old (unnamed) index left over from filtering.
df4_netherlands = df4_netherlands.reset_index(drop=True)

In [None]:
# Month columns are now labelled 1-12 (in the source's column order) and rows are renumbered from 0.
df4_netherlands

Month,Year,Category,Unit,4,8,12,2,1,7,6,3,5,11,10,9
0,2002,Average price of raw milk from Netherlands,Euro per 100kg,30.42,35.02,33.52,32.99,33.49,30.34,30.27,31.04,30.25,35.83,35.49,35.12
1,2003,Average price of raw milk from Netherlands,Euro per 100kg,28.82,34.58,32.91,30.04,30.61,29.6,29.14,28.82,29.14,35.19,34.85,34.58
2,2004,Average price of raw milk from Netherlands,Euro per 100kg,26.0401,31.8401,31.7101,28.1401,29.6301,30.4701,26.0401,26.5701,26.0401,32.3601,32.1401,31.9501
3,2005,Average price of raw milk from Netherlands,Euro per 100kg,26.27,31.8,29.27,27.97,28.31,26.44,26.27,26.23,26.27,31.81,31.8,31.8
4,2006,Average price of raw milk from Netherlands,Euro per 100kg,25.44,30.92,27.66,27.64,27.64,25.49,25.29,25.73,25.44,30.78,30.78,30.92
5,2007,Average price of raw milk from Netherlands,Euro per 100kg,26.34,37.36,42.73,27.93,27.73,25.6,27.4,26.03,26.34,40.0,45.09,41.32
6,2008,Average price of raw milk from Netherlands,Euro per 100kg,35.78,37.99,32.95,38.83,38.97,32.52,32.21,36.29,34.5,36.44,37.13,37.45
7,2009,Average price of raw milk from Netherlands,Euro per 100kg,22.6,27.02,33.61,27.34,28.64,23.19,23.2,22.59,21.56,33.36,30.12,28.06
8,2010,Average price of raw milk from Netherlands,Euro per 100kg,26.55,33.75,35.65,28.18,29.66,33.75,31.83936,25.27,28.84,35.65,34.65,33.75
9,2011,Average price of raw milk from Netherlands,Euro per 100kg,36.0,37.25,37.5,35.65,35.65,38.0,38.0,36.0,38.0,37.25,37.0,36.25


### Melt dataset to represent raw values by Year and Month

In [None]:
# Unpivot the twelve month columns (everything after Year/Category/Unit) into long form:
# one row per (Year, Month) with the price in `value`.
# value_vars is given the column labels themselves rather than a DataFrame slice, and the
# variable column is named explicitly instead of relying on the columns' index name.
month_columns = list(df4_netherlands.columns[3:])
df4_netherlands = df4_netherlands.melt(
    id_vars=["Year", "Category", "Unit"],
    value_vars=month_columns,
    var_name="Month",
)

In [None]:
# Fold the unit into the category label, producing "<Category> (<Unit>)".
df4_netherlands["Category"] = [
    category + " (" + unit + ")"
    for category, unit in df4_netherlands[["Category", "Unit"]].itertuples(index=False, name=None)
]

In [None]:
# Long-form frame: one row per (Year, Month) with the unit-qualified Category label.
df4_netherlands

Unnamed: 0,Year,Category,Unit,Month,value
0,2002,Average price of raw milk from Netherlands (Eu...,Euro per 100kg,4,30.4200
1,2003,Average price of raw milk from Netherlands (Eu...,Euro per 100kg,4,28.8200
2,2004,Average price of raw milk from Netherlands (Eu...,Euro per 100kg,4,26.0401
3,2005,Average price of raw milk from Netherlands (Eu...,Euro per 100kg,4,26.2700
4,2006,Average price of raw milk from Netherlands (Eu...,Euro per 100kg,4,25.4400
...,...,...,...,...,...
235,2017,Average price of raw milk from Netherlands (Eu...,Euro per 100kg,9,40.5000
236,2018,Average price of raw milk from Netherlands (Eu...,Euro per 100kg,9,37.0000
237,2019,Average price of raw milk from Netherlands (Eu...,Euro per 100kg,9,35.0000
238,2020,Average price of raw milk from Netherlands (Eu...,Euro per 100kg,9,33.7500


### Pivot dataset to match format

In [None]:
# Long -> wide: index the rows by (Year, Month) and give each Category label its own
# column holding the corresponding `value`.
pivot_spec = {"index": ["Year", "Month"], "columns": "Category", "values": "value"}
df4_netherlands = df4_netherlands.pivot(**pivot_spec)

In [None]:
# The pivoted frame is indexed by (Year, Month) and sorted chronologically.
df4_netherlands

Unnamed: 0_level_0,Category,Average price of raw milk from Netherlands (Euro per 100kg)
Year,Month,Unnamed: 2_level_1
2002,1,33.49
2002,2,32.99
2002,3,31.04
2002,4,30.42
2002,5,30.25
...,...,...
2021,8,37.75
2021,9,38.00
2021,10,39.50
2021,11,41.25


In [None]:
# Move the (Year, Month) index levels back into ordinary columns, leaving a 0..n-1 row index.
# Not idempotent: running this cell a second time would add an extra "index" column.
df4_netherlands = df4_netherlands.reset_index()

In [None]:
# Flat frame with Year, Month and the Netherlands price column, in chronological order.
df4_netherlands

Category,Year,Month,Average price of raw milk from Netherlands (Euro per 100kg)
0,2002,1,33.49
1,2002,2,32.99
2,2002,3,31.04
3,2002,4,30.42
4,2002,5,30.25
...,...,...,...
235,2021,8,37.75
236,2021,9,38.00
237,2021,10,39.50
238,2021,11,41.25


### Join Raw milk value dataset with Netherlands dataset

In [None]:
# Attach the Netherlands raw-milk price to each production row by matching on (Year, Month).
# The two frames are ordered differently: dfn follows the source file's alphabetical month
# order (April, August, ...), while df4_netherlands is sorted chronologically by the pivot.
# An index-based join would therefore pair each row with another month's price.
netherlands_price_col = "Average price of raw milk from Netherlands (Euro per 100kg)"
month_keys = ["Year", "Month"]
key_dtypes = {"Year": "int64", "Month": "int64"}

# Cast the keys on both sides so the match cannot fail on a string-vs-integer mismatch.
netherlands_prices = df4_netherlands[month_keys + [netherlands_price_col]].astype(key_dtypes)

# A left merge keeps every dfn row in its existing order. validate guards against
# duplicated (Year, Month) keys on the price side, which would multiply rows.
df4_category = dfn.astype(key_dtypes).merge(
    netherlands_prices,
    on=month_keys,
    how="left",
    validate="many_to_one",
)

In [None]:
# Inspect the combined frame; each row's price should be the one recorded for that row's
# own (Year, Month) in df4_netherlands.
df4_category

Unnamed: 0,Year,Month,Volume,Fat content,Butter,Cheese,Skimmed-milk powder,Concentrated milk,Average price of raw milk from Netherlands (Euro per 100kg)
0,2002,4,857.265886,4.53,12.02,53.17,6.52,22.46,33.49
1,2002,8,848.127233,4.15,8.12,53.05,4.23,22.73,32.99
2,2002,12,869.123799,4.62,11.06,55.78,6.32,24.14,31.04
3,2002,2,753.438743,4.54,10.28,46.14,6.42,23.13,30.42
4,2002,1,863.296815,4.64,11.91,54.34,7.23,21.94,30.25
...,...,...,...,...,...,...,...,...,...
235,2021,3,1179.556371,4.57,13.38,84.30,7.21,31.27,37.75
236,2021,5,1178.274435,4.41,11.66,83.84,10.11,33.34,38.00
237,2021,11,1029.045374,4.53,9.58,75.36,8.46,31.66,39.50
238,2021,10,1056.160273,4.47,11.24,77.56,5.55,31.83,41.25


### Impute values

In [None]:
from sklearn.impute import KNNImputer

# Rebind `imputer` to a 4-neighbour KNN imputer fitted on the Netherlands frame, then use
# it to fill that same frame. `y` is a NumPy array, so the resulting DataFrame has default
# integer column labels until they are restored afterwards.
imputer = KNNImputer(n_neighbors=4).fit(df4_category)
y = imputer.transform(df4_category)
df4_imputed = pd.DataFrame(data=y)

In [None]:
# Restore the column labels that were lost when the imputer returned a NumPy array.
df4_imputed = df4_imputed.set_axis(df4_category.columns, axis=1)

In [None]:
# Apply the shared `translations` lookup to the column labels; labels it does not cover
# are left unchanged.
df4_imputed = df4_imputed.rename(columns=translations)

In [None]:
# Inspect the imputed Netherlands frame. In the recorded output the column labels are the
# same as before the rename, and Year/Month appear as floats after the NumPy round-trip.
df4_imputed

Unnamed: 0,Year,Month,Volume,Fat content,Butter,Cheese,Skimmed-milk powder,Concentrated milk,Average price of raw milk from Netherlands (Euro per 100kg)
0,2002.0,4.0,857.265886,4.53,12.02,53.17,6.52,22.46,33.49
1,2002.0,8.0,848.127233,4.15,8.12,53.05,4.23,22.73,32.99
2,2002.0,12.0,869.123799,4.62,11.06,55.78,6.32,24.14,31.04
3,2002.0,2.0,753.438743,4.54,10.28,46.14,6.42,23.13,30.42
4,2002.0,1.0,863.296815,4.64,11.91,54.34,7.23,21.94,30.25
...,...,...,...,...,...,...,...,...,...
235,2021.0,3.0,1179.556371,4.57,13.38,84.30,7.21,31.27,37.75
236,2021.0,5.0,1178.274435,4.41,11.66,83.84,10.11,33.34,38.00
237,2021.0,11.0,1029.045374,4.53,9.58,75.36,8.46,31.66,39.50
238,2021.0,10.0,1056.160273,4.47,11.24,77.56,5.55,31.83,41.25


In [None]:
# Persist the imputed Netherlands frame in the datasets folder. to_csv writes the row
# index as the first, unnamed CSV column by default.
netherlands_output_path = cwd + "netherlands dataset output.csv"
df4_imputed.to_csv(path_or_buf=netherlands_output_path)