# Data processing

In [1]:
import pandas as pd

# Load the three cleaned input tables, parsing the `date` column as datetimes.
fuel, brent, usd = (
    pd.read_csv(csv_path, parse_dates=['date'])
    for csv_path in (
        '../data/processed/fuel_clean.csv',
        '../data/processed/brent_clean.csv',
        '../data/processed/usd_eur_clean.csv',
    )
)

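The Brent and USD/EUR datasets are weekly with Sunday labels, so we shift their dates forward by one day to Monday so that they line up with the fuel price dates in the merge below.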

In [2]:
# Move Sunday-labelled weekly observations to the following Monday.
# Only rows that still fall on a Sunday are shifted, so re-running this cell
# is a no-op instead of pushing the labels one more day forward each time.
for frame in (brent, usd):
    on_sunday = frame['date'].dt.dayofweek == 6   # pandas: Monday=0 ... Sunday=6
    frame.loc[on_sunday, 'date'] += pd.Timedelta(days=1)

Merge Brent with FX and convert to EUR

In [3]:
# Pair each Brent quote with the FX rate of the same date (inner join),
# derive the EUR price as brent_usd * usd_eur, and keep only date + EUR price.
brent_fx = (
    brent.merge(usd, how='inner', on='date')
    .assign(brent_eur=lambda joined: joined['brent_usd'] * joined['usd_eur'])
    .loc[:, ['date', 'brent_eur']]
)

Merge with fuel prices

In [5]:
# Inner-join the fuel prices with the Brent EUR series on `date`;
# dates present in only one of the two tables are dropped.
weekly = fuel.merge(brent_fx, how='inner', on='date')

weekly.info()
weekly.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1048 entries, 0 to 1047
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   date       1048 non-null   datetime64[ns]
 1   benzina    1048 non-null   float64       
 2   gasolio    1048 non-null   float64       
 3   brent_eur  1048 non-null   float64       
dtypes: datetime64[ns](1), float64(3)
memory usage: 32.9 KB


Unnamed: 0,date,benzina,gasolio,brent_eur
0,2005-01-03,1.11575,1.01828,33.07588
1,2005-01-10,1.088,1.00439,34.44945
2,2005-01-17,1.08814,1.00431,35.047472
3,2005-01-24,1.09001,1.00431,34.463165
4,2005-01-31,1.13211,1.0226,34.093752


In [6]:
# Sanity checks on the merged weekly frame before it is saved.
print(weekly.shape, weekly['date'].min(), weekly['date'].max())
print(weekly['date'].dt.day_name().unique())   # should show 'Monday'

# Sort explicitly when needed. `sort_values(..., inplace=True)` returns None,
# so placing it on the right-hand side of `assert ... or ...` would sort the
# frame and then still raise AssertionError (and be skipped entirely under -O).
if not weekly['date'].is_monotonic_increasing:
    weekly = weekly.sort_values('date', ignore_index=True)

# Each week must appear exactly once after the merges.
assert not weekly.duplicated('date').any(), "duplicate dates found in weekly"
weekly.head()


(1048, 4) 2005-01-03 00:00:00 2025-11-03 00:00:00
['Monday']


Unnamed: 0,date,benzina,gasolio,brent_eur
0,2005-01-03,1.11575,1.01828,33.07588
1,2005-01-10,1.088,1.00439,34.44945
2,2005-01-17,1.08814,1.00431,35.047472
3,2005-01-24,1.09001,1.00431,34.463165
4,2005-01-31,1.13211,1.0226,34.093752


Save the processed dataset

In [7]:
# Write the merged weekly table to disk; the row index is not written to the file.
weekly.to_csv(path_or_buf='../data/processed/weekly_dataset.csv', index=False)
