# Scraping tables from websites
Source page (OMIP spot market data): https://www.omip.pt/es/market-data/spot?date=2024-04-27&commodity=EL&zone=ES

## Download raw data

In [1]:
import pandas as pd

In [12]:
# Day to request; interpolated into the page URL and reused as the CSV file name.
date = '2024-04-27'

In [13]:
# Build the spot-market page address for the chosen `date`.
url = f'https://www.omip.pt/es/market-data/spot?date={date}&commodity=EL&zone=ES'
# Echo the final URL so the query string can be checked by eye.
url

'https://www.omip.pt/es/market-data/spot?date=2024-04-27&commodity=EL&zone=ES'

In [4]:
# Fetch the page and parse its HTML tables; the result is a list of DataFrames.
dfs = pd.read_html(url)

In [5]:
# Show everything that was parsed: two tables come back for this page.
dfs

[        Index  Price (€)  Chart
 0   SPEL BASE      25.02    NaN
 1         NaN        NaN    NaN
 2  SPEL SOLAR       3.51    NaN
 3         NaN        NaN    NaN,
       Hours  Price (€)  Volume (MWh)
 0   00h-01h      47.88       17459.0
 1   01h-02h      40.00       17285.8
 ..      ...        ...           ...
 22  22h-23h      62.78       16324.9
 23  23h-24h      58.52       17017.6
 
 [24 rows x 3 columns]]

In [7]:
# First table: index prices (SPEL BASE / SPEL SOLAR), padded with empty rows.
dfs[0]

Unnamed: 0,Index,Price (€),Chart
0,SPEL BASE,25.02,
1,,,
2,SPEL SOLAR,3.51,
3,,,


In [8]:
# Second table: one row per hour with price and volume (24 rows).
dfs[1]

Unnamed: 0,Hours,Price (€),Volume (MWh)
0,00h-01h,47.88,17459.0
1,01h-02h,40.00,17285.8
...,...,...,...
22,22h-23h,62.78,16324.9
23,23h-24h,58.52,17017.6


## Write to file

In [9]:
# Keep only the hourly price/volume table; it is selected by position in the list.
df = dfs[1]

In [14]:
# Confirm which day is about to be used for the file name.
date

'2024-04-27'

In [15]:
# Save the hourly table as one CSV per day; index=False leaves the row index out of the file.
# NOTE(review): assumes the ../data/raw directory already exists — confirm before running.
df.to_csv(f'../data/raw/{date}.csv', index=False)

'../data/2024-04-27.csv'

## Refactor code into function

In [30]:
import os

In [34]:
def download_omie(date, path=None):
    """Download the hourly spot table for one day from the OMIP market-data page.

    Parameters
    ----------
    date : str or date-like
        Day to request. A string is inserted into the URL unchanged (this
        notebook uses the ``YYYY-MM-DD`` form). Any other value accepted by
        ``pd.Timestamp`` (``datetime.date``, ``datetime.datetime``,
        ``pd.Timestamp``, ...) is formatted as ``YYYY-MM-DD`` first, so the
        elements of a ``pd.date_range`` can be passed directly.
    path : str or path-like, optional
        Directory in which to save the table as ``<date>.csv``. The directory
        is created when it does not exist. When falsy (the default) nothing
        is written to disk.

    Returns
    -------
    pandas.DataFrame
        The second table that ``pd.read_html`` finds on the page.

    Raises
    ------
    IndexError
        If the page yields fewer than two tables.
    """
    if not isinstance(date, str):
        # A Timestamp inside an f-string renders as '2024-01-01 00:00:00',
        # which would corrupt both the query string and the file name.
        date = pd.Timestamp(date).strftime('%Y-%m-%d')

    url = f'https://www.omip.pt/es/market-data/spot?date={date}&commodity=EL&zone=ES'
    tables = pd.read_html(url)

    # The wanted table is picked by position, so fail with a message that
    # names the page instead of a bare "list index out of range".
    if len(tables) < 2:
        raise IndexError(
            f'expected at least 2 tables at {url}, found {len(tables)}'
        )
    df = tables[1]

    if path:
        # Create the target directory up front so the first run does not fail.
        os.makedirs(path, exist_ok=True)
        filename = f'{date}.csv'
        pathfile = os.path.join(path, filename)
        df.to_csv(pathfile, index=False)

    return df

## Today

In [35]:
from datetime import datetime

In [36]:
# Current local date as an ISO 'YYYY-MM-DD' string.
today = datetime.today().date().isoformat()

In [37]:
# Show the string form of today's date.
today

'2024-05-08'

In [39]:
# Fetch one day and save it as 2024-01-01.csv under ../data/raw/; the returned table is displayed below.
# NOTE(review): this section is titled "Today" but a fixed date is passed and `today` is not used — confirm intent.
download_omie(date='2024-01-01', path='../data/raw/')

Unnamed: 0,Hours,Price (€),Volume (MWh)
0,00h-01h,63.33,8744.9
1,01h-02h,50.09,8671.4
...,...,...,...
22,22h-23h,47.50,11154.9
23,23h-24h,42.09,11000.1


In [44]:
# One entry per calendar day of January 2024, both endpoints included.
dates = pd.date_range('2024-01-01', '2024-01-31', freq='1D')

In [45]:
# Inspect the generated daily index.
dates

DatetimeIndex(['2024-01-01', '2024-01-02', '2024-01-03', '2024-01-04',
               '2024-01-05', '2024-01-06', '2024-01-07', '2024-01-08',
               '2024-01-09', '2024-01-10', '2024-01-11', '2024-01-12',
               '2024-01-13', '2024-01-14', '2024-01-15', '2024-01-16',
               '2024-01-17', '2024-01-18', '2024-01-19', '2024-01-20',
               '2024-01-21', '2024-01-22', '2024-01-23', '2024-01-24',
               '2024-01-25', '2024-01-26', '2024-01-27', '2024-01-28',
               '2024-01-29', '2024-01-30', '2024-01-31'],
              dtype='datetime64[ns]', freq='D')

## Multiple dates

Index(['2024-01-01', '2024-01-02', '2024-01-03', '2024-01-04', '2024-01-05',
       '2024-01-06', '2024-01-07', '2024-01-08', '2024-01-09', '2024-01-10',
       '2024-01-11', '2024-01-12', '2024-01-13', '2024-01-14', '2024-01-15',
       '2024-01-16', '2024-01-17', '2024-01-18', '2024-01-19', '2024-01-20',
       '2024-01-21', '2024-01-22', '2024-01-23', '2024-01-24', '2024-01-25',
       '2024-01-26', '2024-01-27', '2024-01-28', '2024-01-29', '2024-01-30',
       '2024-01-31'],
      dtype='object')