# Scraping tables from websites
Source: OMIP spot market data (electricity, Spanish zone) — https://www.omip.pt/es/market-data/spot?date=2024-04-27&commodity=EL&zone=ES

In [45]:
from pathlib import Path

import pandas as pd

## Download raw data

In [48]:
# Day to fetch; it is interpolated verbatim into the `date=` query parameter.
date = '2024-04-27'
url = f'https://www.omip.pt/es/market-data/spot?date={date}&commodity=EL&zone=ES'
url

'https://www.omip.pt/es/market-data/spot?date=2024-04-27&commodity=EL&zone=ES'

In [47]:
# read_html fetches the page and returns a list with one DataFrame per HTML table it finds.
dfs = pd.read_html(url)
dfs

[        Index  Price (€)  Chart
 0   SPEL BASE      25.02    NaN
 1         NaN        NaN    NaN
 2  SPEL SOLAR       3.51    NaN
 3         NaN        NaN    NaN,
       Hours  Price (€)  Volume (MWh)
 0   00h-01h      47.88       17459.0
 1   01h-02h      40.00       17285.8
 ..      ...        ...           ...
 22  22h-23h      62.78       16324.9
 23  23h-24h      58.52       17017.6
 
 [24 rows x 3 columns]]

In [34]:
# First table: the index summary (SPEL BASE / SPEL SOLAR rows); displayed for inspection.
dfs[0]

Unnamed: 0,Index,Price (€),Chart
0,SPEL BASE,25.02,
1,,,
2,SPEL SOLAR,3.51,
3,,,


In [35]:
# Second table: one row per hour with price and volume; this is the table that gets saved.
df = dfs[1]
df

Unnamed: 0,Hours,Price (€),Volume (MWh)
0,00h-01h,47.88,17459.0
1,01h-02h,40.00,17285.8
...,...,...,...
22,22h-23h,62.78,16324.9
23,23h-24h,58.52,17017.6


## Write to file

In [41]:
# One CSV per day, named after the date; the path is relative to the working directory.
path = f'../data/{date}.csv'
path

'../data/2024-04-27.csv'

In [42]:
# index=False leaves out the 0..23 row labels so the file holds only the table's own columns.
# NOTE: the target directory must already exist; to_csv does not create missing directories.
df.to_csv(path, index=False)

## Refactor code into function

In [37]:
def download_and_write(date, data_dir='../data'):
    """Download the OMIP hourly spot table for one day and save it as CSV.

    The page is parsed with ``pd.read_html``; the second table it returns is
    the hourly one (Hours / Price / Volume) on the page inspected earlier in
    this notebook, and that table is written to ``<data_dir>/<date>.csv``
    without the row index.

    Parameters
    ----------
    date : str or date-like
        Day to fetch. Strings are inserted into the URL and the file name
        exactly as given (e.g. ``'2024-04-27'``). Any other date-like value
        (``datetime``, ``date``, ``pd.Timestamp``) is formatted as
        ``YYYY-MM-DD`` first.
    data_dir : str or path-like, default '../data'
        Directory that receives the CSV. It is created if it does not exist.

    Returns
    -------
    pathlib.Path
        Path of the file that was written.

    Raises
    ------
    IndexError
        If the page yields fewer than two tables, i.e. the hourly table is
        not where this function expects it.
    """
    if not isinstance(date, str):
        # str() of a datetime/Timestamp carries a time component, which would
        # end up in both the query string and the file name.
        date = pd.Timestamp(date).strftime('%Y-%m-%d')

    url = f'https://www.omip.pt/es/market-data/spot?date={date}&commodity=EL&zone=ES'
    tables = pd.read_html(url)
    if len(tables) < 2:
        raise IndexError(
            f'expected at least 2 tables at {url}, found {len(tables)}'
        )
    hourly = tables[1]

    # Create the output directory up front: to_csv fails if it is missing.
    out_dir = Path(data_dir)
    out_dir.mkdir(parents=True, exist_ok=True)
    path = out_dir / f'{date}.csv'
    hourly.to_csv(path, index=False)
    return path

## Today

In [38]:
from datetime import datetime

# Current local calendar day as a YYYY-MM-DD string, the same shape as `date` above.
today = datetime.now().strftime('%Y-%m-%d')
today

'2024-05-07'

In [None]:
# Fetch today's page and write its hourly table to disk (network access + file write).
download_and_write(today)

## Multiple dates

In [44]:
# Every calendar day from 1 to 31 January 2024, already formatted as YYYY-MM-DD strings.
dates = pd.date_range(start='2024-01-01', end='2024-01-31').strftime('%Y-%m-%d')
dates

Index(['2024-01-01', '2024-01-02', '2024-01-03', '2024-01-04', '2024-01-05',
       '2024-01-06', '2024-01-07', '2024-01-08', '2024-01-09', '2024-01-10',
       '2024-01-11', '2024-01-12', '2024-01-13', '2024-01-14', '2024-01-15',
       '2024-01-16', '2024-01-17', '2024-01-18', '2024-01-19', '2024-01-20',
       '2024-01-21', '2024-01-22', '2024-01-23', '2024-01-24', '2024-01-25',
       '2024-01-26', '2024-01-27', '2024-01-28', '2024-01-29', '2024-01-30',
       '2024-01-31'],
      dtype='object')

In [None]:
# The loop variable lives in the notebook namespace, so it is named `day` rather
# than `date`: reusing `date` would overwrite the value assigned in the download
# section, and re-running those cells afterwards would silently use 2024-01-31.
for day in dates:
    download_and_write(day)