# Boursorama USD/RUB

Quick, temporary scraper to get USD/RUB quotes from Boursorama (used in Daily Energy Report).

Manual procedure:

- go to https://www.boursorama.com/bourse/devises/taux-de-change-dollar-rouble-USD-RUB/
- click on 6M
- click on the download icon

In [1]:
import logging
import sys

root = logging.getLogger()
root.setLevel(logging.DEBUG)

handler = logging.StreamHandler(sys.stdout)
handler.setLevel(logging.DEBUG)
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
handler.setFormatter(formatter)
root.addHandler(handler)

In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
import requests

url = 'https://www.boursorama.com/bourse/action/graph/ws/GetTicksEOD?symbol=1xUSDRUB&length=180&period=0&guid='
r = requests.get(url)
r.raise_for_status()

2022-03-28 10:16:50,379 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): www.boursorama.com:443
2022-03-28 10:16:50,470 - urllib3.connectionpool - DEBUG - https://www.boursorama.com:443 "GET /bourse/action/graph/ws/GetTicksEOD?symbol=1xUSDRUB&length=180&period=0&guid= HTTP/1.1" 200 2361


In [4]:
print(r.content)

b'{"d":{"Name":"USD\\/ROUBLE RUSSIE","SymbolId":"1xUSDRUB","Xperiod":0,"QuoteTab":[{"d":18898,"o":72.6505,"h":73.0308,"l":72.404,"c":72.8895,"v":0},{"d":18899,"o":72.8915,"h":72.9675,"l":72.588,"c":72.887,"v":0},{"d":18900,"o":72.8866,"h":72.987,"l":72.589,"c":72.71,"v":0},{"d":18901,"o":72.7315,"h":73.25,"l":72.6575,"c":72.6689,"v":0},{"d":18904,"o":72.727,"h":72.9995,"l":72.403,"c":72.4925,"v":0},{"d":18905,"o":72.513,"h":72.681,"l":72.2332,"c":72.3085,"v":0},{"d":18906,"o":72.2781,"h":72.7563,"l":72.2764,"c":72.4245,"v":0},{"d":18907,"o":72.3965,"h":72.4765,"l":71.686,"c":71.753,"v":0},{"d":18908,"o":71.755,"h":72.07,"l":71.6518,"c":71.8808,"v":0},{"d":18911,"o":71.7065,"h":71.9285,"l":71.5555,"c":71.8805,"v":0},{"d":18912,"o":71.8785,"h":72.082,"l":71.7295,"c":71.8425,"v":0},{"d":18913,"o":71.822,"h":72.1299,"l":71.712,"c":72.0165,"v":0},{"d":18914,"o":72.1055,"h":72.1075,"l":71.329,"c":71.4195,"v":0},{"d":18915,"o":71.4249,"h":71.445,"l":70.901,"c":71.1238,"v":0},{"d":18918,"o":71

In [5]:
import pandas as pd
df = pd.DataFrame(r.content)

ValueError: DataFrame constructor not properly called!

In [None]:
df

In [9]:
import pprint, json

pprint.pprint(json.loads(r.content))

{'d': {'Name': 'USD/ROUBLE RUSSIE',
       'QuoteTab': [{'c': 72.8895,
                     'd': 18898,
                     'h': 73.0308,
                     'l': 72.404,
                     'o': 72.6505,
                     'v': 0},
                    {'c': 72.887,
                     'd': 18899,
                     'h': 72.9675,
                     'l': 72.588,
                     'o': 72.8915,
                     'v': 0},
                    {'c': 72.71,
                     'd': 18900,
                     'h': 72.987,
                     'l': 72.589,
                     'o': 72.8866,
                     'v': 0},
                    {'c': 72.6689,
                     'd': 18901,
                     'h': 73.25,
                     'l': 72.6575,
                     'o': 72.7315,
                     'v': 0},
                    {'c': 72.4925,
                     'd': 18904,
                     'h': 72.9995,
                     'l': 72.403,
                     'o'

## Solving the date

The date in the response are in days since Unix Epoch (1/1/1970).

We need to add the value received to this date to get the date.

In [10]:
import datetime

(datetime.datetime.utcnow() - datetime.datetime(1970,1,1)).days

19079

In [16]:
from datetime import timedelta

datetime.datetime(1970,1,1) + timedelta(days=19079)

datetime.datetime(2022, 3, 28, 0, 0)

## Get a Dataframe now

Let's pandalize the result.

In [30]:
df = pd.json_normalize(json.loads(r.content), record_path=['d', 'QuoteTab'])
df

Unnamed: 0,d,o,h,l,c,v
0,18898,72.6505,73.0308,72.4040,72.88950,0
1,18899,72.8915,72.9675,72.5880,72.88700,0
2,18900,72.8866,72.9870,72.5890,72.71000,0
3,18901,72.7315,73.2500,72.6575,72.66890,0
4,18904,72.7270,72.9995,72.4030,72.49250,0
...,...,...,...,...,...,...
124,19072,101.1175,110.2500,101.1175,106.65000,0
125,19073,106.6500,109.7500,102.9999,106.00000,0
126,19074,106.0000,107.0000,95.0000,96.74995,0
127,19075,96.7500,103.5000,95.8749,102.25000,0


In [34]:
# convert d to date
df['d'] = df['d'].map(lambda d: datetime.datetime(1970,1,1) + timedelta(days=d))
df

Unnamed: 0,d,o,h,l,c,v
0,2021-09-28,72.6505,73.0308,72.4040,72.88950,0
1,2021-09-29,72.8915,72.9675,72.5880,72.88700,0
2,2021-09-30,72.8866,72.9870,72.5890,72.71000,0
3,2021-10-01,72.7315,73.2500,72.6575,72.66890,0
4,2021-10-04,72.7270,72.9995,72.4030,72.49250,0
...,...,...,...,...,...,...
124,2022-03-21,101.1175,110.2500,101.1175,106.65000,0
125,2022-03-22,106.6500,109.7500,102.9999,106.00000,0
126,2022-03-23,106.0000,107.0000,95.0000,96.74995,0
127,2022-03-24,96.7500,103.5000,95.8749,102.25000,0


In [37]:
col_mappings: dict = {
                          'd': 'date',
                          'o': 'ouv',
                          'h': 'haut',
                          'l': 'bas',
                          'c': 'clot',
                          'v': 'vol'
                          }
df.rename(columns=col_mappings, inplace=True)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 129 entries, 0 to 128
Data columns (total 6 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   date    129 non-null    datetime64[ns]
 1   ouv     129 non-null    float64       
 2   haut    129 non-null    float64       
 3   bas     129 non-null    float64       
 4   clot    129 non-null    float64       
 5   vol     129 non-null    int64         
dtypes: datetime64[ns](1), float64(4), int64(1)
memory usage: 6.2 KB


In [38]:
df

Unnamed: 0,date,ouv,haut,bas,clot,vol
0,2021-09-28,72.6505,73.0308,72.4040,72.88950,0
1,2021-09-29,72.8915,72.9675,72.5880,72.88700,0
2,2021-09-30,72.8866,72.9870,72.5890,72.71000,0
3,2021-10-01,72.7315,73.2500,72.6575,72.66890,0
4,2021-10-04,72.7270,72.9995,72.4030,72.49250,0
...,...,...,...,...,...,...
124,2022-03-21,101.1175,110.2500,101.1175,106.65000,0
125,2022-03-22,106.6500,109.7500,102.9999,106.00000,0
126,2022-03-23,106.0000,107.0000,95.0000,96.74995,0
127,2022-03-24,96.7500,103.5000,95.8749,102.25000,0


In [42]:
from datetime import date
today = date.today().strftime('%Y%M%d')
today

'20220028'

In [43]:
%cd ..

C:\Users\ROSA_L\PycharmProjects\scraper


In [49]:
from iea_scraper.core import factory
job = factory.get_scraper_job('com_boursorama', 'usd_rub_rate', full_load=True)
job.run()


2022-03-28 12:09:48,205 - iea_scraper.core.factory - DEBUG - Loading module iea_scraper.jobs.com_boursorama.usd_rub_rate
2022-03-28 12:09:48,207 - iea_scraper.core.factory - DEBUG - Getting class UsdRubRateJob
2022-03-28 12:09:48,208 - iea_scraper.core.job - INFO - Temporary table name: #boursorama_usd_rub_rates_temp, final table name: boursorama_usd_rub_rates_data
2022-03-28 12:09:48,209 - iea_scraper.jobs.com_boursorama.usd_rub_rate - INFO - Setting source to download 180 days of data.
2022-03-28 12:09:48,210 - iea_scraper.core.job - DEBUG - download: True, parallel download: True
2022-03-28 12:09:48,211 - iea_scraper.core.utils - INFO - Executing function download_source() over 1 items with a maximum of 15 parallel workers.
2022-03-28 12:09:48,218 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): www.boursorama.com:443
2022-03-28 12:09:48,346 - urllib3.connectionpool - DEBUG - https://www.boursorama.com:443 "GET /bourse/action/graph/ws/GetTicksEOD?symbol=1xUSDRUB

In [52]:
job = factory.get_scraper_job('com_boursorama', 'usd_rub_rate')
job.run()

2022-03-28 12:35:49,394 - iea_scraper.core.factory - DEBUG - Loading module iea_scraper.jobs.com_boursorama.usd_rub_rate
2022-03-28 12:35:49,396 - iea_scraper.core.factory - DEBUG - Getting class UsdRubRateJob
2022-03-28 12:35:49,397 - iea_scraper.core.job - INFO - Temporary table name: #boursorama_usd_rub_rates_temp, final table name: boursorama_usd_rub_rates_data
2022-03-28 12:35:49,398 - iea_scraper.core.job - DEBUG - download: True, parallel download: True
2022-03-28 12:35:49,400 - iea_scraper.core.utils - INFO - Executing function download_source() over 1 items with a maximum of 15 parallel workers.
2022-03-28 12:35:49,406 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): www.boursorama.com:443
2022-03-28 12:35:49,528 - urllib3.connectionpool - DEBUG - https://www.boursorama.com:443 "GET /bourse/action/graph/ws/GetTicksEOD?symbol=1xUSDRUB&length=180&period=0&guid= HTTP/1.1" 200 2361
2022-03-28 12:35:49,531 - iea_scraper.core.job - DEBUG - 8351 bytes written to 

In [45]:
%conda install -y pycountry

Collecting package metadata (current_repodata.json): ...working... done
Note: you may need to restart the kernel to use updated packages.
Solving environment: ...working... done

## Package Plan ##

  environment location: C:\Users\ROSA_L\AppData\Local\Continuum\Anaconda3

  added / updated specs:
    - pycountry


The following NEW packages will be INSTALLED:

  pycountry          conda-forge/noarch::pycountry-20.7.3-pyh9f0ad1d_0


Preparing transaction: ...working... done
Verifying transaction: ...working... done
Executing transaction: ...working... done



