# **Using S3 bucket to store data** 

database : https://www.tesourotransparente.gov.br/ckan/dataset

1 - Below you will see functions to get and treat data from *tesouro direto*

In [3]:
import pandas as pd
pd.set_option("display.max_colwidth", 75)
pd.set_option("display.min_rows", 10)

import matplotlib
matplotlib.rcParams['figure.figsize'] = (9,4)

In [21]:
def busca_titulos_tesouro_direto():
    url = 'https://www.tesourotransparente.gov.br/ckan/dataset/df56aa42-484a-4a59-8184-7676580c81e3/resource/796d2059-14e9-44e3-80c9-2d9e30b405c1/download/PrecoTaxaTesouroDireto.csv'
    df = pd.read_csv(url, sep=';', decimal=',')
    df['Data Vencimento'] = pd.to_datetime(df['Data Vencimento'], dayfirst=True)
    df['Data Base'] = pd.to_datetime(df['Data Base'], dayfirst=True)
    multi_indice = pd.MultiIndex.from_frame(df.iloc[:,:3])
    df = df.set_index(multi_indice).iloc[:, 3:]
    return df

2 - Searching data

In [22]:
df = busca_titulos_tesouro_direto()
df.head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Taxa Compra Manha,Taxa Venda Manha,PU Compra Manha,PU Venda Manha,PU Base Manha
Tipo Titulo,Data Vencimento,Data Base,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Tesouro Prefixado com Juros Semestrais,2031-01-01,2022-07-08,13.09,13.21,854.65,849.09,849.09
Tesouro IPCA+,2035-05-15,2022-07-08,6.07,6.19,1878.75,1850.07,1850.07
Tesouro Prefixado,2023-01-01,2022-07-08,13.71,13.83,940.17,939.21,939.21
Tesouro Prefixado,2024-07-01,2022-07-08,13.16,13.28,784.77,782.75,782.75
Tesouro Prefixado,2029-01-01,2022-07-08,12.97,13.09,455.04,451.71,451.71
Tesouro Prefixado com Juros Semestrais,2025-01-01,2022-07-08,12.9,13.02,945.37,942.66,942.66
Tesouro IPCA+ com Juros Semestrais,2035-05-15,2022-07-08,6.05,6.17,4020.37,3975.83,3975.83
Tesouro Prefixado,2026-01-01,2022-07-08,12.77,12.89,658.2,655.45,655.45
Tesouro Selic,2025-03-01,2022-07-08,0.1,0.11,11849.77,11840.74,11840.74
Tesouro Prefixado com Juros Semestrais,2027-01-01,2022-07-08,12.88,13.0,911.32,907.32,907.32


3 - Saving dataframe into CSV file

In [8]:
due_date = "2025-01-01"
fixed_2025 =  df.loc[("Tesouro Prefixado", due_date)]

  fixed_2025 =  df.loc[("Tesouro Prefixado", due_date)]


In [9]:
fixed_2025

Unnamed: 0_level_0,Taxa Compra Manha,Taxa Venda Manha,PU Compra Manha,PU Venda Manha,PU Base Manha
Data Base,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2022-07-19,13.44,13.56,734.36,732.09,732.09
2022-08-19,12.21,12.33,762.19,759.93,759.93
2022-08-26,12.13,12.25,765.21,762.95,762.95
2022-07-28,13.10,13.22,742.31,740.04,740.04
2022-08-30,12.13,12.25,765.91,763.65,763.65
...,...,...,...,...,...
2022-09-15,12.02,12.14,771.47,769.24,769.24
2022-08-17,11.95,12.07,765.69,763.40,763.40
2022-06-27,12.57,12.69,742.73,740.39,740.39
2022-11-24,13.72,13.84,763.46,761.38,761.38


In [10]:
fixed_2025.to_csv('fixed_2025.csv')

4 - Saving several files

In [11]:
list_values = {'Tesouro Prefixado' : ['2025-01-01','2029-01-01'],
    'Tesouro IPCA+':['2026-08-15','2035-05-15','2035-05-15','2045-05-15']}

In [None]:
for titulo, due_dates in list_values.items():
    for due_date in due_dates : 
         name      = titulo.replace(' ','_').lower()
         year      = str(due_date)[0:4]
         filename = name + '_' + year + '.csv'
         print(filename,titulo)
         data      = df.loc[(titulo,due_date)]
         data.to_csv(filename)

tesouro_prefixado_2025.csv Tesouro Prefixado
tesouro_prefixado_2029.csv Tesouro Prefixado
tesouro_ipca+_2026.csv Tesouro IPCA+
tesouro_ipca+_2035.csv Tesouro IPCA+
tesouro_ipca+_2035.csv Tesouro IPCA+
tesouro_ipca+_2045.csv Tesouro IPCA+


  data      = df.loc[(titulo,due_date)]


5 - Using Boto3 to upload files to S3

In [30]:
import boto3

In [44]:
session = boto3.Session(
    aws_access_key_id     = "AWS_SERVER_PUBLIC_KEY",
    aws_secret_access_key = "AWS_SERVER_PUBLIC_KEY",
    region_name           = "AWS_REGION_NAME"
)

In [42]:
s3 = session.resource('s3')
bucket = 'btc-rf-mod2'

In [43]:
for titulo, due_dates in list_values.items():
    for due_date in due_dates : 
         name      = titulo.replace(' ','_').lower()
         year      = str(due_date)[0:4]
         filename = name + '_' + year + '.csv'
         s3.Bucket(bucket).upload_file(filename,'titulos_publicos/' + name + '/' + year + '/' + filename)
