# Previsão do mercado financeiro

## Módulo 1 - Base de Dados

In [11]:
import pandas as pd
import numpy as np
import yfinance as yf
import pytz
from pytrends.request import TrendReq

Download dos dados por meio da biblioteca *yfinance*:

In [4]:
# Fetch 60-minute bars for the three tickers over one shared window; the
# results are bound, in order, to petr, oil and dollar.
# NOTE(review): Yahoo is understood to serve intraday bars only for a limited
# trailing period, so this fixed window may come back empty on a later
# re-run - confirm before relying on Restart & Run All.
petr, oil, dollar = (
    yf.download(tickers=symbol, start="2020-08-01", end="2022-06-08", interval="60m")
    for symbol in ("PETR4.SA", "CL=F", "BRL=X")
)

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


Renomeação das features de interesse em cada base:

In [5]:
# Give the closing price of each auxiliary series its own column name so it
# can sit next to PETR4's 'Close' column after the joins below.
# rename() returns a new frame that is bound back to the same name; this
# avoids inplace=True, which silently mutates a frame created in another cell.
dollar = dollar.rename(columns={'Close': 'dollar'})
oil = oil.rename(columns={'Close': 'oil'})

Unindo os dados com *left join* pelo índice de data/hora (mantendo todas as linhas de PETR4):

In [6]:
# Left join on the index: every PETR4 row is kept and the 'oil' column is
# attached where the timestamps match (unmatched rows get NaN).
data01 = petr.join(oil["oil"], how="left")

In [7]:
# Same index-based left join, this time attaching the 'dollar' column to the
# frame that already carries PETR4 and 'oil'.
data02 = data01.join(dollar["dollar"], how="left")

In [8]:
# Express the (tz-aware) row index in the 'Brazil/East' zone; the instants
# themselves are unchanged, only their wall-clock representation.
data03 = data02.tz_convert(tz='Brazil/East', axis=0)

In [9]:
# Preview the first five rows of the merged price frame.
data03.head(n=5)

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,oil,dollar
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2020-08-03 10:00:00-03:00,22.540001,22.549999,21.77,21.85,21.85,0.0,40.419998,5.312
2020-08-03 11:00:00-03:00,21.85,22.09,21.780001,21.799999,21.799999,10130100.0,40.959999,5.3297
2020-08-03 12:00:00-03:00,21.790001,22.040001,21.790001,21.950001,21.950001,7127000.0,41.009998,5.319
2020-08-03 13:00:00-03:00,21.959999,22.110001,21.91,22.07,22.07,5119400.0,41.099998,5.3179
2020-08-03 14:00:00-03:00,22.059999,22.09,21.959999,22.030001,22.030001,7982900.0,41.150002,5.3127


In [10]:
# Count missing values per column to see how many rows the left joins
# failed to match.
data03.isnull().sum()

Open          1
High          1
Low           1
Close         1
Adj Close     1
Volume        1
oil          78
dollar        2
dtype: int64

### Dados do Google Trends

In [12]:
# Google Trends client.
# NOTE(review): tz=180 is presumably an offset in minutes corresponding to
# UTC-3 - confirm against the pytrends documentation.
pytrends = TrendReq(tz=180, hl='en-US')

# Search terms requested together in a single query.
kw_list = ['PETR4', 'PETROBRAS', 'CRISE']

# Interest per keyword from 2020-08-01 00h to 2022-06-08 00h.
# NOTE(review): geo='' applies no country filter - confirm that worldwide
# interest (rather than Brazil only) is what is wanted.
# NOTE(review): sleep=60 is presumably a pause in seconds between successive
# requests, so this cell can take a long time to finish - confirm.
google_tr = pytrends.get_historical_interest(
    kw_list,
    year_start=2020, month_start=8, day_start=1, hour_start=0,
    year_end=2022, month_end=6, day_end=8, hour_end=0,
    cat=0,
    geo='',
    gprop='',
    sleep=60,
)

In [14]:
# Attach the 'Brazil/East' zone to the Trends index so it can be joined with
# the price data, whose index is tz-aware in the same zone.
# The guard keeps the cell re-runnable: tz_localize raises TypeError on an
# index that is already tz-aware, which is what a second run would hit.
# NOTE(review): this treats the timestamps returned by pytrends as local
# wall-clock time; confirm they are not UTC, in which case they should be
# localized to UTC and then converted.
if google_tr.index.tz is None:
    google_tr = google_tr.tz_localize(tz='Brazil/East')


In [16]:
# Preview the first five rows of the Google Trends frame.
google_tr.head(n=5)

Unnamed: 0_level_0,PETR4,PETROBRAS,CRISE,isPartial
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020-08-01 00:00:00-03:00,12,47,52,False
2020-08-01 01:00:00-03:00,17,29,78,False
2020-08-01 02:00:00-03:00,7,12,72,False
2020-08-01 03:00:00-03:00,5,8,33,False
2020-08-01 04:00:00-03:00,3,10,26,False


In [17]:
# Index-based left join: keep every price row and attach all Google Trends
# columns (including 'isPartial') where the timestamps match.
data04 = data03.join(google_tr, how="left")

In [20]:
# Preview the first five rows of the final combined frame.
data04.head(n=5)

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume,oil,dollar,PETR4,PETROBRAS,CRISE,isPartial
2020-08-03 10:00:00-03:00,22.540001,22.549999,21.77,21.85,21.85,0.0,40.419998,5.312,0.0,10.0,64.0,False
2020-08-03 11:00:00-03:00,21.85,22.09,21.780001,21.799999,21.799999,10130100.0,40.959999,5.3297,3.0,15.0,42.0,False
2020-08-03 12:00:00-03:00,21.790001,22.040001,21.790001,21.950001,21.950001,7127000.0,41.009998,5.319,4.0,28.0,71.0,False
2020-08-03 13:00:00-03:00,21.959999,22.110001,21.91,22.07,22.07,5119400.0,41.099998,5.3179,22.0,24.0,66.0,False
2020-08-03 14:00:00-03:00,22.059999,22.09,21.959999,22.030001,22.030001,7982900.0,41.150002,5.3127,35.0,28.0,62.0,False


In [21]:
# Persist the combined dataset to a CSV file in the working directory.
# NOTE(review): the date format has no %z directive, so the UTC offset shown
# in the index is presumably not written to the file - confirm that readers
# of this file expect local wall-clock timestamps.
data04.to_csv(
    path_or_buf="trabalho_final.csv",
    date_format='%Y-%m-%d %H:%M:%S',
)