### Obtaining Data

#### Yahoo Finance and Pandas

In [19]:
import yfinance as yf

# Define the ticker and the date range
ticker = "SPY"
inicio = "2010-02-01"
fim = "2022-06-15"

# Download the data for the selected ticker.
# NOTE(review): the tail printed for this run ends at 2022-06-14, so the
# `end` date itself does not appear in the result - confirm against the
# yfinance documentation whether `end` is exclusive.
dados = yf.download(ticker, start=inicio, end=fim)

# Show the last records
print(dados.tail())

[*********************100%***********************]  1 of 1 completed
                  Open        High         Low       Close   Adj Close  \
Date                                                                     
2022-06-08  413.929993  415.820007  410.380005  411.220001  399.835297   
2022-06-09  409.339996  411.739990  401.440002  401.440002  390.326080   
2022-06-10  394.880005  395.779999  389.750000  389.799988  379.008301   
2022-06-13  379.850006  381.809998  373.299988  375.000000  364.618073   
2022-06-14  376.850006  377.940002  370.589996  373.869995  363.519318   

               Volume  
Date                   
2022-06-08   64350000  
2022-06-09   86289800  
2022-06-10  132893900  
2022-06-13  170004900  
2022-06-14  104011800  


In [None]:
#!/usr/bin/python
# -*- coding: utf-8 -*-
# quandl_data.py


from __future__ import print_function
import matplotlib.pyplot as plt
import pandas as pd
import requests

def construct_futures_symbols(symbol, start_year=2010, end_year=2014):
    """
    Build the futures contract codes for `symbol` covering every
    year from start_year to end_year (both inclusive).

    Each code is the symbol, followed by a delivery month letter,
    followed by the year, e.g. "ESH2010". Codes are ordered by
    year first and by delivery month within each year.
    """

    # Quarterly delivery codes: H (March), M (June),
    # U (September) and Z (December)
    delivery_codes = 'HMUZ'

    return [
        "%s%s%s" % (symbol, code, year)
        for year in range(start_year, end_year + 1)
        for code in delivery_codes
    ]


def download_contract_from_quandl(contract, dl_dir):
    """
    Download an individual futures contract from Quandl and then
    store it to disk as '<dl_dir>/<contract>.csv'. An auth_token
    can be supplied for more downloads; it is obtained from Quandl
    upon sign-up.

    Parameters:
    contract - The futures contract code, e.g. "ESH2010"
    dl_dir - Existing directory that receives the CSV file

    The HTTP status of the response is not inspected: whatever
    body the server returns is written to the file as-is.
    """

    # Construct the API call from the contract and auth_token
    api_call = "http://www.quandl.com/api/v1/datasets/"
    api_call += "OFDP/FUTURE_%s.csv" % contract
    # To use an auth token, replace the query string below with
    # "?auth_token=MY_AUTH_TOKEN&sort_order=asc", substituting
    # your own token for MY_AUTH_TOKEN
    params = "?sort_order=asc"
    full_url = "%s%s" % (api_call, params)

    # Download the data from Quandl
    data = requests.get(full_url).text

    # Store the data to disk; the context manager closes the
    # file even when the write fails part-way through
    with open('%s/%s.csv' % (dl_dir, contract), 'w') as fc:
        fc.write(data)


def download_historical_contracts(symbol, dl_dir,
                                  start_year=2010,
                                  end_year=2014):
    """
    Fetch every futures contract of `symbol` whose year lies
    between start_year and end_year, storing each one in dl_dir.

    A progress line is printed before each contract is downloaded.
    """

    for contract_code in construct_futures_symbols(symbol,
                                                   start_year,
                                                   end_year):
        print("Downloading contract: %s" % contract_code)
        download_contract_from_quandl(contract_code, dl_dir)
        
if __name__ == "__main__":
    # Only this script section needs os, so import it locally
    import os

    symbol = 'ES'

    # Relative directory that receives the downloaded CSV files;
    # create it when it does not exist yet so the downloads below
    # do not fail on a missing path
    dl_dir = 'quandl/futures/ES'
    if not os.path.isdir(dl_dir):
        os.makedirs(dl_dir)

    # Create the start and end years
    start_year = 2010
    end_year = 2014

    # Download the contracts into the directory
    download_historical_contracts(symbol,
                                  dl_dir,
                                  start_year,
                                  end_year)

    # Open up a single contract via read_csv
    # and plot the settle price
    es = pd.read_csv("%s/ESH2010.csv" % dl_dir, index_col="Date")
    es["Settle"].plot()
    plt.show()

#### DTN IQFeed

In [None]:
#!/usr/bin/python
# -*- coding: utf-8 -*-
# iqfeed.py


import sys
import socket


def read_historical_data_socket(sock, recv_buffer=4096):
    """
    Read the information from the socket, in a buffered
    fashion, receiving at most recv_buffer bytes at a time,
    until the end message string "!ENDMSG!" has arrived.

    Parameters:
    sock - The socket object
    recv_buffer - Amount in bytes to receive per read

    Returns the received data as text, with its last 12
    characters (the end message) removed.

    Raises IOError if the connection is closed before the end
    message string has been received.
    """

    end_marker = b"!ENDMSG!"
    # Number of trailing characters dropped once the end message has
    # arrived; presumably the line break before the marker, the marker
    # itself and its ",\r\n" terminator - TODO confirm against the feed
    trailer_length = 12

    # recv() hands back bytes, so accumulate bytes and decode once
    received = bytearray()
    while True:
        chunk = sock.recv(recv_buffer)
        if not chunk:
            # An empty read means the peer closed the connection;
            # without this check the loop would never terminate
            raise IOError(
                "Connection closed before the end message was received"
            )

        # The marker may straddle two reads, so resume the search
        # slightly before the newly appended bytes
        search_from = max(0, len(received) - len(end_marker) + 1)
        received += chunk

        # Check if the end message string arrives
        if received.find(end_marker, search_from) != -1:
            break

    # Remove the end message string. latin-1 maps every byte to a
    # character, so decoding cannot fail; the feed is presumably
    # plain ASCII text - TODO confirm
    return bytes(received[:-trailer_length]).decode("latin-1")

if __name__ == "__main__":
    # Define server host, port and symbols to download
    host = "127.0.0.1"  # Localhost
    port = 9100  # Historical data socket port
    syms = ["SPY", "IWM"]

    # Download each symbol to disk. The loop lives inside the main
    # guard because it depends on the names defined just above.
    for sym in syms:
        print("Downloading symbol: %s..." % sym)

        # Construct the message needed by IQFeed to retrieve data
        message = "HIT,%s,60,20070101 075000,,,093000,160000,1\n" % sym

        # Open a streaming socket to the IQFeed server locally
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            sock.connect((host, port))

            # Send the historical data request message (sockets
            # transport bytes, hence the encode) and buffer the data
            sock.sendall(message.encode("ascii"))
            data = read_historical_data_socket(sock)
        finally:
            # Always release the socket, even if the request fails
            sock.close()

        # Remove all the endlines and line-ending
        # comma delimiter from each record
        data = "".join(data.split("\r"))
        data = data.replace(",\n", "\n")[:-1]

        # Write the data stream to disk
        with open("%s.csv" % sym, "w") as f:
            f.write(data)

### Cleaning Financial Data

pip install Quandl

In [None]:
#!/usr/bin/python
# -*- coding: utf-8 -*-
# cont_futures.py

from __future__ import print_function

import datetime

import numpy as np
import pandas as pd
import Quandl

def futures_rollover_weights(start_date, expiry_dates, contracts, rollover_days=5):
    """
    This constructs a pandas DataFrame that contains weights
    (between 0.0 and 1.0) of contract positions to hold in order to
    carry out a rollover of rollover_days prior to the expiration of
    the earliest contract. The matrix can then be 'multiplied' with
    another DataFrame containing the settle prices of each
    contract in order to produce a continuous time series
    futures contract.

    Parameters:
    start_date - First date of the weights index
    expiry_dates - pandas Series of expiry dates keyed by contract
        name, in ascending date order; its keys are used as column
        labels, so they need to appear in `contracts`
    contracts - Column labels of the returned DataFrame
    rollover_days - Number of business days over which the weight
        is moved from one contract to the next

    Returns a DataFrame indexed by business day from start_date to
    the last expiry date, with one column per contract.

    NOTE(review): a rollover window reaching back before start_date
    is expected to raise KeyError from the label lookup - confirm.
    """

    # Construct a sequence of dates beginning
    # from the earliest contract start date to the end
    # date of the final contract. iloc is used because the
    # Series is keyed by contract name, not by position.
    dates = pd.date_range(start_date, expiry_dates.iloc[-1], freq='B')

    # Create the 'roll weights' DataFrame that will store the multipliers for
    # each contract (between 0.0 and 1.0)
    roll_weights = pd.DataFrame(np.zeros((len(dates), len(contracts))),
                                index=dates, columns=contracts)

    prev_date = roll_weights.index[0]
    last_position = len(expiry_dates) - 1

    # Loop through each contract and create the specific weightings for
    # each contract depending upon the settlement date and rollover_days
    for i, (item, ex_date) in enumerate(expiry_dates.items()):
        if i < last_position:
            # Business day immediately before the expiry date
            last_held = ex_date - pd.offsets.BDay()

            roll_weights.loc[prev_date:last_held, item] = 1
            roll_rng = pd.date_range(end=last_held,
                                     periods=rollover_days + 1, freq='B')

            # Create a sequence of roll weights (i.e. [0.0,0.2,...,0.8,1.0])
            # and use these to adjust the weightings of each future
            decay_weights = np.linspace(0, 1, rollover_days + 1)
            roll_weights.loc[roll_rng, item] = 1 - decay_weights
            roll_weights.loc[roll_rng,
                             expiry_dates.index[i + 1]] = decay_weights
        else:
            # The final contract is held through to the last date
            roll_weights.loc[prev_date:, item] = 1

        prev_date = ex_date

    return roll_weights

if __name__ == "__main__":
    # Download the current Front and Back (near and far) futures contracts
    # for WTI Crude, traded on NYMEX, from Quandl.com. You will need to
    # adjust the contracts to reflect your current near/far contracts
    # depending upon the point at which you read this!
    wti_near = Quandl.get("OFDP/FUTURE_CLF2014")
    wti_far = Quandl.get("OFDP/FUTURE_CLG2014")
    wti = pd.DataFrame({'CLF2014': wti_near['Settle'],
                        'CLG2014': wti_far['Settle']},
                       index=wti_far.index)

    # Create the Series of expiry dates for each contract, sorted by
    # date (sort_values replaces the removed Series.order)
    expiry_dates = pd.Series({'CLF2014': datetime.datetime(2013, 12, 19),
                              'CLG2014': datetime.datetime(2014, 2, 21)}).sort_values()

    # Obtain the rollover weighting matrix/DataFrame
    weights = futures_rollover_weights(wti_near.index[0],
                                       expiry_dates, wti.columns)

    # Construct the continuous future of the WTI CL contracts
    wti_cts = (wti * weights).sum(axis=1).dropna()

    # Output the merged series
    print(wti_cts)

### End.