In [2]:
import yfinance as yf
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

<b>NOTE</b> Look at the comments within the code to see where I added the try/except blocks!

In [10]:
def _shift_off_weekend(day):
    """Move a Saturday back to Friday and a Sunday forward to Monday; leave weekdays untouched."""
    weekday = day.isoweekday()
    if weekday == 6:
        return day - timedelta(days=1)
    if weekday == 7:
        return day + timedelta(days=1)
    return day


def _close_series(data):
    """Extract a date-indexed Series of closing prices from a download result (empty Series if unusable)."""
    if data is None or len(data) == 0 or 'Close' not in data.columns:
        return pd.Series(dtype=float)
    closes = data['Close']
    # When the frame has MultiIndex columns (e.g. (field, ticker)), selecting 'Close'
    # yields a one-column DataFrame; only a single ticker is requested per call.
    if isinstance(closes, pd.DataFrame):
        closes = closes.iloc[:, 0]
    # Lookups use naive datetimes, so drop any timezone to keep the comparison valid.
    if getattr(closes.index, 'tz', None) is not None:
        closes = closes.tz_localize(None)
    return closes


def _close_on(closes, day):
    """Return the closing price recorded for `day` as a float, or NaN when there is none."""
    if closes.empty:
        return np.nan
    matches = closes[closes.index == day]
    if matches.empty:
        return np.nan
    return float(matches.iloc[0])


def delta_days_and_current(tickers, dates, delta=7):
    """Fetch, for each (ticker, date) pair, the close on the reference date and on offset dates.

    Parameters
    ----------
    tickers : list of str
        Ticker symbols. Must not be longer than `dates`.
    dates : list of str
        Reference dates in the format %Y-%m-%d (e.g. 2010-01-24); dates[i] belongs to tickers[i].
    delta : int or list of int, default 7
        Calendar-day offset(s) from the reference date. Positive values look forward
        ("Days_Later"), negative values look backward ("Days_Before"). Zero and
        repeated values are ignored.

    Returns
    -------
    pandas.DataFrame
        Indexed by Ticker, one row per input pair, with columns "Reference Date",
        "Current Close", then "Close_Price_{n}_Days_Before" for every negative offset
        and "Close_Price_{n}_Days_Later" for every positive offset (each group in the
        order given). A price that is not available is NaN.

    Raises
    ------
    ValueError
        If fewer dates than tickers are supplied.

    Notes
    -----
    Any date that falls on a Saturday is moved to the preceding Friday and any date
    that falls on a Sunday to the following Monday. This applies to the reference
    date and to every offset date. No other non-trading days are adjusted, so a
    holiday simply yields NaN.
    """
    import warnings

    if isinstance(delta, (int, np.integer)):
        delta = [int(delta)]
    # Drop repeated offsets (keeping first-seen order) so every column gets one value per row.
    offsets = list(dict.fromkeys(delta))

    tickers = list(tickers)
    dates = list(dates)
    if len(dates) < len(tickers):
        raise ValueError(
            f"Got {len(tickers)} tickers but only {len(dates)} dates; each ticker needs a date."
        )

    # Map each non-zero offset to its output column: "before" columns first, then "later".
    column_for = {d: f"Close_Price_{abs(d)}_Days_Before" for d in offsets if d < 0}
    column_for.update({d: f"Close_Price_{d}_Days_Later" for d in offsets if d > 0})

    results = {field: [] for field in
               ['Ticker', "Reference Date", "Current Close", *column_for.values()]}

    # The download window always contains the reference date itself, hence the 0.
    earliest_offset = min([0, *offsets])
    latest_offset = max([0, *offsets])

    for ticker, date_str in zip(tickers, dates):
        current = _shift_off_weekend(datetime.strptime(date_str, '%Y-%m-%d'))
        window_start = current + timedelta(days=earliest_offset)
        window_end = current + timedelta(days=latest_offset)

        # try/except around the download call: a failure for one ticker must neither
        # abort the whole run nor let the previous ticker's prices leak into this row.
        try:
            # Two days of padding on each side leave room for the weekend shifts.
            data = yf.download(ticker,
                               start=window_start + timedelta(days=-2),
                               end=window_end + timedelta(days=2))
        except Exception as exc:
            warnings.warn(f"Download failed for {ticker}: {exc}")
            data = None

        closes = _close_series(data)

        results['Ticker'].append(ticker)
        results["Reference Date"].append(current)
        results["Current Close"].append(_close_on(closes, current))

        for offset, column in column_for.items():
            target = _shift_off_weekend(current + timedelta(days=offset))
            results[column].append(_close_on(closes, target))

    return pd.DataFrame(results).set_index('Ticker')

## Testing with our filings dataset

In [4]:
# Read the consolidated filings table directly from the raw GitHub URL and preview it.
CONSOLIDATED_DATA_URL = 'https://raw.githubusercontent.com/dbejarano31/Master-Thesis-Information-Management/main/consolidated-data'
df = pd.read_csv(CONSOLIDATED_DATA_URL)
df.head()

Unnamed: 0.1,Unnamed: 0,CIK Code,Filing,Date Filed_x,Ticker,CompanyName,Previous close,Next Open
0,0,1364954,An investment in our securities involves a hig...,2021-02-27,CHGG,"CHEGG, INC",96.529999,95.870003
1,1,1738758,An investment in our securitiesis speculative ...,2020-09-14,GSMG,GLORY STAR NEW MEDIA GROUP HOLDINGS Ltd,3.01,3.0
2,2,1738758,An investment in our securitiesis speculative ...,2020-09-14,GSMG,GLORY STAR NEW MEDIA GROUP HOLDINGS Ltd,3.42,3.6
3,3,1674930,Investing in our common stock involves risk. B...,2020-08-12,FLGT,"Fulgent Genetics, Inc.",30.25,30.610001
4,4,1422892,Beforeyou make a decision to invest in our sec...,2018-02-16,SINO,"Sino-Global Shipping America, Ltd.",2.02,2.08


In [5]:
# Pull the ticker and filing-date columns out as plain lists, then pair them row by row.
ticker_list = [*df['Ticker']]
dates_list = [*df['Date Filed_x']]

pairs = zip(ticker_list, dates_list)
pairs_list = [*pairs]

In [6]:
# Keep only the first (ticker, date) pair seen for each ticker, preserving input order.
already_seen = set()
final_list = []
for pair in pairs_list:
    symbol = pair[0]
    if symbol in already_seen:
        continue
    already_seen.add(symbol)
    final_list.append(pair)
len(final_list)

119

In [7]:
# Display the de-duplicated (ticker, filing date) pairs collected in final_list.
final_list

[('CHGG', '2021-02-27'),
 ('GSMG', '2020-09-14'),
 ('FLGT', '2020-08-12'),
 ('SINO', '2018-02-16'),
 ('SSNT', '2020-10-01'),
 ('BW', '2020-02-13'),
 ('ADMS', '2021-05-27'),
 ('ENLV', '2019-06-21'),
 ('ESS', '2021-02-27'),
 ('ITP', '2018-06-19'),
 ('SYN', '2018-05-15'),
 ('CSSE', '2021-01-22'),
 ('CLPT', '2021-01-29'),
 ('NEA', '2021-05-27'),
 ('CGIX', '2020-07-21'),
 ('DPW', '2021-02-01'),
 ('ODT', '2021-02-05'),
 ('AQMS', '2019-12-02'),
 ('SRAX', '2019-12-12'),
 ('NBEV', '2019-04-19'),
 ('LXP', '2021-05-01'),
 ('CRVS', '2020-03-19'),
 ('PHUN', '1933-02-27'),
 ('IOVA', '2021-02-04'),
 ('PLX', '2021-05-27'),
 ('INFI', '2021-02-03'),
 ('MFG', '2021-02-16'),
 ('AMRH', '2019-11-19'),
 ('ARAV', '2020-11-20'),
 ('IFRX', '2021-02-05'),
 ('MICT', '2020-09-04'),
 ('HYRE', '2020-05-07'),
 ('AEYE', '2021-02-11'),
 ('AVXL', '2019-07-15'),
 ('BWAY', '2021-05-27'),
 ('DAO', '2021-05-27'),
 ('SINT', '2020-10-13'),
 ('KSHB', '2019-05-06'),
 ('GPN', '2021-02-01'),
 ('ITMR', '2021-02-06'),
 ('TRCH', '20

In [8]:
# Split the de-duplicated pairs back into two parallel lists: tickers and their dates.
ticker_list = [pair[0] for pair in final_list]
dates_list = [pair[1] for pair in final_list]

In [11]:
# Positive offsets are days after the reference date, negative offsets are days before it.
deltas = [7, 5, -1, -2, -5]
df2 = delta_days_and_current(tickers=ticker_list, dates=dates_list, delta=deltas)
df2.head()

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

1 Failed download:
- ADMS: Data doesn't exist for startDate = 1621461600, endDate = 1622844000
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[******

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

1 Failed download:
- DFFN: Data doesn't exist for startDate = 1621461600, endDate = 1622844000
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

1 Failed download:
- IPWR: Data doesn't exist for startDate = 1621461600, endDate = 1622844000
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************

Unnamed: 0_level_0,Reference Date,Current Close,Close_Price_1_Days_Before,Close_Price_2_Days_Before,Close_Price_5_Days_Before,Close_Price_7_Days_Later,Close_Price_5_Days_Later
Ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
CHGG,2021-02-26,96.53,93.35,98.15,103.78,87.55,89.67
GSMG,2020-09-14,3.01,3.01,2.88,3.05,3.4,3.57
FLGT,2020-08-12,41.63,40.91,43.49,35.84,44.01,43.49
SINO,2018-02-16,10.9,10.95,11.3,11.95,10.4,10.15
SSNT,2020-10-01,2.63,2.65,2.61,2.51,2.6,2.51


In [26]:
# Small smoke test: one weekday reference date (AAPL) and one Saturday (GOOG).
tickers = ['AAPL', 'GOOG']
dates = ['2012-03-15', '2010-08-21']
deltas = [7, 5, 3, -1, -2, -5]
df = delta_days_and_current(tickers=tickers, dates=dates, delta=deltas)
df

[*********************100%***********************]  1 of 1 completed
                Close
Date                 
2012-03-07  18.953215
2012-03-08  19.356787
2012-03-09  19.470358
2012-03-12  19.714287
2012-03-13  20.289286
2012-03-14  21.056429
2012-03-15  20.912857
2012-03-16  20.913214
2012-03-19  21.467857
2012-03-20  21.641430
2012-03-21  21.517857
2012-03-22  21.405001
2012-03-23  21.287500
[*********************100%***********************]  1 of 1 completed
                 Close
Date                  
2010-08-12  245.086349
2010-08-13  242.266922
2010-08-16  241.888336
2010-08-17  244.344131
2010-08-18  240.174759
2010-08-19  233.111237
2010-08-20  230.147354
2010-08-23  231.168518
2010-08-24  224.852203
2010-08-25  226.461166
2010-08-26  224.647964
2010-08-27  228.558304


Unnamed: 0_level_0,Reference Date,Current Close,Close_Price_1_Days_Before,Close_Price_2_Days_Before,Close_Price_5_Days_Before,Close_Price_7_Days_Later,Close_Price_5_Days_Later,Close_Price_3_Days_Later
Ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
AAPL,2012-03-15,20.912857,21.056429,20.289286,19.470358,21.405001,21.64143,21.467857
GOOG,2010-08-20,230.147354,233.111237,240.174759,241.888336,228.558304,226.461166,231.168518
