## Packages

In [1]:
import functools
import glob
import os
from pathlib import Path
from time import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Switch the working directory to the project root; the relative data paths
# defined later in the notebook (data\interim\..., data\processed\...) resolve
# against it.
# NOTE(review): machine-specific absolute path -- each collaborator has to point
# this at their own checkout (the commented line is another user's location).
#os.chdir('C:\\Users\\A Sua\\Documents\\FIU\\CAP2020')
os.chdir('C:\\Users\\Daniel\\Python and R Notebooks\\CAP2020\\CAP2020')

## Define Functions

In [2]:
# Absolute path of the folder holding the raw per-ticker quarterly CSV files
# (765 files according to the original note). Change it to match your machine.
folder = "C:\\Users\\Daniel\\Python and R Notebooks\\CAP2020\\CAP2020\\data\\raw\\stock_quarterly_data"
# The remaining paths are relative, so they resolve against the current working
# directory set by os.chdir above.
# Interim CSV: the compiled frame is written here after the ticker clean-up.
data_subdir = "data\\interim\\compiled_quarterly_2.csv"
# Processed CSV that the quarter-conversion section reloads into `finalframe`.
final_data = "data\\processed\\compiled_quarterly_final.csv"
# NOTE(review): not referenced in the visible part of the notebook -- presumably
# the output of a later revision; confirm against the cells further down.
final_data_v2 = "data\\processed\\compiled_quarterly_final_v2.csv"

In [3]:
# Decorator that prints the computation time of the function it wraps.
def timing(f):
    """Wrap ``f`` so that every call prints its wall-clock duration.

    Parameters
    ----------
    f : callable
        The function to time.

    Returns
    -------
    callable
        A wrapper that forwards all positional and keyword arguments to ``f``,
        prints ``"<name> function took <elapsed> ms"`` and returns whatever
        ``f`` returned. ``functools.wraps`` keeps the wrapped function's
        ``__name__`` and docstring visible on the wrapper.
    """
    @functools.wraps(f)
    def wrap(*args, **kwargs):
        time1 = time()
        # Forward keyword arguments too; the previous wrapper only accepted
        # positional ones and raised TypeError on e.g. f(x, key=value).
        ret = f(*args, **kwargs)
        time2 = time()
        print('{:s} function took {:.3f} ms'.format(f.__name__, (time2-time1)*1000.0))

        return ret
    return wrap

## Import Data

In [77]:
# Load every raw per-ticker CSV found in `folder` and tag its rows with the
# ticker, which is the file name minus the common suffix.
file_suffix = "_quarterly_financial_data.csv"
data = []
for csv_path in Path(folder).glob('*.csv'):
    ticker = csv_path.name.replace(file_suffix, "")
    data.append(pd.read_csv(csv_path).assign(Symbol=ticker))

In [78]:
# Stack the per-ticker frames into one table. ignore_index is not set, so each
# file's own row labels are carried over unchanged.
bigframe = pd.concat(data)
initial_shape = bigframe.shape
print('initial shape {}'.format(initial_shape))

initial shape (62253, 42)


## Begin Cleaning

### Drop Duplicates

In [79]:
# Remove rows that are exact duplicates across all columns, then report the
# resulting shape. (The original wrapped print() in another print(), which
# emitted a stray "None" line after the shape.)
bigframe = bigframe.drop_duplicates()
print('new shape: {}'.format(bigframe.shape))

new shape: (62253, 42)
None


### Replace 'None' with NaN

In [80]:
# Turn the literal string 'None' into a real missing value everywhere in the frame.
bigframe = bigframe.replace(to_replace='None', value=np.nan)

### Clean ticker names

### ABI_0 acquired by LIFE in Q4 2008, LIFE merged into TMO in Q4 2013
### CCE becomes CCE_1 smoothly in Q3 2010, becomes CCEP after Q2 2016 (no data)
### DG_1 is just DG (Dollar general)
### GAS(Nicor) acquired by GAS_0(AGL resources) in Q4 2011; GAS_0 acquired by SO in Q2 2016
### GCI_0 (Tegna) split from GCI in Q2 2015
### GGP_0 becomes GGP in 2010 (Q1-Q3 data not available), then GGP acquired by BPY in Q3 2018 (no BPY data)
### GM_0 becomes GM in Q3 2009 (Q2 2009 is skipped)
### LEH_0 acquired by BCS and NMH in Q3 2008 (both no data)
### MNST_0 should be MWW, acquired by Randstad Holding in Q4 2016 (no data)
### PLD acquired by PLD_0 in Q2 2011
### Q acquired by CTL in Q1 2011
### Q_1 should be IQV
### WM_0 dissolved in Q3 2008

In [81]:
# Make sure every ticker symbol is a plain string before the renames below.
bigframe = bigframe.astype({'Symbol': str})

In [82]:
# Rename the raw file-name tickers to their cleaned symbols in a single pass.
#
# The original issued one `bigframe['Symbol'].replace(..., inplace=True)` per
# ticker. That mutates a temporary column object: recent pandas versions warn
# about it and, with Copy-on-Write enabled, the frame is left unchanged.
# Assigning the replaced column back is the supported form.
#
# No key of the mapping is also a value, so one dictionary pass gives the same
# result as the original sequence of individual replacements.
symbol_fixes = {
    'ABI_0': 'ABI',        # ABI_0 -> ABI
    'CCE_1': 'CCE',        # CCE_1 -> CCE
    'DG_1': 'DG',          # DG_1 -> DG
    'GAS_0': 'GAS_new',    # GAS_0 -> GAS_new
    'GAS': 'GAS_old',      # GAS -> GAS_old
    'GCI_0': 'TEGNA',      # GCI_0 -> TEGNA
    'GGP_0': 'GGP',        # GGP_0 -> GGP
    'GM_0': 'GM',          # GM_0 -> GM
    'LEH_0': 'LEH',        # LEH_0 -> LEH
    'MNST_0': 'MWW',       # MNST_0 -> MWW
    'Q_1': 'IQV',          # Q_1 -> IQV
    'WM_0': 'WM_old',      # WM_0 -> WM_old
    'PLD': 'PLD_old',      # PLD -> PLD_old
    'PLD_0': 'PLD_new',    # PLD_0 -> PLD_new
}
bigframe['Symbol'] = bigframe['Symbol'].replace(symbol_fixes)

In [83]:
# List the distinct symbols to eyeball the result of the ticker clean-up.
bigframe['Symbol'].unique()

array(['AAN', 'AAPL', 'AAP', 'AA', 'ABBV', 'ABC', 'ABI', 'ABK', 'ABMD',
       'ABT', 'ACAS', 'ACC', 'ACE', 'ACIW', 'ACM', 'ACN', 'ACS', 'ACV',
       'ACXM', 'ADBE', 'ADCT', 'ADI', 'ADM', 'ADP', 'ADSK', 'ADS', 'ADT',
       'AEE', 'AEO', 'AEP', 'AES', 'AET', 'AFG', 'AFL', 'AGCO', 'AGN',
       'AHL', 'AIG', 'AIV', 'AIZ', 'AJG', 'AKAM', 'AKRX', 'ALB', 'ALEX',
       'ALGN', 'ALK', 'ALLE', 'ALL', 'ALTR', 'ALXN', 'AMAT', 'AMCX',
       'AMD', 'AME', 'AMGN', 'AMG', 'AMP', 'AMR', 'AMSC', 'AMT', 'AMZN',
       'ANDW', 'ANF', 'ANR', 'ANSS', 'AN', 'AOC', 'AOS', 'APA', 'APC',
       'APD', 'APH', 'APOL', 'APU', 'ARE', 'ARG', 'ARRS', 'ARW', 'ASH',
       'ATI', 'ATVI', 'AVB', 'AVGO', 'AVP', 'AVY', 'AWK', 'AW', 'AXP',
       'AYE', 'AYI', 'AZO', 'A', 'BAC', 'BAX', 'BA', 'BBBY', 'BBT', 'BBY',
       'BCR', 'BC', 'BDK', 'BDX', 'BEN', 'BF.B', 'BHF', 'BHI', 'BIG',
       'BIIB', 'BJS', 'BKE', 'BK', 'BLK', 'BLL', 'BMC', 'BMS', 'BMY',
       'BNI', 'BRCM', 'BRK.A', 'BRLI', 'BRL', 'BSC', 'BSX', 'BTU', 

In [84]:
# Save the compiled frame to the interim CSV named by `data_subdir`, leaving the
# row index out of the file.
bigframe.to_csv(data_subdir, index=False)

### Transform Columns into Proper Datatypes

#### P/E Ratio - The ratio of Price to EPS diluted TTM as of the previous quarter.
#### EPS basic - Basic earnings per share for a given quarter
#### EPS diluted - Diluted earnings per share.
#### Dividend per share - Common stock dividends paid during a quarter per share, including all regular and special dividends and distributions to common shareholders.
#### Price - The medium price per share of the company common stock during a given quarter. The prices are as reported, and are not adjusted for subsequent dividends.
#### Long-term debt to equity ratio - The ratio of Long-term debt to common shareholders' equity (Shareholders equity minus Preferred equity).
#### Dividend payout ratio - The ratio of Dividends TTM to Earnings (available to common stockholders) TTM.
#### Shares split adjusted - The number of shares the company had at the end of a given quarter, adjusted for splits to be comparable to today's shares.

In [85]:
# Cast the numeric measures to float in one step; each column is converted
# exactly as before, just driven by a list instead of eight assignments.
float_columns = [
    'P/E ratio',
    'EPS basic',
    'EPS diluted',
    'Dividend per share',
    'Price',
    'Long-term debt to equity ratio',
    'Dividend payout ratio',
    'Shares split adjusted',
]
bigframe = bigframe.astype({column: float for column in float_columns})

#### Quarter end

In [86]:
# Parse 'Quarter end' into datetimes. The original passed errors='ignore',
# which is deprecated in pandas and, on any unparseable value, silently returns
# the column unchanged as strings. With the default (errors='raise') a bad date
# surfaces here instead of later. Missing values (NaN) still parse to NaT.
bigframe['Quarter end'] = pd.to_datetime(bigframe['Quarter end'])
bigframe.head()

Unnamed: 0,Quarter end,Shares,Shares split adjusted,Split factor,Assets,Current Assets,Liabilities,Current Liabilities,Shareholders equity,Non-controlling interest,...,P/E ratio,Cumulative dividends per share,Dividend payout ratio,Long-term debt to equity ratio,Equity to assets ratio,Net margin,Asset turnover,Free cash flow per share,Current ratio,Symbol
0,2019-09-30,67151778,67151778.0,1.0,3219620000,,1351965000,,1867655000,0,...,21.69,1.04,0.0488,0.1859,0.5801,0.0509,1.26,1.32,,AAN
1,2019-06-30,67547497,67547497.0,1.0,3180198000,,1330640000,,1849558000,0,...,19.94,1.01,0.0464,0.188,0.5816,0.052,1.31,0.84,,AAN
2,2019-03-31,67677449,67677449.0,1.0,3237966000,,1424458000,,1813508000,0,...,17.49,0.97,0.0459,0.2251,0.5601,0.0515,1.37,2.09,,AAN
3,2018-12-31,67202919,67202919.0,1.0,2826692000,,1065984000,,1760708000,0,...,10.83,0.94,0.0455,0.1937,0.6229,0.0512,1.41,-0.43,,AAN
4,2018-09-30,68594784,68594784.0,1.0,2698898000,,935392000,,1763506000,0,...,12.0,0.9,0.0267,0.1686,0.6534,0.0839,1.39,1.13,,AAN


#### Symbol

In [93]:
# Keep only the columns used from here on: the date, the per-share measures,
# the ratios and the ticker symbol.
final_columns = [
    'Quarter end',
    'Shares split adjusted',
    'Price',
    'EPS basic',
    'EPS diluted',
    'Dividend per share',
    'Dividend payout ratio',
    'P/E ratio',
    'Long-term debt to equity ratio',
    'Symbol',
]
finalframe = bigframe.loc[:, final_columns]
finalframe.head()

Unnamed: 0,Quarter end,Shares split adjusted,Price,EPS basic,EPS diluted,Dividend per share,Dividend payout ratio,P/E ratio,Long-term debt to equity ratio,Symbol
0,2019-09-30,67151778.0,63.99,0.59,0.58,0.035,0.0488,21.69,0.1859,AAN
1,2019-06-30,67547497.0,57.23,0.63,0.62,0.035,0.0464,19.94,0.188,AAN
2,2019-03-31,67677449.0,48.63,0.83,0.82,0.035,0.0459,17.49,0.2251,AAN
3,2018-12-31,67202919.0,47.0,0.9,0.89,0.04,0.0455,10.83,0.1937,AAN
4,2018-09-30,68594784.0,48.86,0.64,0.62,0.03,0.0267,12.0,0.1686,AAN


In [4]:
# Persist the cleaned frame once, then always continue from the file on disk.
# The write used to be commented out, so a fresh Restart & Run All failed with
# FileNotFoundError whenever the processed CSV was not already present. Writing
# only when the file is missing keeps an existing processed file untouched and
# still lets the notebook be resumed from this cell after a kernel restart.
if not Path(final_data).exists():
    finalframe.to_csv(final_data, index=False)
finalframe = pd.read_csv(final_data)

### Convert Dates to Quarters

In [5]:
# Preview the first rows of the frame reloaded from the processed CSV.
finalframe.head()

Unnamed: 0,Quarter end,Shares split adjusted,Price,EPS basic,EPS diluted,Dividend per share,Dividend payout ratio,P/E ratio,Long-term debt to equity ratio,Symbol
0,2019-09-30,67151778.0,63.99,0.59,0.58,0.035,0.0488,21.69,0.1859,AAN
1,2019-06-30,67547497.0,57.23,0.63,0.62,0.035,0.0464,19.94,0.188,AAN
2,2019-03-31,67677449.0,48.63,0.83,0.82,0.035,0.0459,17.49,0.2251,AAN
3,2018-12-31,67202919.0,47.0,0.9,0.89,0.04,0.0455,10.83,0.1937,AAN
4,2018-09-30,68594784.0,48.86,0.64,0.62,0.03,0.0267,12.0,0.1686,AAN


In [7]:
# The CSV round-trip turns the dates back into strings, so parse them again.
# infer_datetime_format is dropped: it is deprecated and has no effect in
# pandas >= 2.0, where format inference is already the default behaviour.
finalframe['Quarter end'] = pd.to_datetime(finalframe['Quarter end'])
finalframe.dtypes

Quarter end                       datetime64[ns]
Shares split adjusted                    float64
Price                                    float64
EPS basic                                float64
EPS diluted                              float64
Dividend per share                       float64
Dividend payout ratio                    float64
P/E ratio                                float64
Long-term debt to equity ratio           float64
Symbol                                    object
dtype: object

#### First Iteration - 2 week grace period

In [8]:
# period filters
periods = [(finalframe['Quarter end'] >='10/15/2019'), 
            (finalframe['Quarter end'] < '10/15/2019') & (finalframe['Quarter end'] >= '7/15/2019'),
            (finalframe['Quarter end'] < '7/15/2019') & (finalframe['Quarter end'] >= '4/15/2019'),
            (finalframe['Quarter end'] < '4/15/2019') & (finalframe['Quarter end'] >= '1/15/2019'),
            (finalframe['Quarter end'] < '1/15/2019') & (finalframe['Quarter end'] >= '10/15/2018'),
            (finalframe['Quarter end'] < '10/15/2018') & (finalframe['Quarter end'] >= '7/15/2018'),
            (finalframe['Quarter end'] < '7/15/2018') & (finalframe['Quarter end'] >= '4/15/2018'),
            (finalframe['Quarter end'] < '4/15/2018') & (finalframe['Quarter end'] >= '1/15/2018'),
            (finalframe['Quarter end'] < '1/15/2018') & (finalframe['Quarter end'] >= '10/15/2017'),
            (finalframe['Quarter end'] < '10/15/2017') & (finalframe['Quarter end'] >= '7/15/2017'),
            (finalframe['Quarter end'] < '7/15/2017') & (finalframe['Quarter end'] >= '4/15/2017'),
            (finalframe['Quarter end'] < '4/15/2017') & (finalframe['Quarter end'] >= '1/15/2017'),
            (finalframe['Quarter end'] < '1/15/2017') & (finalframe['Quarter end'] >= '10/15/2016'),
            (finalframe['Quarter end'] < '10/15/2016') & (finalframe['Quarter end'] >= '7/15/2016'),
            (finalframe['Quarter end'] < '7/15/2016') & (finalframe['Quarter end'] >= '4/15/2016'),
            (finalframe['Quarter end'] < '4/15/2016') & (finalframe['Quarter end'] >= '1/15/2016'),
            (finalframe['Quarter end'] < '1/15/2016') & (finalframe['Quarter end'] >= '10/15/2015'),
            (finalframe['Quarter end'] < '10/15/2015') & (finalframe['Quarter end'] >= '7/15/2015'),
            (finalframe['Quarter end'] < '7/15/2015') & (finalframe['Quarter end'] >= '4/15/2015'),
            (finalframe['Quarter end'] < '4/15/2015') & (finalframe['Quarter end'] >= '1/15/2015'),
            (finalframe['Quarter end'] < '1/15/2015') & (finalframe['Quarter end'] >= '10/15/2014'),
            (finalframe['Quarter end'] < '10/15/2014') & (finalframe['Quarter end'] >= '7/15/2014'),
            (finalframe['Quarter end'] < '7/15/2014') & (finalframe['Quarter end'] >= '4/15/2014'),
            (finalframe['Quarter end'] < '4/15/2014') & (finalframe['Quarter end'] >= '1/15/2014'),
            (finalframe['Quarter end'] < '1/15/2014') & (finalframe['Quarter end'] >= '10/15/2013'),
            (finalframe['Quarter end'] < '10/15/2013') & (finalframe['Quarter end'] >= '7/15/2013'),
            (finalframe['Quarter end'] < '7/15/2013') & (finalframe['Quarter end'] >= '4/15/2013'),
            (finalframe['Quarter end'] < '4/15/2013') & (finalframe['Quarter end'] >= '1/15/2013'),
            (finalframe['Quarter end'] < '1/15/2013') & (finalframe['Quarter end'] >= '10/15/2012'),
            (finalframe['Quarter end'] < '10/15/2012') & (finalframe['Quarter end'] >= '7/15/2012'),
            (finalframe['Quarter end'] < '7/15/2012') & (finalframe['Quarter end'] >= '4/15/2012'),
            (finalframe['Quarter end'] < '4/15/2012') & (finalframe['Quarter end'] >= '1/15/2012'),
            (finalframe['Quarter end'] < '1/15/2012') & (finalframe['Quarter end'] >= '10/15/2011'),
            (finalframe['Quarter end'] < '10/15/2011') & (finalframe['Quarter end'] >= '7/15/2011'),
            (finalframe['Quarter end'] < '7/15/2011') & (finalframe['Quarter end'] >= '4/15/2011'),
            (finalframe['Quarter end'] < '4/15/2011') & (finalframe['Quarter end'] >= '1/15/2011'),
            (finalframe['Quarter end'] < '1/15/2011') & (finalframe['Quarter end'] >= '10/15/2010'),
            (finalframe['Quarter end'] < '10/15/2010') & (finalframe['Quarter end'] >= '7/15/2010'),
            (finalframe['Quarter end'] < '7/15/2010') & (finalframe['Quarter end'] >= '4/15/2010'),
            (finalframe['Quarter end'] < '4/15/2010') & (finalframe['Quarter end'] >= '1/15/2010'),
            (finalframe['Quarter end'] < '1/15/2010') & (finalframe['Quarter end'] >= '10/15/2009'),
            (finalframe['Quarter end'] < '10/15/2009') & (finalframe['Quarter end'] >= '7/15/2009'),
            (finalframe['Quarter end'] < '7/15/2009') & (finalframe['Quarter end'] >= '4/15/2009'),
            (finalframe['Quarter end'] < '4/15/2009') & (finalframe['Quarter end'] >= '1/15/2009'),
            (finalframe['Quarter end'] < '1/15/2009') & (finalframe['Quarter end'] >= '10/15/2008'),
            (finalframe['Quarter end'] < '10/15/2008') & (finalframe['Quarter end'] >= '7/15/2008'),
            (finalframe['Quarter end'] < '7/15/2008') & (finalframe['Quarter end'] >= '4/15/2008'),
            (finalframe['Quarter end'] < '4/15/2008') & (finalframe['Quarter end'] >= '1/15/2008'),
            (finalframe['Quarter end'] < '1/15/2008') & (finalframe['Quarter end'] >= '10/15/2007'),
            (finalframe['Quarter end'] < '10/15/2007') & (finalframe['Quarter end'] >= '7/15/2007'),
            (finalframe['Quarter end'] < '7/15/2007') & (finalframe['Quarter end'] >= '4/15/2007'),
            (finalframe['Quarter end'] < '4/15/2007') & (finalframe['Quarter end'] >= '1/15/2007'),
            (finalframe['Quarter end'] < '1/15/2007') & (finalframe['Quarter end'] >= '10/15/2006'),
            (finalframe['Quarter end'] < '10/15/2006') & (finalframe['Quarter end'] >= '7/15/2006'),
            (finalframe['Quarter end'] < '7/15/2006') & (finalframe['Quarter end'] >= '4/15/2006'),
            (finalframe['Quarter end'] < '4/15/2006') & (finalframe['Quarter end'] >= '1/15/2006'),
            (finalframe['Quarter end'] < '1/15/2006') & (finalframe['Quarter end'] >= '10/15/2005'),
            (finalframe['Quarter end'] < '10/15/2005') & (finalframe['Quarter end'] >= '7/15/2005'),
            (finalframe['Quarter end'] < '7/15/2005') & (finalframe['Quarter end'] >= '4/15/2005'),
            (finalframe['Quarter end'] < '4/15/2005') & (finalframe['Quarter end'] >= '1/15/2005'),
            (finalframe['Quarter end'] < '1/15/2005') & (finalframe['Quarter end'] >= '10/15/2004'),
            (finalframe['Quarter end'] < '10/15/2004') & (finalframe['Quarter end'] >= '7/15/2004'),
            (finalframe['Quarter end'] < '7/15/2004') & (finalframe['Quarter end'] >= '4/15/2004'),
            (finalframe['Quarter end'] < '4/15/2004') & (finalframe['Quarter end'] >= '1/15/2004'),
            (finalframe['Quarter end'] < '1/15/2004') & (finalframe['Quarter end'] >= '10/15/2003'),
            (finalframe['Quarter end'] < '10/15/2003') & (finalframe['Quarter end'] >= '7/15/2003'),
            (finalframe['Quarter end'] < '7/15/2003') & (finalframe['Quarter end'] >= '4/15/2003'),
            (finalframe['Quarter end'] < '4/15/2003') & (finalframe['Quarter end'] >= '1/15/2003'),
            (finalframe['Quarter end'] < '1/15/2003') & (finalframe['Quarter end'] >= '10/15/2002'),
            (finalframe['Quarter end'] < '10/15/2002') & (finalframe['Quarter end'] >= '7/15/2002'),
            (finalframe['Quarter end'] < '7/15/2002') & (finalframe['Quarter end'] >= '4/15/2002'),
            (finalframe['Quarter end'] < '4/15/2002') & (finalframe['Quarter end'] >= '1/15/2002'),
            (finalframe['Quarter end'] < '1/15/2002') & (finalframe['Quarter end'] >= '10/15/2001'),
            (finalframe['Quarter end'] < '10/15/2001') & (finalframe['Quarter end'] >= '7/15/2001'),
            (finalframe['Quarter end'] < '7/15/2001') & (finalframe['Quarter end'] >= '4/15/2001'),
            (finalframe['Quarter end'] < '4/15/2001') & (finalframe['Quarter end'] >= '1/15/2001'),
            (finalframe['Quarter end'] < '1/15/2001') & (finalframe['Quarter end'] >= '10/15/2000'),
            (finalframe['Quarter end'] < '10/15/2000') & (finalframe['Quarter end'] >= '7/15/2000'),
            (finalframe['Quarter end'] < '7/15/2000') & (finalframe['Quarter end'] >= '4/15/2000'),
            (finalframe['Quarter end'] < '4/15/2000') & (finalframe['Quarter end'] >= '1/15/2000'),
            (finalframe['Quarter end'] < '1/15/2000') & (finalframe['Quarter end'] >= '10/15/1999'),
            (finalframe['Quarter end'] < '10/15/1999') & (finalframe['Quarter end'] >= '7/15/1999'),
            (finalframe['Quarter end'] < '7/15/1999') & (finalframe['Quarter end'] >= '4/15/1999'),
            (finalframe['Quarter end'] < '4/15/1999') & (finalframe['Quarter end'] >= '1/15/1999'),
            (finalframe['Quarter end'] < '1/15/1999') & (finalframe['Quarter end'] >= '10/15/1998'),
            (finalframe['Quarter end'] < '10/15/1998') & (finalframe['Quarter end'] >= '7/15/1998'),
            (finalframe['Quarter end'] < '7/15/1998') & (finalframe['Quarter end'] >= '4/15/1998'),
            (finalframe['Quarter end'] < '4/15/1998') & (finalframe['Quarter end'] >= '1/15/1998'),
            (finalframe['Quarter end'] < '1/15/1998') & (finalframe['Quarter end'] >= '10/15/1997'),
            (finalframe['Quarter end'] < '10/15/1997') & (finalframe['Quarter end'] >= '7/15/1997'),
            (finalframe['Quarter end'] < '7/15/1997') & (finalframe['Quarter end'] >= '4/15/1997'),
            (finalframe['Quarter end'] < '4/15/1997') & (finalframe['Quarter end'] >= '1/15/1997'),
            (finalframe['Quarter end'] < '1/15/1997') & (finalframe['Quarter end'] >= '10/15/1996'),
            (finalframe['Quarter end'] < '10/15/1996') & (finalframe['Quarter end'] >= '7/15/1996'),
            (finalframe['Quarter end'] < '7/15/1996') & (finalframe['Quarter end'] >= '4/15/1996'),
            (finalframe['Quarter end'] < '4/15/1996') & (finalframe['Quarter end'] >= '1/15/1996'),
            (finalframe['Quarter end'] < '1/15/1996') & (finalframe['Quarter end'] >= '10/15/1995'),
            (finalframe['Quarter end'] < '10/15/1995') & (finalframe['Quarter end'] >= '7/15/1995'),
            (finalframe['Quarter end'] < '7/15/1995') & (finalframe['Quarter end'] >= '4/15/1995'),
            (finalframe['Quarter end'] < '4/15/1995') & (finalframe['Quarter end'] >= '1/15/1995'),
            (finalframe['Quarter end'] < '1/15/1995') & (finalframe['Quarter end'] >= '10/15/1994'),
            (finalframe['Quarter end'] < '10/15/1994') & (finalframe['Quarter end'] >= '7/15/1994'),
            (finalframe['Quarter end'] < '7/15/1994') & (finalframe['Quarter end'] >= '4/15/1994'),
            (finalframe['Quarter end'] < '4/15/1994') & (finalframe['Quarter end'] >= '1/15/1994'),
            (finalframe['Quarter end'] < '1/15/1994') & (finalframe['Quarter end'] >= '10/15/1993'),
            (finalframe['Quarter end'] < '10/15/1993') & (finalframe['Quarter end'] >= '7/15/1993'),
            (finalframe['Quarter end'] < '7/15/1993') & (finalframe['Quarter end'] >= '4/15/1993')]

quarters = ['2019Q4', '2019Q3', '2019Q2', '2019Q1', '2018Q4', '2018Q3', '2018Q2', '2018Q1',
            '2017Q4', '2017Q3', '2017Q2', '2017Q1', '2016Q4', '2016Q3', '2016Q2', '2016Q1',
            '2015Q4', '2015Q3', '2015Q2', '2015Q1', '2014Q4', '2014Q3', '2014Q2', '2014Q1',
            '2013Q4', '2013Q3', '2013Q2', '2013Q1', '2012Q4', '2012Q3', '2012Q2', '2012Q1',
            '2011Q4', '2011Q3', '2011Q2', '2011Q1', '2010Q4', '2010Q3', '2010Q2', '2010Q1',
            '2009Q4', '2009Q3', '2009Q2', '2009Q1', '2008Q4', '2008Q3', '2008Q2', '2008Q1',
            '2007Q4', '2007Q3', '2007Q2', '2007Q1', '2006Q4', '2006Q3', '2006Q2', '2006Q1',
            '2005Q4', '2005Q3', '2005Q2', '2005Q1', '2004Q4', '2004Q3', '2004Q2', '2004Q1',
            '2003Q4', '2003Q3', '2003Q2', '2003Q1', '2002Q4', '2002Q3', '2002Q2', '2002Q1',
            '2001Q4', '2001Q3', '2001Q2', '2001Q1', '2000Q4', '2000Q3', '2000Q2', '2000Q1',
            '1999Q4', '1999Q3', '1999Q2', '1999Q1', '1998Q4', '1998Q3', '1998Q2', '1998Q1',
            '1997Q4', '1997Q3', '1997Q2', '1997Q1', '1996Q4', '1996Q3', '1996Q2', '1996Q1',
            '1995Q4', '1995Q3', '1995Q2', '1995Q1', '1994Q4', '1994Q3', '1994Q2', '1994Q1',
            '1993Q4', '1993Q3', '1993Q2']

finalframe['Quarter'] = np.select(periods, quarters)
finalframe.head()

Unnamed: 0,Quarter end,Shares split adjusted,Price,EPS basic,EPS diluted,Dividend per share,Dividend payout ratio,P/E ratio,Long-term debt to equity ratio,Symbol,Quarter
0,2019-09-30,67151778.0,63.99,0.59,0.58,0.035,0.0488,21.69,0.1859,AAN,2019Q3
1,2019-06-30,67547497.0,57.23,0.63,0.62,0.035,0.0464,19.94,0.188,AAN,2019Q2
2,2019-03-31,67677449.0,48.63,0.83,0.82,0.035,0.0459,17.49,0.2251,AAN,2019Q1
3,2018-12-31,67202919.0,47.0,0.9,0.89,0.04,0.0455,10.83,0.1937,AAN,2018Q4
4,2018-09-30,68594784.0,48.86,0.64,0.62,0.03,0.0267,12.0,0.1686,AAN,2018Q3


In [9]:
# First iteration (2-week grace period): list every (Symbol, Quarter) pair that
# occurs more than once, i.e. two reports that fell into the same bucket.
# Observations from this pass:
#   AAP  - consistently 3-4 weeks late in Q1 or Q2
#   ADCT - 1 month late 2009Q2
#   ALXN - 1 month late 2005Q3
#   ROP  - 1 month late 2003Q2
#   SAI  - 1 month late 2014Q4
#   SANM - 1 month late 1996Q2
#   TXT  - 1 month late 2006Q2
#   WFMI - consistently 3 weeks late in Q4
pd.set_option('display.max_rows', None)  # lift the row limit so every clash is shown
duplicate_keys = ['Symbol', 'Quarter']
is_repeated = finalframe.duplicated(subset=duplicate_keys, keep=False)
finalframe[is_repeated]

Unnamed: 0,Quarter end,Shares split adjusted,Price,EPS basic,EPS diluted,Dividend per share,Dividend payout ratio,P/E ratio,Long-term debt to equity ratio,Symbol,Quarter
206,2018-07-14,74081258.0,122.78,1.59,1.59,0.06,0.0332,18.06,0.2857,AAP,2018Q2
207,2018-04-21,74031311.0,111.81,1.85,1.84,0.06,0.0352,17.42,0.2946,AAP,2018Q2
209,2017-10-07,73898043.0,98.81,1.3,1.3,0.06,0.0502,19.72,0.3231,AAP,2017Q3
210,2017-07-15,73862588.0,125.42,1.18,1.17,0.06,0.0477,22.72,0.3336,AAP,2017Q3
213,2016-10-08,73653625.0,159.01,1.54,1.53,0.06,0.039,25.69,0.3651,AAP,2016Q3
214,2016-07-16,73640170.0,149.65,1.69,1.68,0.06,0.0384,22.88,0.4255,AAP,2016Q3
217,2015-10-10,73236597.0,172.96,1.64,1.63,0.06,0.0349,25.29,0.5359,AAP,2015Q3
218,2015-07-18,73217397.0,156.26,2.04,2.03,0.06,0.0348,23.32,0.6348,AAP,2015Q3
222,2014-07-12,72975452.0,127.31,1.91,1.89,0.06,0.0397,22.41,0.9908,AAP,2014Q2
223,2014-04-19,72954638.0,119.38,2.02,2.01,0.06,0.0419,22.44,1.2011,AAP,2014Q2


#### 2nd iteration - change to 3 weeks

In [10]:
# period filters
periods_2 = [(finalframe['Quarter end'] >='10/22/2019'), 
            (finalframe['Quarter end'] < '10/22/2019') & (finalframe['Quarter end'] >= '7/22/2019'),
            (finalframe['Quarter end'] < '7/22/2019') & (finalframe['Quarter end'] >= '4/25/2019'),
            (finalframe['Quarter end'] < '4/25/2019') & (finalframe['Quarter end'] >= '1/22/2019'),
            (finalframe['Quarter end'] < '1/22/2019') & (finalframe['Quarter end'] >= '10/22/2018'),
            (finalframe['Quarter end'] < '10/22/2018') & (finalframe['Quarter end'] >= '7/22/2018'),
            (finalframe['Quarter end'] < '7/22/2018') & (finalframe['Quarter end'] >= '4/25/2018'),
            (finalframe['Quarter end'] < '4/25/2018') & (finalframe['Quarter end'] >= '1/22/2018'),
            (finalframe['Quarter end'] < '1/22/2018') & (finalframe['Quarter end'] >= '10/22/2017'),
            (finalframe['Quarter end'] < '10/22/2017') & (finalframe['Quarter end'] >= '7/22/2017'),
            (finalframe['Quarter end'] < '7/22/2017') & (finalframe['Quarter end'] >= '4/25/2017'),
            (finalframe['Quarter end'] < '4/25/2017') & (finalframe['Quarter end'] >= '1/22/2017'),
            (finalframe['Quarter end'] < '1/22/2017') & (finalframe['Quarter end'] >= '10/22/2016'),
            (finalframe['Quarter end'] < '10/22/2016') & (finalframe['Quarter end'] >= '7/22/2016'),
            (finalframe['Quarter end'] < '7/22/2016') & (finalframe['Quarter end'] >= '4/25/2016'),
            (finalframe['Quarter end'] < '4/25/2016') & (finalframe['Quarter end'] >= '1/22/2016'),
            (finalframe['Quarter end'] < '1/22/2016') & (finalframe['Quarter end'] >= '10/22/2015'),
            (finalframe['Quarter end'] < '10/22/2015') & (finalframe['Quarter end'] >= '7/22/2015'),
            (finalframe['Quarter end'] < '7/22/2015') & (finalframe['Quarter end'] >= '4/25/2015'),
            (finalframe['Quarter end'] < '4/25/2015') & (finalframe['Quarter end'] >= '1/22/2015'),
            (finalframe['Quarter end'] < '1/22/2015') & (finalframe['Quarter end'] >= '10/22/2014'),
            (finalframe['Quarter end'] < '10/22/2014') & (finalframe['Quarter end'] >= '7/22/2014'),
            (finalframe['Quarter end'] < '7/22/2014') & (finalframe['Quarter end'] >= '4/25/2014'),
            (finalframe['Quarter end'] < '4/25/2014') & (finalframe['Quarter end'] >= '1/22/2014'),
            (finalframe['Quarter end'] < '1/22/2014') & (finalframe['Quarter end'] >= '10/22/2013'),
            (finalframe['Quarter end'] < '10/22/2013') & (finalframe['Quarter end'] >= '7/22/2013'),
            (finalframe['Quarter end'] < '7/22/2013') & (finalframe['Quarter end'] >= '4/25/2013'),
            (finalframe['Quarter end'] < '4/25/2013') & (finalframe['Quarter end'] >= '1/22/2013'),
            (finalframe['Quarter end'] < '1/22/2013') & (finalframe['Quarter end'] >= '10/22/2012'),
            (finalframe['Quarter end'] < '10/22/2012') & (finalframe['Quarter end'] >= '7/22/2012'),
            (finalframe['Quarter end'] < '7/22/2012') & (finalframe['Quarter end'] >= '4/25/2012'),
            (finalframe['Quarter end'] < '4/25/2012') & (finalframe['Quarter end'] >= '1/22/2012'),
            (finalframe['Quarter end'] < '1/22/2012') & (finalframe['Quarter end'] >= '10/22/2011'),
            (finalframe['Quarter end'] < '10/22/2011') & (finalframe['Quarter end'] >= '7/22/2011'),
            (finalframe['Quarter end'] < '7/22/2011') & (finalframe['Quarter end'] >= '4/25/2011'),
            (finalframe['Quarter end'] < '4/25/2011') & (finalframe['Quarter end'] >= '1/22/2011'),
            (finalframe['Quarter end'] < '1/22/2011') & (finalframe['Quarter end'] >= '10/22/2010'),
            (finalframe['Quarter end'] < '10/22/2010') & (finalframe['Quarter end'] >= '7/22/2010'),
            (finalframe['Quarter end'] < '7/22/2010') & (finalframe['Quarter end'] >= '4/25/2010'),
            (finalframe['Quarter end'] < '4/25/2010') & (finalframe['Quarter end'] >= '1/22/2010'),
            (finalframe['Quarter end'] < '1/22/2010') & (finalframe['Quarter end'] >= '10/22/2009'),
            (finalframe['Quarter end'] < '10/22/2009') & (finalframe['Quarter end'] >= '7/22/2009'),
            (finalframe['Quarter end'] < '7/22/2009') & (finalframe['Quarter end'] >= '4/25/2009'),
            (finalframe['Quarter end'] < '4/25/2009') & (finalframe['Quarter end'] >= '1/22/2009'),
            (finalframe['Quarter end'] < '1/22/2009') & (finalframe['Quarter end'] >= '10/22/2008'),
            (finalframe['Quarter end'] < '10/22/2008') & (finalframe['Quarter end'] >= '7/22/2008'),
            (finalframe['Quarter end'] < '7/22/2008') & (finalframe['Quarter end'] >= '4/25/2008'),
            (finalframe['Quarter end'] < '4/25/2008') & (finalframe['Quarter end'] >= '1/22/2008'),
            (finalframe['Quarter end'] < '1/22/2008') & (finalframe['Quarter end'] >= '10/22/2007'),
            (finalframe['Quarter end'] < '10/22/2007') & (finalframe['Quarter end'] >= '7/22/2007'),
            (finalframe['Quarter end'] < '7/22/2007') & (finalframe['Quarter end'] >= '4/25/2007'),
            (finalframe['Quarter end'] < '4/25/2007') & (finalframe['Quarter end'] >= '1/22/2007'),
            (finalframe['Quarter end'] < '1/22/2007') & (finalframe['Quarter end'] >= '10/22/2006'),
            (finalframe['Quarter end'] < '10/22/2006') & (finalframe['Quarter end'] >= '7/22/2006'),
            (finalframe['Quarter end'] < '7/22/2006') & (finalframe['Quarter end'] >= '4/25/2006'),
            (finalframe['Quarter end'] < '4/25/2006') & (finalframe['Quarter end'] >= '1/22/2006'),
            (finalframe['Quarter end'] < '1/22/2006') & (finalframe['Quarter end'] >= '10/22/2005'),
            (finalframe['Quarter end'] < '10/22/2005') & (finalframe['Quarter end'] >= '7/22/2005'),
            (finalframe['Quarter end'] < '7/22/2005') & (finalframe['Quarter end'] >= '4/25/2005'),
            (finalframe['Quarter end'] < '4/25/2005') & (finalframe['Quarter end'] >= '1/22/2005'),
            (finalframe['Quarter end'] < '1/22/2005') & (finalframe['Quarter end'] >= '10/22/2004'),
            (finalframe['Quarter end'] < '10/22/2004') & (finalframe['Quarter end'] >= '7/22/2004'),
            (finalframe['Quarter end'] < '7/22/2004') & (finalframe['Quarter end'] >= '4/25/2004'),
            (finalframe['Quarter end'] < '4/25/2004') & (finalframe['Quarter end'] >= '1/22/2004'),
            (finalframe['Quarter end'] < '1/22/2004') & (finalframe['Quarter end'] >= '10/22/2003'),
            (finalframe['Quarter end'] < '10/22/2003') & (finalframe['Quarter end'] >= '7/22/2003'),
            (finalframe['Quarter end'] < '7/22/2003') & (finalframe['Quarter end'] >= '4/25/2003'),
            (finalframe['Quarter end'] < '4/25/2003') & (finalframe['Quarter end'] >= '1/22/2003'),
            (finalframe['Quarter end'] < '1/22/2003') & (finalframe['Quarter end'] >= '10/22/2002'),
            (finalframe['Quarter end'] < '10/22/2002') & (finalframe['Quarter end'] >= '7/22/2002'),
            (finalframe['Quarter end'] < '7/22/2002') & (finalframe['Quarter end'] >= '4/25/2002'),
            (finalframe['Quarter end'] < '4/25/2002') & (finalframe['Quarter end'] >= '1/22/2002'),
            (finalframe['Quarter end'] < '1/22/2002') & (finalframe['Quarter end'] >= '10/22/2001'),
            (finalframe['Quarter end'] < '10/22/2001') & (finalframe['Quarter end'] >= '7/22/2001'),
            (finalframe['Quarter end'] < '7/22/2001') & (finalframe['Quarter end'] >= '4/25/2001'),
            (finalframe['Quarter end'] < '4/25/2001') & (finalframe['Quarter end'] >= '1/22/2001'),
            (finalframe['Quarter end'] < '1/22/2001') & (finalframe['Quarter end'] >= '10/22/2000'),
            (finalframe['Quarter end'] < '10/22/2000') & (finalframe['Quarter end'] >= '7/22/2000'),
            (finalframe['Quarter end'] < '7/22/2000') & (finalframe['Quarter end'] >= '4/25/2000'),
            (finalframe['Quarter end'] < '4/25/2000') & (finalframe['Quarter end'] >= '1/22/2000'),
            (finalframe['Quarter end'] < '1/22/2000') & (finalframe['Quarter end'] >= '10/22/1999'),
            (finalframe['Quarter end'] < '10/22/1999') & (finalframe['Quarter end'] >= '7/22/1999'),
            (finalframe['Quarter end'] < '7/22/1999') & (finalframe['Quarter end'] >= '4/25/1999'),
            (finalframe['Quarter end'] < '4/25/1999') & (finalframe['Quarter end'] >= '1/22/1999'),
            (finalframe['Quarter end'] < '1/22/1999') & (finalframe['Quarter end'] >= '10/22/1998'),
            (finalframe['Quarter end'] < '10/22/1998') & (finalframe['Quarter end'] >= '7/22/1998'),
            (finalframe['Quarter end'] < '7/22/1998') & (finalframe['Quarter end'] >= '4/25/1998'),
            (finalframe['Quarter end'] < '4/25/1998') & (finalframe['Quarter end'] >= '1/22/1998'),
            (finalframe['Quarter end'] < '1/22/1998') & (finalframe['Quarter end'] >= '10/22/1997'),
            (finalframe['Quarter end'] < '10/22/1997') & (finalframe['Quarter end'] >= '7/22/1997'),
            (finalframe['Quarter end'] < '7/22/1997') & (finalframe['Quarter end'] >= '4/25/1997'),
            (finalframe['Quarter end'] < '4/25/1997') & (finalframe['Quarter end'] >= '1/22/1997'),
            (finalframe['Quarter end'] < '1/22/1997') & (finalframe['Quarter end'] >= '10/22/1996'),
            (finalframe['Quarter end'] < '10/22/1996') & (finalframe['Quarter end'] >= '7/22/1996'),
            (finalframe['Quarter end'] < '7/22/1996') & (finalframe['Quarter end'] >= '4/25/1996'),
            (finalframe['Quarter end'] < '4/25/1996') & (finalframe['Quarter end'] >= '1/22/1996'),
            (finalframe['Quarter end'] < '1/22/1996') & (finalframe['Quarter end'] >= '10/22/1995'),
            (finalframe['Quarter end'] < '10/22/1995') & (finalframe['Quarter end'] >= '7/22/1995'),
            (finalframe['Quarter end'] < '7/22/1995') & (finalframe['Quarter end'] >= '4/25/1995'),
            (finalframe['Quarter end'] < '4/25/1995') & (finalframe['Quarter end'] >= '1/22/1995'),
            (finalframe['Quarter end'] < '1/22/1995') & (finalframe['Quarter end'] >= '10/22/1994'),
            (finalframe['Quarter end'] < '10/22/1994') & (finalframe['Quarter end'] >= '7/22/1994'),
            (finalframe['Quarter end'] < '7/22/1994') & (finalframe['Quarter end'] >= '4/25/1994'),
            (finalframe['Quarter end'] < '4/25/1994') & (finalframe['Quarter end'] >= '1/22/1994'),
            (finalframe['Quarter end'] < '1/22/1994') & (finalframe['Quarter end'] >= '10/22/1993'),
            (finalframe['Quarter end'] < '10/22/1993') & (finalframe['Quarter end'] >= '7/22/1993'),
            (finalframe['Quarter end'] < '7/22/1993') & (finalframe['Quarter end'] >= '4/25/1993')]

# Quarter labels, newest first: '2019Q4' down to '1993Q2' (107 labels).
# np.select pairs label i with condition i of periods_2, so this list has to
# stay in the same newest-to-oldest order as periods_2.
quarters_2 = ['{}Q{}'.format(year, qtr)
              for year in range(2019, 1992, -1)
              for qtr in (4, 3, 2, 1)][:-1]  # drop '1993Q1'; the labels stop at '1993Q2'

# np.select falls back to the integer 0 when no condition matches. All choices
# here are strings: older NumPy silently coerced that fallback to the string
# '0', while NumPy 2.x refuses to promote int with str and raises
# "TypeError: Choicelist and default value do not have a common dtype".
# Passing the string '0' explicitly keeps the historical output and works on
# both. Rows labelled '0' therefore fell outside every window in periods_2.
finalframe['Quarter'] = np.select(periods_2, quarters_2, default='0')
finalframe.head()

Unnamed: 0,Quarter end,Shares split adjusted,Price,EPS basic,EPS diluted,Dividend per share,Dividend payout ratio,P/E ratio,Long-term debt to equity ratio,Symbol,Quarter
0,2019-09-30,67151778.0,63.99,0.59,0.58,0.035,0.0488,21.69,0.1859,AAN,2019Q3
1,2019-06-30,67547497.0,57.23,0.63,0.62,0.035,0.0464,19.94,0.188,AAN,2019Q2
2,2019-03-31,67677449.0,48.63,0.83,0.82,0.035,0.0459,17.49,0.2251,AAN,2019Q1
3,2018-12-31,67202919.0,47.0,0.9,0.89,0.04,0.0455,10.83,0.1937,AAN,2018Q4
4,2018-09-30,68594784.0,48.86,0.64,0.62,0.03,0.0267,12.0,0.1686,AAN,2018Q3


In [15]:
# Collect every row whose (Symbol, Quarter) combination is not unique.
# keep=False marks all members of a duplicate group, not only the repeats.
df2 = finalframe.loc[finalframe.duplicated(subset=['Symbol', 'Quarter'], keep=False)]
# Tickers that have more than one row carrying the same quarter label.
df2['Symbol'].unique()

array(['AAP', 'ADCT', 'ALXN', 'HRL', 'MDT', 'NTAP', 'PDCO', 'ROP', 'SAI',
       'SANM', 'TXT'], dtype=object)

In [16]:
# df2 holds the rows of finalframe whose (Symbol, Quarter) pair occurs more
# than once; display them for inspection.
df2

Unnamed: 0,Quarter end,Shares split adjusted,Price,EPS basic,EPS diluted,Dividend per share,Dividend payout ratio,P/E ratio,Long-term debt to equity ratio,Symbol,Quarter
218,2015-07-18,73217400.0,156.26,2.04,2.03,0.06,0.0348,23.32,0.6348,AAP,2015Q2
219,2015-04-25,73178230.0,154.01,2.02,2.0,0.06,0.0355,22.95,0.7499,AAP,2015Q2
242,2009-07-18,95449170.0,43.05,0.84,0.83,0.06,0.0897,16.49,0.2208,AAP,2009Q2
243,2009-04-25,95241200.0,37.07,0.99,0.98,0.06,0.0915,14.89,0.2365,AAP,2009Q2
1518,2009-09-30,96626430.0,8.34,-0.21,-0.21,0.0,,,2.1095,ADCT,2009Q3
1519,2009-07-31,96625950.0,8.34,-0.01,-0.01,0.0,,,1.7159,ADCT,2009Q3
4039,2005-12-31,125378800.0,6.04,-0.74,-0.74,0.0,,,1.8328,ALXN,2005Q4
4040,2005-10-31,123658300.0,6.8,-1.16,-1.16,0.0,,,1.4775,ALXN,2005Q4
28439,2016-04-24,529860500.0,41.61,0.41,0.4,0.145,0.3643,30.15,0.0581,HRL,2016Q1
28440,2016-01-24,529917500.0,36.66,0.44,0.43,0.145,0.3671,28.87,0.0601,HRL,2016Q1


## Compile and Export for Manual Editing in Excel

In [14]:
# Write the quarter-labelled frame to the path held in final_data_v2.
# index=False leaves the row index out of the CSV.
finalframe.to_csv(path_or_buf=final_data_v2, index=False)