In [1]:
import pandas as pd
import yfinance as yf
import datetime

In [28]:
# Date window and ticker symbol used by the cells below.
start = "2021-07-01"
end = "2021-08-01"
ticker = 'msci'

#### Generate dummy reference data
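Errors (`N/A` placeholders) and value changes are deliberately injected into `reference_df` so that the later cells have something to detect.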

In [53]:
# Build one reference row per calendar day between `start` and `end`.
# n = number of trailing days switched to 'EUR'; m = length of the trailing
# window in which 'industry' is temporarily spelled 'Tech'.
n, m = 18, 8
date_range = pd.date_range(start=start, end=end)
n_days = len(date_range)

currency = ['USD'] * (n_days - n) + ['EUR'] * n
industry = ['Technology'] * (n_days - m) + ['Tech'] * 5 + ['Technology'] * (m - 5)
region = ['EMEA'] * (n_days - 2) + ['APAC'] * 2

d = {'day': date_range, 'currency': currency, 'industry': industry, 'region': region}
reference_df = pd.DataFrame(data=d)
# Label-based slice on the default integer index: rows 8, 9 and 10 (inclusive)
# get the 'N/A' placeholder in every attribute column.
reference_df.loc[8:10, ['currency', 'industry', 'region']] = 'N/A'
reference_df = reference_df.set_index('day')
reference_df

Unnamed: 0_level_0,currency,industry,region
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2021-07-01,USD,Technology,EMEA
2021-07-02,USD,Technology,EMEA
2021-07-03,USD,Technology,EMEA
2021-07-04,USD,Technology,EMEA
2021-07-05,USD,Technology,EMEA
2021-07-06,USD,Technology,EMEA
2021-07-07,USD,Technology,EMEA
2021-07-08,USD,Technology,EMEA
2021-07-09,,,
2021-07-10,,,


#### Get the actual time-series data of a stock (variable=ticker)
stock_price

In [54]:
# Download the price history for `ticker` between `start` and `end`.
stock_price = yf.Ticker(ticker).history(start=start, end=end)
stock_price.index = pd.to_datetime(stock_price.index)
# NOTE(review): newer yfinance releases may return a timezone-aware index.
# reference_df is indexed by tz-naive dates, so drop any timezone here to keep
# the later `isin` date matching working.
if stock_price.index.tz is not None:
    stock_price.index = stock_price.index.tz_localize(None)
# Select the price/volume columns by name instead of by position so the result
# does not depend on the column order returned by yfinance.
stock_price = stock_price[['Open', 'High', 'Low', 'Close', 'Volume']]
stock_price

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2021-06-30,540.140015,542.02002,532.539978,533.080017,426700
2021-07-01,532.48999,535.859985,531.630005,534.030029,329700
2021-07-02,536.72998,542.090027,534.679993,541.909973,254600
2021-07-06,543.299988,551.630005,541.390015,550.02002,351500
2021-07-07,552.679993,557.159973,550.02002,555.77002,246600
2021-07-08,551.659973,554.849976,545.98999,553.539978,324100
2021-07-09,555.97998,560.390015,552.849976,560.22998,308600
2021-07-12,561.0,563.460022,556.52002,559.950012,221400
2021-07-13,559.640015,567.01001,554.98999,557.570007,233900
2021-07-14,558.090027,559.789978,551.900024,555.190002,194200


#### Drop non-trading days from the reference_df

In [55]:
# Keep only the reference rows whose date also appears in the price index.
is_trading_day = reference_df.index.isin(stock_price.index)
reference_df = reference_df.loc[is_trading_day]

In [56]:
# Display reference_df, now restricted to dates present in stock_price.index.
reference_df

Unnamed: 0_level_0,currency,industry,region
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2021-07-01,USD,Technology,EMEA
2021-07-02,USD,Technology,EMEA
2021-07-06,USD,Technology,EMEA
2021-07-07,USD,Technology,EMEA
2021-07-08,USD,Technology,EMEA
2021-07-09,,,
2021-07-12,USD,Technology,EMEA
2021-07-13,USD,Technology,EMEA
2021-07-14,USD,Technology,EMEA
2021-07-15,EUR,Technology,EMEA


#### Capture changes in reference_df
valid_from, valid_to, currency, industry, region, changes

In [58]:
# Preview an empty frame with the column layout listed for the change capture.
pd.DataFrame(
    columns=[
        'valid_from',
        'valid_to',
        'currency',
        'industry',
        'region',
        'changes',
    ]
)

Unnamed: 0,valid_from,valid_to,currency,industry,region,changes


In [84]:
# Compare every row of reference_df with the row that follows it, column by
# column. Each record is (row position, column position, unchanged), where
# `unchanged` is True when the value equals the value in the next row.
reference_values = reference_df.to_numpy()
# A single element-wise comparison of rows [0, n-1) against rows [1, n)
# replaces the two scalar .iloc lookups per cell; the result has one row per
# consecutive pair and one column per reference_df column.
unchanged_matrix = reference_values[:-1] == reference_values[1:]
change = [
    (i, j, bool(unchanged_matrix[i, j]))  # plain bool, as in the tuples printed below
    for i in range(unchanged_matrix.shape[0])
    for j in range(unchanged_matrix.shape[1])
]
print(change)

[(0, 0, True), (0, 1, True), (0, 2, True), (1, 0, True), (1, 1, True), (1, 2, True), (2, 0, True), (2, 1, True), (2, 2, True), (3, 0, True), (3, 1, True), (3, 2, True), (4, 0, False), (4, 1, False), (4, 2, False), (5, 0, False), (5, 1, False), (5, 2, False), (6, 0, True), (6, 1, True), (6, 2, True), (7, 0, True), (7, 1, True), (7, 2, True), (8, 0, False), (8, 1, True), (8, 2, True), (9, 0, True), (9, 1, True), (9, 2, True), (10, 0, True), (10, 1, True), (10, 2, True), (11, 0, True), (11, 1, True), (11, 2, True), (12, 0, True), (12, 1, True), (12, 2, True), (13, 0, True), (13, 1, True), (13, 2, True), (14, 0, True), (14, 1, True), (14, 2, True), (15, 0, True), (15, 1, False), (15, 2, True), (16, 0, True), (16, 1, True), (16, 2, True), (17, 0, True), (17, 1, True), (17, 2, True), (18, 0, True), (18, 1, True), (18, 2, True), (19, 0, True), (19, 1, False), (19, 2, True)]


#### Show when changes occur in the reference_df
changes_df

In [90]:
# Tabulate the comparison records and keep only the (row, column) positions
# where the value differs from the next row.
changes_df = (
    pd.DataFrame(change, columns=['row_num', 'col_num', 'unchanged'])
    .loc[lambda records: records['unchanged'].eq(False)]
)
changes_df

Unnamed: 0,row_num,col_num,unchanged
12,4,0,False
13,4,1,False
14,4,2,False
15,5,0,False
16,5,1,False
17,5,2,False
24,8,0,False
46,15,1,False
58,19,1,False
