In [None]:
import pandas as pd
import yfinance as yf
import datetime

# Show every row when a DataFrame is rendered (None disables the row limit).
pd.options.display.max_rows = None

In [None]:
# Notebook parameters: the date window and the ticker symbol whose prices are fetched.
start = "2021-07-01"
end = "2021-08-05"
ticker = 'aapl'

#### Generate dummy reference data
Placeholder errors (`N/A`) and value changes are deliberately injected into `reference_df`.

In [None]:
# Build one row of reference attributes per calendar day between `start` and `end`.
# n: number of trailing days that carry 'EUR' instead of 'USD'
# m: length of the trailing window in which 'industry' briefly reads 'Tech'
n, m = 18, 8
date_range = pd.date_range(start=start, end=end)
total_days = len(date_range)

currency = ['USD'] * (total_days - n) + ['EUR'] * n
# Inside the last m days: 5 days of 'Tech', then back to 'Technology'.
industry = ['Technology'] * (total_days - m) + ['Tech'] * 5 + ['Technology'] * (m - 5)
# Only the final two days switch region.
region = ['EMEA'] * (total_days - 2) + ['APAC'] * 2

d = {'day': date_range, 'currency': currency, 'industry': industry, 'region': region}
reference_df = pd.DataFrame(d)

# Overwrite rows labelled 8 through 10 (label slices are inclusive) with an 'N/A' placeholder.
attribute_columns = ['currency', 'industry', 'region']
reference_df.loc[8:10, attribute_columns] = ['N/A'] * len(attribute_columns)

reference_df = reference_df.set_index('day')
reference_df

#### Get historical stock prices for `ticker`
The result is stored in `stock_price`.

In [None]:
# Download the price history for `ticker` over the configured window.
stock_price = yf.Ticker(ticker).history(start=start, end=end)

# reference_df is indexed by timezone-naive dates. If the downloaded index is
# timezone-aware, the two indexes would not match when non-trading days are
# filtered out later, so drop the timezone while keeping the local calendar date.
price_index = pd.to_datetime(stock_price.index)
if price_index.tz is not None:
    price_index = price_index.tz_localize(None)
stock_price.index = price_index

# Keep only the first five columns.
# NOTE(review): presumably Open/High/Low/Close/Volume - confirm against the installed yfinance version.
stock_price = stock_price.iloc[:, :5]
stock_price

#### Drop non-trading days from `reference_df`

In [None]:
# Keep only the reference rows whose day also appears in the price history.
is_trading_day = reference_df.index.isin(stock_price.index)
reference_df = reference_df[is_trading_day]
reference_df

#### Capture changes in reference_df

In [None]:
# Long format: one row per (day, variable) pair, with the attribute value in the 'value' column.
ref_df_unpivoted = (
    reference_df
    .reset_index()
    .melt(id_vars='day')
    .set_index(['day', 'variable'])
    .sort_index()
)
ref_df_unpivoted.tail(12)

In [None]:
from pprint import pprint

# Compare every day with the PREVIOUS day and record the result on the day the
# new value first appears. Recording it on the earlier day would make the merge
# below pick up the old value, and the final state would never be captured.
change = []
for i in range(1, len(reference_df.index)):
    day = reference_df.index[i]
    for j, column in enumerate(reference_df.columns):
        changed = reference_df.iloc[i, j] != reference_df.iloc[i - 1, j]
        change.append((day, column, changed))

# List of tuples - similar to an unpivoted structure.
# Within each tuple: 1st item is the day, 2nd the field, 3rd True/False indicating a change.
pprint(change[10:20])

changes_df = pd.DataFrame(change, columns=['day', 'variable', 'changed']).set_index(['day', 'variable'])

# Keep only the (day, variable) pairs that changed; the flag column is no longer needed.
changes_df = changes_df[changes_df['changed'].astype(bool)].drop(columns='changed')

# Attach the value each changed variable holds on its change day.
changes_df = changes_df.merge(ref_df_unpivoted, left_index=True, right_index=True)
changes_df = changes_df.sort_index()

display(changes_df)

In [None]:
# Sentinel "no expiry" timestamp: 2222-12-31 minus one second.
forever = pd.to_datetime('2222-12-31') - pd.Timedelta(seconds=1)

# The first row of reference_df is the initial state: valid from its own day,
# and open-ended (valid_to = forever) until a later step closes the interval.
reference_df_start = (
    reference_df.iloc[[0]]
    .reset_index()
    .rename(columns={'day': 'valid_from'})
    .assign(valid_to=forever)
    .loc[:, ['valid_from', 'valid_to', 'currency', 'industry', 'region']]
    .set_index('valid_from')
)
reference_df_start

In [None]:
# adding changes
changes_pivot_df = changes_df.reset_index()
changes_pivot_df = changes_pivot_df.pivot(index='day', columns='variable', values='value')
changes_pivot_df.fillna(method='ffill', inplace=True)
display(changes_pivot_df)

In [None]:
# Put the initial state on top of the change rows; the combined index holds the
# start of each validity interval, so name it 'valid_from' and turn it into a column.
df = (
    pd.concat([reference_df_start, changes_pivot_df])
    .rename_axis('valid_from')
    .reset_index()
)

# A change row only carries the variables that changed on that day. Fill the
# rest from the row above (ultimately the initial state) so that every row
# describes the complete set of attributes. 'valid_to' is left untouched here.
value_columns = [col for col in df.columns if col not in ('valid_from', 'valid_to')]
df[value_columns] = df[value_columns].ffill()
display(df)

In [None]:
# Each row stops being valid one second before the next row's valid_from.
# shift(-1) aligns every row with its successor; the last row has no successor
# (NaT), which means "no expiry", so it receives the `forever` sentinel.
df['valid_to'] = (df['valid_from'].shift(-1) - pd.Timedelta(seconds=1)).fillna(forever)
display(df)