# Daily Stock Prices
##### Purpose: Pull daily stock data using LSEG API for specified rics 
##### Input: List of RICs (Data/Rics_List.xlsx) plus a start and end date 
##### Output: Daily stock closing price data for each RIC from start to end date 
##### For information on the LSEG Eikon API: 
#####         --General: https://developers.lseg.com/en/api-catalog/eikon/eikon-data-api
#####         --API Key: https://developers.lseg.com/en/article-catalog/article/consume-real-time-data-with-refinitiv-data-platform
##### Contact: brds@hbs.edu


## Set Up

### Import Libraries

In [None]:
## Import 
import eikon as ek # Eikon Python wrapper package
import pandas as pd 
from datetime import datetime, timedelta
import sys
import os
import time
import logging
import IPython


### Set API Key

In [None]:
# Set API Key (see notes at top for how to obtain your API Key)
# The key is read from a text file so that it is not hard-coded in the notebook.
# os.path.join is used instead of a "Data\..." literal: the backslash form relies on
# an invalid escape sequence ("\A") and only resolves to the right file on Windows.
api_key_file = os.path.join("Data", "API_Key.txt")
with open(api_key_file, 'r', encoding='utf-8') as file:
    api_key = file.read().strip()  # strip the trailing newline / stray whitespace

# Fail here with a clear message rather than later inside the first API request
if not api_key:
    raise ValueError(f"No API key found in {api_key_file}")

ek.set_app_key(api_key)

# Or set API Key:  ek.set_app_key('YOUR_KEY_HERE') 

## Parameters

### Import Data

In [None]:
# Import Ric List
# Expects an Excel sheet with a 'RIC' column (the pull loop reads ric_list['RIC']).
# os.path.join avoids the invalid "\R" escape in "Data\Rics_List.xlsx" and keeps
# the path working on non-Windows systems.
ric_list = pd.read_excel(os.path.join("Data", "Rics_List.xlsx"))

# Fail early if the column needed by the pull loop is missing
if 'RIC' not in ric_list.columns:
    raise KeyError("Rics_List.xlsx must contain a 'RIC' column")

display(ric_list.head(5))


### Parameters

In [None]:
# User-adjustable settings for the pull

## Number of RICs requested per API call; lower it when pulling many days
## so that each request stays within the API limits
chunk_size = 100

## Currency code inserted into the closing-price request
currency = 'USD'

## First and last calendar day to request (inclusive), as YYYY-MM-DD strings
start_date_str = '2024-08-01'
end_date_str = '2024-08-15'

## How many times a failing API call is attempted before giving up
max_retries = 3

# Derived values: the same dates as datetime objects ...
start_date, end_date = (
    datetime.strptime(date_str, '%Y-%m-%d')
    for date_str in (start_date_str, end_date_str)
)

# ... and a one-column ("Date") frame with every calendar day in the range
dates = pd.date_range(start_date_str, end_date_str).to_frame(name="Date", index=False)




## Pull down data

In [None]:
# Pull Down Vars
## Loop thru each date (one day at a time), for chunk_size number of rics at a time 


def _pull_chunk_with_retries(ric_chunk, fields, max_attempts):
    """Request `fields` for `ric_chunk` via ek.get_data, retrying on exceptions.

    Parameters:
        ric_chunk: list of RICs to request in a single call.
        fields: list of field strings passed to ek.get_data.
        max_attempts: total number of attempts before giving up.

    Returns:
        The DataFrame returned by ek.get_data, or None if every attempt raised.
    """
    for attempt in range(1, max_attempts + 1):
        try:
            print("Pulling...")
            # get_data returns a (DataFrame, errors) pair; the second element is
            # ignored here, as it was before -- only raised exceptions trigger a retry.
            chunk_df, _field_errors = ek.get_data(ric_chunk, fields)
            return chunk_df
        except Exception as err:
            print(f"Attempt {attempt} failed with error: {err}")
            if attempt >= max_attempts:
                break
            # Back off 15s, then 30s, then a flat 60s for any later attempts
            wait_time = 15 * attempt if attempt < 3 else 60
            print(f"Waiting {wait_time} seconds before retrying pull...")
            time.sleep(wait_time)

    print("Max retries reached. Moving to next chunk of rics")
    return None


## Set Loop Vars
last = len(ric_list)
data = pd.DataFrame()   # stays empty if nothing could be pulled
chunks = []             # one DataFrame per successful (date, ric chunk) pull
failed_pulls = []       # (date, first row, last row) for every chunk that was skipped


## Loop thru all dates 
current_date = start_date
while current_date <= end_date: 
    # Set Dates:
    current_date_str = current_date.strftime('%Y-%m-%d') # String of current date being looped on
    # NOTE(review): verify the currency parameter syntax ('CURN:...') against the Eikon Data API docs
    close_param = f'TR.PriceClose(SDate={current_date_str},EDate={current_date_str},Frq=D, CURN:{currency})' # Parameter into get_data (closing price)
    date_param = f'TR.PriceClose(SDate={current_date_str},EDate={current_date_str}, Frq=D).date' # Parameter entered into get_data (date of closing price)

    ## Loop thru all rics
    for start in range(0, last, chunk_size):
        end = min(start + chunk_size, last)  # clamp so the progress message reports real row counts
        ric_chunk = ric_list['RIC'].iloc[start:end].tolist() # The rics being pulled in this iteration of the loop 

        data_chunk = _pull_chunk_with_retries(ric_chunk, [date_param, close_param], max_retries)

        if data_chunk is None:
            # Skip this chunk entirely. Falling through would either hit an undefined
            # data_chunk or re-append the previous chunk's rows under the wrong date.
            print("Error with date:", current_date, "  For rows: ", start, " to ", end)
            failed_pulls.append((current_date_str, start, end))
        else:
            ## Record the date we requested data for (the date returned by the API may differ)
            data_chunk['Date Requested'] = current_date
            chunks.append(data_chunk)
            print("Pulled - For date: ", current_date, "Rows: ", end)

        # Adhere to API limits 
        time.sleep(2) 

    # Increment by 1 day and record data obtained 
    print("Obtained data for: ", current_date)
    current_date += timedelta(days=1)

# Concatenate once at the end instead of growing `data` inside the loop
if chunks:
    data = pd.concat(chunks)

print ("Finished pulling data")
if failed_pulls:
    print("Chunks skipped after max retries (date, first row, last row): ", failed_pulls)

## Clean Data

### Rename/Reorder columns

In [None]:
# Clean up data
# Keep a real snapshot of the raw pull. A plain `data_orig = data` is only an alias,
# so the in-place rename would also alter the "original" used in the comparison below.
data_orig = data.copy()

# 'Date' is the date returned for the closing price; 'Date Requested' is the date asked for
data = data.rename(columns={'Date':'Date Obtained'})
col_order = ['Instrument', 'Date Requested', 'Date Obtained', 'Price Close']
data = data[col_order]
data = data.sort_values(by=['Instrument', 'Date Requested'], ascending=[True, True])
data['Date Obtained'] = pd.to_datetime(data['Date Obtained']).dt.date  # keep the date only, drop the time part
#data = data.drop_duplicates()       


#Compare original vs clean data 
data_orig.info()
data.info()  # was `data.info` (no parentheses), which never ran the summary

In [None]:
# Flag every row whose (Date Requested, Instrument) pair occurs more than once.
# These are the index/column keys used when pivoting; keep=False marks all
# occurrences of a repeated pair, not just the later ones.
duplicate_mask = data.duplicated(keep=False, subset=['Date Requested', 'Instrument'])

# Keep only the flagged rows and show them
duplicates = data.loc[duplicate_mask]
print(duplicates)

### Reshape Data to Wide

In [None]:
## Reshape Data 
# One row per requested date; every remaining column is spread out per instrument
data_wide = data.pivot(columns='Instrument', index='Date Requested')

# The pivot yields (field, instrument) column pairs; collapse each pair
# into a single "<instrument>_<field>" name
flat_names = []
for col, instr in data_wide.columns:
    flat_names.append(f'{instr}_{col}')
data_wide.columns = flat_names

# Order columns alphabetically, which groups each instrument's fields together
sorted_columns = sorted(data_wide.columns)
data_wide = data_wide.loc[:, sorted_columns]

# Move 'Date Requested' out of the index and back into a regular column
data_wide = data_wide.reset_index()


### Reshape Data to Wide

## View and Export Data

In [None]:
# Preview the first five rows of the wide table
display(data_wide.head(n=5))


In [None]:
# Write the long table and the wide table to their Excel output files
for frame, out_path in (
    (data, 'Data/Output_DailyStockPrices_Long.xlsx'),
    (data_wide, 'Data/Output_DailyStockPrices.xlsx'),
):
    frame.to_excel(out_path)
