In [4]:
# Import necessary libraries
import pandas as pd
import yfinance as yf

# Date window for the price-history request (ISO-formatted strings handed straight to yfinance)
start, end = '2019-09-01', '2024-09-01'


def _fetch_prompted_ticker(prompt):
    """Ask the user for a ticker symbol and return its yfinance download for the start/end window."""
    return yf.download(input(prompt), start=start, end=end)


# Each symbol is prompted for and downloaded in turn: stock 1 is fully fetched before stock 2 is asked about
stock1 = _fetch_prompted_ticker('Enter Stock1:')
stock2 = _fetch_prompted_ticker('Enter Stock2:')

def _field_level_columns(frame):
    """Return `frame` with single-level columns, keeping only the outermost column level.

    Frames whose columns are already single-level are returned unchanged. For
    frames with MultiIndex columns a copy is relabelled, so the caller's
    original frame is never modified.
    """
    if isinstance(frame.columns, pd.MultiIndex):
        # NOTE(review): assumes level 0 holds the price field names ('Open', 'Close', ...)
        # and the remaining level(s) only repeat the ticker — confirm against the
        # installed yfinance version.
        frame = frame.copy()
        frame.columns = frame.columns.get_level_values(0)
    return frame


# Merge the two stock dataframes on 'Date', adding suffixes to distinguish between the two stocks.
# The columns are flattened first because the suffixed names used below ('Open_stock1', ...)
# only exist when both frames share plain, single-level column names.
pair = pd.merge(
    _field_level_columns(stock1),
    _field_level_columns(stock2),
    on='Date',
    suffixes=('_stock1', '_stock2'),
)

# "Spread" here is the price ratio stock1 / stock2, taken at the open and at the close
pair['Spread Open'] = pair['Open_stock1'] / pair['Open_stock2']
pair['Spread Close'] = pair['Close_stock1'] / pair['Close_stock2']

# Previous row's closing spread; the first row has no predecessor, so its gap is NaN
prev_spread_close = pair['Spread Close'].shift(1)

# Percentage change in the spread from the previous row's close to this row's open (the opening gap)
pair['Spread Close to Open'] = (pair['Spread Open'] - prev_spread_close) / prev_spread_close * 100

# Percentage change in the spread from this row's open to this row's close (the intraday move)
pair['Spread Open to Close'] = (pair['Spread Close'] - pair['Spread Open']) / pair['Spread Open'] * 100

# Read the minimum opening-gap size (in percent) from the user; the bare print() emits a blank line afterwards
gappct = float(input('Gap %:'))
print()

# Keep only the rows whose opening gap is larger in magnitude than the threshold, in either direction
opening_gap = pair['Spread Close to Open']
gap = pair[opening_gap.abs() > gappct]

# Split the qualifying rows by gap direction: spread opened higher vs. spread opened lower
gap_direction = gap['Spread Close to Open']
positivegap = gap.loc[gap_direction > 0]
negativegap = gap.loc[gap_direction < 0]

def _reversion_odds(reverted, total):
    """Return `reverted / total` as a percentage rounded to one decimal place.

    Returns NaN when `total` is 0 (no gap days in that direction) instead of
    raising ZeroDivisionError, which matches what `.mean()` yields for the
    average performance of an empty selection.
    """
    if total == 0:
        return float('nan')
    return round(reverted / total * 100, 1)


# Total and average open-to-close spread move on positive-gap days
sum_positive = positivegap['Spread Open to Close'].sum()
avg_positive = positivegap['Spread Open to Close'].mean()
# Total and average open-to-close spread move on negative-gap days
sum_negative = negativegap['Spread Open to Close'].sum()
avg_negative = negativegap['Spread Open to Close'].mean()

# A "reversion" is an intraday move against the gap direction:
# the spread rises after a negative gap ...
num_of_pos_outcomes = len(negativegap[negativegap['Spread Open to Close'] > 0])
# ... or falls after a positive gap
num_of_neg_outcomes = len(positivegap[positivegap['Spread Open to Close'] < 0])

# Share of negative-gap days on which the spread reverted (NaN if there were no such days)
odds_gap_down = _reversion_odds(num_of_pos_outcomes, len(negativegap['Spread Open to Close']))
# Share of positive-gap days on which the spread reverted (NaN if there were no such days)
odds_gap_up = _reversion_odds(num_of_neg_outcomes, len(positivegap['Spread Open to Close']))

# Report for positive gaps: total and average intraday move, then the reversion odds followed by a blank line
for label, value in (
    ('Total Performance on Positive Gap:', sum_positive),
    ('Average Performance on Positive Gap:', avg_positive),
):
    print(label, value)
print(f'Odds of Pair Reverting on Positive Gap: {odds_gap_up}%\n')

# Report for negative gaps, in the same layout
for label, value in (
    ('Total Performance on Negative Gap:', sum_negative),
    ('Average Performance on Negative Gap:', avg_negative),
):
    print(label, value)
print(f'Odds of Pair Reverting on Negative Gap: {odds_gap_down}%')


Enter Stock1: aapl


[*********************100%***********************]  1 of 1 completed


Enter Stock2: msft


[*********************100%***********************]  1 of 1 completed


Gap %: 2



Total Performance on Positive Gap: -10.013208752228817
Average Performance on Positive Gap: -0.4768194643918484
Odds of Pair Reverting on Positive Gap: 57.1%

Total Performance on Negative Gap: 33.53717516834295
Average Performance on Negative Gap: 0.8179798821547061
Odds of Pair Reverting on Negative Gap: 73.2%
