<a href="https://colab.research.google.com/github/Toyonaga15/Python-for-finance/blob/main/Intraday_Gap_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install yfinance

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting yfinance
  Downloading yfinance-0.2.9-py2.py3-none-any.whl (55 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m55.9/55.9 KB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
Collecting beautifulsoup4>=4.11.1
  Downloading beautifulsoup4-4.11.2-py3-none-any.whl (129 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m129.4/129.4 KB[0m [31m5.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting requests>=2.26
  Downloading requests-2.28.2-py3-none-any.whl (62 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.8/62.8 KB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
Collecting html5lib>=1.1
  Downloading html5lib-1.1-py2.py3-none-any.whl (112 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m112.2/112.2 KB[0m [31m8.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting cryptography>=3.3.2
  Downloading cryptography-39.0.0-cp36-

In [None]:
import pandas as pd
import yfinance as yf
import numpy as np
from datetime import datetime, timedelta

In [None]:
#Request information about the asset from the user (surrounding whitespace is discarded)
ticker = input("Enter the asset's ticker according to Yahoo Finance: ").strip()
start_date = input("Enter the start date (YYYY-MM-DD): ").strip()
end_date = input("Enter the end date (YYYY-MM-DD): ").strip()

#Download the asset data
df = yf.download(ticker, start=start_date, end=end_date)

#Stop here if nothing was returned, so the failure is reported at its source
#rather than as an obscure error in one of the analysis cells further down
if df.empty:
    raise ValueError(f"No price data returned for '{ticker}' between {start_date} and {end_date}")

#Replace the timestamp index with plain calendar dates (the index stays an index, no column is added)
df.index = pd.DatetimeIndex(df.index).date

Enter the asset's ticker according to Yahoo Finance: aapl
Enter the start date (YYYY-MM-DD): 2015-01-01
Enter the end date (YYYY-MM-DD): 2023-01-31
[*********************100%***********************]  1 of 1 completed


In [None]:
#Sort the data by the index first, and keep the result: shift(1) below only means
#"previous session" when the rows are in chronological order
df = df.sort_index()

#Create a "previous_close" column with the previous closing values
df['previous_close'] = df.Close.shift(1)

#Create a "gap" column: opening price relative to the previous close, in percent
df['gap'] = (df.Open - df.previous_close) / df.previous_close * 100

#Create a "signal" column classifying the gap as positive, negative, or no gap
df['signal'] = np.where(df.gap > 0, 'positive', (np.where(df.gap < 0, 'negative', 'no gap')))

#Create an "abs_gap" column with the absolute value of the gap
df['abs_gap'] = df.gap.abs()

#Remove rows with null values (the first row has no previous close).
#.copy() makes df an independent frame so that adding columns in later cells
#does not raise SettingWithCopyWarning
df = df.dropna().copy()

#Display the prepared data
df

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume,previous_close,gap,signal,abs_gap
2015-01-05,27.072500,27.162500,26.352501,26.562500,23.910095,257142000,27.332500,-0.951249,negative,0.951249
2015-01-06,26.635000,26.857500,26.157499,26.565001,23.912342,263188400,26.562500,0.272942,positive,0.272942
2015-01-07,26.799999,27.049999,26.674999,26.937500,24.247644,160423600,26.565001,0.884618,positive,0.884618
2015-01-08,27.307501,28.037500,27.174999,27.972500,25.179298,237458000,26.937500,1.373553,positive,1.373553
2015-01-09,28.167500,28.312500,27.552500,28.002501,25.206297,214798000,27.972500,0.697112,positive,0.697112
...,...,...,...,...,...,...,...,...,...,...
2023-01-24,140.309998,143.160004,140.300003,142.529999,142.529999,66435100,141.110001,-0.566936,negative,0.566936
2023-01-25,140.889999,142.429993,138.809998,141.860001,141.860001,65799300,142.529999,-1.150635,negative,1.150635
2023-01-26,143.169998,144.250000,141.899994,143.960007,143.960007,54105100,141.860001,0.923444,positive,0.923444
2023-01-27,143.160004,147.229996,143.080002,145.929993,145.929993,70492800,143.960007,-0.555712,negative,0.555712


In [None]:
#Creation of a dynamic table summarising the absolute gap per signal type.
#margins=True appends an "All" row with the totals over every signal.
dynamic_gap_table = pd.pivot_table(df, index='signal', values='abs_gap', aggfunc=['count','mean','median','max','min'], margins=True)

#Remove the extra level of the added columns
dynamic_gap_table.columns = dynamic_gap_table.columns.droplevel(1)

#Calculation of the relative amount of each type of gap, as a percentage of the "All" count.
#Dividing the whole "count" column keeps the values aligned with the table's own rows,
#so it works whichever signal types are present and in whatever order they appear.
relative_amount = dynamic_gap_table['count'] / dynamic_gap_table.loc['All','count'] * 100

#Addition of the relative amount to the dynamic table, right after "count"
dynamic_gap_table.insert(1, 'relative_amount %', relative_amount)

#Rounding of the values to 2 decimal places (the "count" column is left as is)
dynamic_gap_table.iloc[:,1:] = dynamic_gap_table.iloc[:,1:].round(2)

#Display of the dynamic table
dynamic_gap_table

Unnamed: 0_level_0,count,relative_amount %,mean,median,max,min
signal,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
negative,914,44.98,0.79,0.48,12.96,0.0
no gap,16,0.79,0.0,0.0,0.0,0.0
positive,1102,54.23,0.71,0.46,7.86,0.0
All,2032,100.0,0.74,0.47,12.96,0.0


In [None]:
#Percentile rank (0-100) of the absolute gap that is used as the cut-off value
percentile_param = 25

#Cut-off value: the absolute gap found at that percentile
gap_filter = np.percentile(df['abs_gap'].to_numpy(), q=percentile_param)

#Show the resulting cut-off
gap_filter

0.19849236735071935

In [None]:
#Creating a new column with valid gap classification: a gap only counts as
#positive/negative when it is beyond the percentile cut-off, otherwise it is "no gap".
#assign() returns a new DataFrame instead of writing into df in place, which avoids
#the SettingWithCopyWarning raised by a direct column assignment here.
df = df.assign(
    valid_gap=np.where(df.gap > gap_filter, 'positive',
                       np.where(df.gap < -gap_filter, 'negative', 'no gap'))
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['valid_gap'] = np.where(df.gap>gap_filter, 'positive', (np.where(df.gap<(gap_filter*-1), 'negative', 'no gap')))


In [None]:
#Creating a dynamic table summarising the absolute gap per valid gap type.
#margins=True appends an "All" row with the totals over every type.
valid_gap_dynamic_table = pd.pivot_table(df, index='valid_gap', values='abs_gap', aggfunc=['count','mean','median','max','min'], margins=True)

#Remove the extra level of the added columns so they are named count, mean, median, max, min
valid_gap_dynamic_table.columns = valid_gap_dynamic_table.columns.droplevel(1)

#Calculating the relative amount of each valid gap type, as a percentage of the "All" count.
#Dividing the whole "count" column keeps the values aligned with the table's own rows,
#so it works whichever gap types are present and in whatever order they appear.
relative_amount = valid_gap_dynamic_table['count'] / valid_gap_dynamic_table.loc['All','count'] * 100

#Adding the relative amount column, right after "count"
valid_gap_dynamic_table.insert(1, 'relative_amount', relative_amount)

#Rounding the values to 2 decimal places (the "count" column is left as is)
valid_gap_dynamic_table.iloc[:,1:] = valid_gap_dynamic_table.iloc[:,1:].round(2)

#Displaying the dynamic table
valid_gap_dynamic_table

Unnamed: 0_level_0,count,relative_amount,mean,median,max,min
valid_gap,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
negative,692,34.06,1.01,0.65,12.96,0.2
no gap,508,25.0,0.1,0.1,0.2,0.0
positive,832,40.94,0.91,0.62,7.86,0.2
All,2032,100.0,0.74,0.47,12.96,0.0


In [None]:
#Add a "Closed" column telling, for every valid gap, whether the session's price range
#reached the previous close again. The column holds True/False for valid gaps, the text
#'no gap' for rows classified as no gap, and NaN for any row with another valid_gap value.
#The values are computed for all rows at once and attached with assign(); the previous
#row-by-row df['Closed'].iloc[i] = ... was a chained assignment, which pandas warns about
#and which does not write back to df at all when copy-on-write is enabled.
gap_type = df.valid_gap.to_numpy()

#A positive gap is closed when the Low is less than or equal to the previous close;
#a negative gap is closed when the High is greater than or equal to the previous close.
#astype(object) stores plain Python booleans so they can share a column with 'no gap'.
closed_from_above = (df.Low <= df.previous_close).to_numpy().astype(object)
closed_from_below = (df.High >= df.previous_close).to_numpy().astype(object)

#Start from NaN everywhere, then fill in each gap type
closed = np.full(len(df), np.nan, dtype=object)
is_positive = gap_type == 'positive'
is_negative = gap_type == 'negative'
closed[is_positive] = closed_from_above[is_positive]
closed[is_negative] = closed_from_below[is_negative]
closed[gap_type == 'no gap'] = 'no gap'

df = df.assign(Closed=closed)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Closed'] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj._check_is_chained_assignment_possible()


In [None]:
#Keep only the rows with a valid gap (everything except "no gap")
df_valid_gaps = df[df.valid_gap != 'no gap']

#Create dynamic table with count of closed or not closed gaps.
#margins=True appends an "All" row with the total count.
pivot_closed_gaps = pd.pivot_table(df_valid_gaps, index='Closed', values='abs_gap', aggfunc=['count'], margins=True)
pivot_closed_gaps.columns = pivot_closed_gaps.columns.droplevel(1)

#Calculate the relative quantity of closed or not closed gaps, as a percentage of the "All" count.
#Dividing the whole "count" column keeps the values aligned with the table's own rows,
#so it also works when only closed or only not closed gaps are present.
relative_quantity = pivot_closed_gaps['count'] / pivot_closed_gaps.loc['All', 'count'] * 100

#Add column with relative quantity of closed and not closed gaps
pivot_closed_gaps.insert(1, 'relative_quantity %', relative_quantity)

#Round to 2 decimal places (the "count" column is left as is)
pivot_closed_gaps.iloc[:,1:] = pivot_closed_gaps.iloc[:,1:].round(2)

#Show dynamic table with information about closed or not closed gaps
pivot_closed_gaps

Unnamed: 0_level_0,count,relative_quantity %
Closed,Unnamed: 1_level_1,Unnamed: 2_level_1
False,689,45.21
True,835,54.79
All,1524,100.0


In [None]:
#Select only gaps with value different from "sem gap"
df_gaps_valid = df[df.valid_gap != 'no gap']

#Create pivot table with information about gap closing
pivot_closed_gaps = pd.pivot_table(df_gaps_valid, index=['valid_gap', 'Closed'], values='abs_gap', aggfunc=['count'], margins=True)

#Remove extra level of columns
pivot_closed_gaps.columns = pivot_closed_gaps.columns.droplevel(1)

#Calculate relative number of closed and unclosed gaps
relative_quantity = [pivot_closed_gaps.loc[('negative', False), 'count']/(pivot_closed_gaps.loc[('negative', False), 'count']+pivot_closed_gaps.loc[('negative', True), 'count'])*100,
pivot_closed_gaps.loc[('negative', True), 'count']/(pivot_closed_gaps.loc[('negative', False), 'count']+pivot_closed_gaps.loc[('negative', True), 'count'])*100,
pivot_closed_gaps.loc[('positive', False), 'count']/(pivot_closed_gaps.loc[('positive', False), 'count']+pivot_closed_gaps.loc[('positive', True), 'count'])*100,
pivot_closed_gaps.loc[('positive', True), 'count']/(pivot_closed_gaps.loc[('positive', False), 'count']+pivot_closed_gaps.loc[('positive', True), 'count'])*100,
pivot_closed_gaps.loc[('All', ''), 'count']/(pivot_closed_gaps.loc[('All', ''),'count'])*100]

#Add column with relative quantity of closed and unclosed gaps
pivot_closed_gaps.insert(1, 'relative_quantity %', relative_quantity)

#Round information to two decimal places
pivot_closed_gaps.iloc[:,1:] = pivot_closed_gaps.iloc[:,1:].round(2)

#Show final pivot table
pivot_closed_gaps

Unnamed: 0_level_0,Unnamed: 1_level_0,count,relative_quantity %
valid_gap,Closed,Unnamed: 2_level_1,Unnamed: 3_level_1
negative,False,286,41.33
negative,True,406,58.67
positive,False,403,48.44
positive,True,429,51.56
All,,1524,100.0
