# GOAL: notifications + divergence notifications
------------------------------------------------------------------------------
- Bollinger Band............. yes
- RSI........................ yes
- RSI Divergence............. no
- MACD....................... yes
- MVWAP....................... no
- RVI........................ yes
- MFI........................ no
- Min & Max lines............ yes
- Moving Averages............ no
- Candlestick Patterns....... yes
- Risk Analysis.............. no
- Earnings & Financials...... no
- News....................... yes
- Put/Call Ratio............. no
- Trending................... no
- Consolidating.............. no
- Buy Score:   Some #/#

In [None]:
# Import libraries
import pandas as pd
import numpy as np
import datetime as dt
import seaborn as sns
import matplotlib.pyplot as plt
import math
from openpyxl import load_workbook
# Date-axis formatting for plots (mdates); argrelextrema is used to find local peaks
import matplotlib.dates as mdates 
from scipy.signal import argrelextrema
import yfinance as yf
import time
from bs4 import BeautifulSoup
from urllib.request import urlopen
from urllib.request import Request
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import pythoncom
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import silhouette_score
from sklearn.impute import SimpleImputer
from sklearn.cluster import AgglomerativeClustering, KMeans

In [None]:
# Smoothing with Wilder puts emphasis on recent values
def Wilder(data, periods):
    """Return the Wilder-smoothed version of ``data`` as a NumPy array.

    The first ``periods`` non-NaN observations are averaged to seed the
    series; every later value is
    ``(previous * (periods - 1) + current) / periods``.

    Parameters
    ----------
    data : array-like of float
        Input values (NumPy array, list or pandas Series). Leading NaNs are
        skipped; positions before the seed are NaN in the result.
    periods : int
        Smoothing length; must be at least 1.

    Returns
    -------
    numpy.ndarray
        Float array of ``len(data)``. It is all NaN when ``data`` has no
        valid values or fewer than ``periods`` values after the first valid
        one.

    Raises
    ------
    ValueError
        If ``periods`` is smaller than 1.
    """
    if periods < 1:
        raise ValueError("periods must be >= 1")

    # Work on a plain float array so indexing is always positional, whatever
    # index a pandas Series argument carries.
    values = np.asarray(data, dtype=float)
    smoothed = np.full(len(values), np.nan)

    valid_positions = np.flatnonzero(~np.isnan(values))
    if valid_positions.size == 0:
        return smoothed

    start = valid_positions[0]  # skip NaNs at the beginning
    seed_end = start + periods
    if seed_end > len(values):
        # Not enough observations to build the initial simple average.
        return smoothed

    smoothed[seed_end - 1] = values[start:seed_end].mean()  # simple moving average seed
    for i in range(seed_end, len(values)):
        smoothed[i] = (smoothed[i - 1] * (periods - 1) + values[i]) / periods
    return smoothed

In [None]:
# Download 10 years of daily bars for every ticker and stack them into one
# frame tagged with a 'symbol' column.
# NOTE(review): the indicator cells below use un-grouped rolling windows, so
# they still assume a single ticker even though this cell now keeps them all.
all_data = pd.DataFrame()
test_data = pd.DataFrame()
no_data = []  # tickers whose download came back empty
tickers_final = ['SPY']
ticker_frames = []
for i in tickers_final:
    ticker_data = yf.download(tickers=i, period='10y', interval='1d')
    if ticker_data.empty:
        # presumably yfinance hands back an empty frame on failure -- confirm
        no_data.append(i)
        continue
    ticker_data['symbol'] = i
    ticker_frames.append(ticker_data)

# Concatenate once after the loop; previously each iteration overwrote
# all_data, so only the last ticker survived.
if ticker_frames:
    all_data = pd.concat(ticker_frames)


In [None]:
# Preview the first three rows of the downloaded data
all_data.head(3)

In [None]:
# Bollinger Bands: rolling mean of the close with an envelope of
# +/- num_std_dev rolling standard deviations
window = 20
num_std_dev = 2

close_rolling = all_data['Close'].rolling(window=window)

# Centre line and dispersion over the same window
all_data['middle_band'] = close_rolling.mean()
all_data['std_dev'] = close_rolling.std()

# Envelope around the centre line
band_offset = all_data['std_dev'] * num_std_dev
all_data['upper_band'] = all_data['middle_band'] + band_offset
all_data['lower_band'] = all_data['middle_band'] - band_offset

In [None]:
# Where the close sits inside the band: 0 = on the lower band, 1 = on the upper band
all_data['bollinger_position_score'] = (
    all_data['Close']
    .sub(all_data['lower_band'])
    .div(all_data['upper_band'].sub(all_data['lower_band']))
)

In [None]:
# MACD: difference between a fast (5) and a slow (35) EMA of the close, with
# a 5-period EMA of that difference as the signal line.
# The EMAs are computed directly on the Series; wrapping them in
# Series.transform(lambda ...) added nothing.
all_data['ShortEMA'] = all_data['Close'].ewm(span=5, adjust=False).mean()
all_data['LongEMA'] = all_data['Close'].ewm(span=35, adjust=False).mean()
all_data['MACD'] = all_data['ShortEMA'] - all_data['LongEMA']
all_data['signal'] = all_data['MACD'].ewm(span=5, adjust=False).mean()

# 1 when MACD is above its signal line (bullish), else 0. A vectorised
# comparison replaces the row-wise apply; NaN rows still map to 0.
all_data['macd_bullish'] = (all_data['MACD'] > all_data['signal']).astype('int64')

In [None]:
# ATR and ADX are used to determine signal strength.
# True Range (TR) is the largest of: today's high-low, |high - previous close|
# and |previous close - low|. ATR is the Wilder-smoothed TR.
all_data['prev_close'] = all_data.groupby('symbol')['Close'].shift(1)
all_data['TR'] = np.maximum(
    all_data['High'] - all_data['Low'],
    np.maximum((all_data['High'] - all_data['prev_close']).abs(),
               (all_data['prev_close'] - all_data['Low']).abs()))

# Wilder receives plain NumPy arrays so its integer indexing is positional
# regardless of the frame's date index.
for i in all_data['symbol'].unique():
    symbol_rows = all_data['symbol'] == i
    true_range = all_data.loc[symbol_rows, 'TR'].to_numpy()
    all_data.loc[symbol_rows, 'ATR_5'] = Wilder(true_range, 5)
    all_data.loc[symbol_rows, 'ATR_15'] = Wilder(true_range, 15)
# Short-term vs. longer-term volatility; >= 1 means volatility is expanding
all_data['ATR_Ratio'] = all_data['ATR_5'] / all_data['ATR_15']

# ATR/ADX recipe (with the Wilder smoothing helper) adapted from an online example
all_data['prev_high'] = all_data.groupby('symbol')['High'].shift(1)
all_data['prev_low'] = all_data.groupby('symbol')['Low'].shift(1)

up_move = all_data['High'] - all_data['prev_high']
down_move = all_data['prev_low'] - all_data['Low']

# +DM: the up-move when it is positive and larger than the down-move, else 0.
# Rows with no previous bar stay NaN.
all_data['+DM'] = np.where(
    all_data['prev_high'].notna(),
    np.where((all_data['High'] > all_data['prev_high']) & (up_move > down_move),
             up_move, 0),
    np.nan)
# -DM: the mirror image for down-moves.
all_data['-DM'] = np.where(
    all_data['prev_low'].notna(),
    np.where((all_data['prev_low'] > all_data['Low']) & (down_move > up_move),
             down_move, 0),
    np.nan)

for i in all_data['symbol'].unique():
    symbol_rows = all_data['symbol'] == i
    all_data.loc[symbol_rows, '+DM_15'] = Wilder(all_data.loc[symbol_rows, '+DM'].to_numpy(), 15)
    all_data.loc[symbol_rows, '-DM_15'] = Wilder(all_data.loc[symbol_rows, '-DM'].to_numpy(), 15)

# Directional indicators as a percentage of ATR, and their normalised spread (DX)
all_data['+DI_15'] = (all_data['+DM_15'] / all_data['ATR_15']) * 100
all_data['-DI_15'] = (all_data['-DM_15'] / all_data['ATR_15']) * 100
all_data['DX_15'] = np.round(
    (all_data['+DI_15'] - all_data['-DI_15']).abs()
    / (all_data['+DI_15'] + all_data['-DI_15']) * 100)

# ADX is the Wilder-smoothed DX
for i in all_data['symbol'].unique():
    symbol_rows = all_data['symbol'] == i
    all_data.loc[symbol_rows, 'ADX_15'] = Wilder(all_data.loc[symbol_rows, 'DX_15'].to_numpy(), 15)

# Determine if the stock has momentum and volatility (strength of the signal).
# Vectorised comparisons replace the row-wise apply; NaN rows still map to 0.
all_data['adx_signal'] = (all_data['ADX_15'] >= 25).astype('int64')
all_data['atr_signal'] = (all_data['ATR_Ratio'] >= 1).astype('int64')


In [None]:
# Bollinger Band width serves as the price range; range_score is positive
# when the current width is below its 10-bar average
all_data['range'] = all_data['upper_band'].sub(all_data['lower_band'])
all_data['avg_range'] = all_data['range'].rolling(window=10).mean()
all_data['range_score'] = 1 - all_data['range'].div(all_data['avg_range'])

In [None]:
# Starting threshold chosen arbitrarily (lowest ATR quartile); can be tuned later
low_atr_threshold = all_data['ATR_15'].quantile(0.25)

# Consolidating = weak trend (ADX below 20) together with low volatility;
# stored explicitly as bool
all_data['is_consolidating'] = (
    all_data['ADX_15'].lt(20) & all_data['ATR_15'].lt(low_atr_threshold)
).astype(bool)

In [None]:
# Find the date on which each consolidation period starts (for duration scoring).
# A period starts on a row that is consolidating while the previous row was not.
# shift(..., fill_value=False) keeps the column boolean, so `~` is a logical
# NOT instead of relying on fillna() downcasting an object column.
currently_consolidating = all_data['is_consolidating'].astype(bool)
previously_consolidating = currently_consolidating.shift(1, fill_value=False)
period_starts = currently_consolidating & ~previously_consolidating

start_dates = pd.Series(all_data.index, index=all_data.index).where(period_starts)

# Carry the start date forward through the period, then blank it again on
# non-consolidating rows so that a finished period does not keep "ageing".
all_data['consolidation_start_date'] = start_dates.ffill().where(currently_consolidating)

In [None]:
# Calculate duration of stability: days elapsed since the recorded start date
all_data['consolidation_duration'] = (all_data.index - all_data['consolidation_start_date']).dt.days  # days from Timedelta

# Min-max normalise the duration. Rows without a start date (and the
# degenerate case where max == min) produce NaN and default to 0.
# The result is assigned back instead of using fillna(inplace=True) on a
# column selection, which does not reliably update the parent frame.
duration_min = all_data['consolidation_duration'].min()
duration_span = all_data['consolidation_duration'].max() - duration_min
all_data['duration_score'] = (
    (all_data['consolidation_duration'] - duration_min) / duration_span
).fillna(0)

In [None]:
# Volume score: positive when today's volume is below its 10-bar average.
# The 1e-10 term guards against dividing by a zero average.
all_data['avg_volume'] = all_data['Volume'].rolling(window=10).mean()
all_data['volume_score'] = 1 - all_data['Volume'].div(all_data['avg_volume'].add(1e-10))
# Optional, currently disabled: floor the score at zero
# all_data['volume_score'] = all_data['volume_score'].clip(lower=0)

In [None]:
# Weighted blend of the component scores (weights sum to 1.0).
# The two boolean conditions act as binary 0/1 signals.
all_data['consolidation_score'] = (
    all_data['range_score'].mul(0.30)
    .add(all_data['duration_score'].mul(0.13))
    .add(all_data['volume_score'].mul(0.2))
    .add(all_data['ADX_15'].lt(20).mul(0.185))                  # weak trend
    .add(all_data['ATR_15'].lt(low_atr_threshold).mul(0.185))   # low volatility
)

In [None]:
# Plot copy of the consolidation score (currently an unmodified copy)
all_data['consolidation_score_plot'] = all_data['consolidation_score']
# 10-bar simple moving average of the plot copy
all_data['smoothed_consolidation_score'] = all_data['consolidation_score_plot'].rolling(window=10).mean()

# Rescale the smoothed score onto the close-price range so both can share an axis
min_close, max_close = all_data['Close'].min(), all_data['Close'].max()

smoothed_score = all_data['smoothed_consolidation_score']
unit_scaled_score = (smoothed_score - smoothed_score.min()) / (smoothed_score.max() - smoothed_score.min())
scaled_smoothed_consolidation_score = unit_scaled_score * (max_close - min_close) + min_close

In [None]:
# Parameter: rows at or below this consolidation score are shaded as non-consolidating
non_consolidating = all_data['consolidation_score'].le(0.1)

fig, (ax1, ax3) = plt.subplots(2, 1, figsize=(14, 10), gridspec_kw={'height_ratios': [3, 1]})

# Top panel, left axis: close price
color = 'tab:red'
ax1.plot(all_data.index, all_data['Close'], color=color, label='Close Price')
ax1.set_ylabel('Close Price', color=color)
ax1.tick_params(axis='y', labelcolor=color)

# Top panel, right axis: volume bars
ax2 = ax1.twinx()
color = 'tab:blue'
ax2.bar(all_data.index, all_data['Volume'], color=color, alpha=0.3, label='Volume')
ax2.set_ylabel('Volume', color=color)
ax2.tick_params(axis='y', labelcolor=color)

# Bottom panel: consolidation score
color = 'tab:green'
ax3.plot(all_data.index, all_data['consolidation_score'], color=color, label='Consolidation Score', linewidth=2)
ax3.set_ylabel('Consolidation Score', color=color)
ax3.set_xlabel('Date')

# Shade the full panel height wherever the score is at or below the threshold
shading_style = {'color': 'red', 'alpha': 0.3, 'label': 'Non-Consolidating'}
ax3.fill_between(all_data.index, 0, 1, where=non_consolidating,
                 transform=ax3.get_xaxis_transform(), **shading_style)

# Monthly ticks labelled year-month on both date axes
for date_axis in (ax1.xaxis, ax3.xaxis):
    date_axis.set_major_locator(mdates.MonthLocator())
    date_axis.set_major_formatter(mdates.DateFormatter('%Y-%m'))

# Legends
for legend_axes, legend_loc in ((ax1, 'upper left'), (ax2, 'upper right'), (ax3, 'upper left')):
    legend_axes.legend(loc=legend_loc)

plt.title('Close Price, Volume, and Consolidation Score Over Time')
plt.tight_layout()
plt.show()


In [None]:
# Trend score: each bullish indicator adds one point, each bearish one subtracts one
all_data['trend_score'] = 0

# RSI from 14-bar simple averages of gains and losses
delta = all_data['Close'].diff()
gain = (delta.where(delta > 0, 0)).rolling(window=14).mean()
loss = (-delta.where(delta < 0, 0)).rolling(window=14).mean()
rs = gain / loss
all_data['RSI'] = 100 - (100 / (1 + rs))

# ADX measures trend strength, not direction. Adding and subtracting a point
# on the same ADX > 25 condition cancelled out exactly, so a strong trend is
# now attributed to a side by comparing +DI with -DI.
strong_trend = all_data['ADX_15'] > 25

# Uptrend indicators
all_data.loc[(all_data['ShortEMA'] > all_data['LongEMA']) & (all_data['MACD'] > all_data['signal']), 'trend_score'] += 1
all_data.loc[all_data['RSI'] > 40, 'trend_score'] += 1  # Consider lowering the threshold for more sensitivity
all_data.loc[strong_trend & (all_data['+DI_15'] > all_data['-DI_15']), 'trend_score'] += 1  # Strong upward trend
all_data.loc[all_data['ATR_Ratio'] > 1, 'trend_score'] += 1  # Higher current volatility vs. past

# Downtrend indicators
# NOTE(review): RSI between 40 and 60 triggers both RSI rules and nets to
# zero -- confirm the overlapping thresholds are intended.
all_data.loc[(all_data['ShortEMA'] < all_data['LongEMA']) & (all_data['MACD'] < all_data['signal']), 'trend_score'] -= 1
all_data.loc[all_data['RSI'] < 60, 'trend_score'] -= 1  # Consider raising the threshold for more sensitivity
all_data.loc[strong_trend & (all_data['+DI_15'] < all_data['-DI_15']), 'trend_score'] -= 1  # Strong downward trend
all_data.loc[all_data['ATR_Ratio'] < 1, 'trend_score'] -= 1  # Lower current volatility vs. past

# Normalize the trend score to keep it bounded between -1 and 1.
# If every score is 0 the scale would be 0; keep the zeros instead of
# dividing by zero.
max_abs_trend_score = all_data['trend_score'].abs().max()
if max_abs_trend_score:
    all_data['trend_score_normalized'] = all_data['trend_score'] / max_abs_trend_score
else:
    all_data['trend_score_normalized'] = 0.0
all_data['smoothed_trend_score'] = all_data['trend_score_normalized'].rolling(window=10).mean()


In [None]:
fig, (ax1, ax3) = plt.subplots(2, 1, figsize=(14, 10), gridspec_kw={'height_ratios': [3, 1]})

# Top panel, left axis: close price
color = 'tab:red'
ax1.plot(all_data.index, all_data['Close'], color=color, label='Close Price')
ax1.set_ylabel('Close Price', color=color)
ax1.tick_params(axis='y', labelcolor=color)

# Top panel, right axis: volume. The axis label and ticks now use the bar
# colour; they previously inherited the red price colour.
ax2 = ax1.twinx()
color = 'blue'
ax2.bar(all_data.index, all_data['Volume'], color=color, alpha=0.3, label='Volume')
ax2.set_ylabel('Volume', color=color)
ax2.tick_params(axis='y', labelcolor=color)

# Bottom panel: smoothed trend score as a single grey line (it was drawn
# twice, giving duplicate legend entries), shaded green above zero and red below
ax3.plot(all_data.index, all_data['smoothed_trend_score'], color='grey', label='Trend Score')
ax3.fill_between(all_data.index, 0, all_data['smoothed_trend_score'], where=all_data['smoothed_trend_score'] >= 0, facecolor='green', alpha=0.3, interpolate=True)
ax3.fill_between(all_data.index, 0, all_data['smoothed_trend_score'], where=all_data['smoothed_trend_score'] <= 0, facecolor='red', alpha=0.3, interpolate=True)
ax3.set_ylabel('Trend Score')
ax3.set_xlabel('Date')

# Formatting dates
ax1.xaxis.set_major_locator(mdates.MonthLocator())
ax1.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
ax3.xaxis.set_major_locator(mdates.MonthLocator())
ax3.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))

# Legends
ax1.legend(loc='upper left')
ax2.legend(loc='upper right')
ax3.legend(loc='upper left')

plt.title('Close Price, Volume, and Trend Score Over Time')
plt.tight_layout()
plt.show()


In [None]:
# Summary statistics for the columns built so far
all_data.describe()

In [None]:
# Feature columns fed to the clustering step (to be optimised)
features = ['ATR_Ratio', 'MACD', 'ADX_15', 'consolidation_score', 'trend_score', 'volume_score']
data_selected = all_data[features]

# Fill gaps with the column mean, then standardise each feature
imputer = SimpleImputer(strategy='mean')
scaler = StandardScaler()
data_imputed = imputer.fit_transform(data_selected)
data_scaled = scaler.fit_transform(data_imputed)

# Wrap the scaled array back into a DataFrame on the original index
data_for_clustering = pd.DataFrame(data=data_scaled, index=all_data.index, columns=features)

In [None]:
# KMeans sweep for comparison: record inertia (elbow) and silhouette for k = 2..5
range_n_clusters = range(2, 6)
elbow = []
silhouette = []

for n_clusters in range_n_clusters:
    clusterer = KMeans(n_clusters=n_clusters, random_state=10).fit(data_scaled)
    cluster_labels = clusterer.labels_

    # Within-cluster sum of squares for the elbow plot
    elbow.append(clusterer.inertia_)

    # Mean silhouette over all samples
    silhouette_avg = silhouette_score(data_scaled, cluster_labels)
    silhouette.append(silhouette_avg)

# Side-by-side panels: elbow on the left, silhouette on the right
plt.figure(figsize=(12, 6))

kmeans_panels = [
    (elbow, 'bo-', 'Sum of Squared Distances', 'Elbow Method For Optimal k'),
    (silhouette, 'ro-', 'Silhouette Score', 'Silhouette Score For Optimal k'),
]
for panel_position, (panel_values, line_format, y_label, panel_title) in enumerate(kmeans_panels, start=1):
    plt.subplot(1, 2, panel_position)
    plt.plot(range_n_clusters, panel_values, line_format)
    plt.xlabel('Number of Clusters')
    plt.ylabel(y_label)
    plt.title(panel_title)

plt.tight_layout()
plt.show()


In [None]:
# Agglomerative clustering sweep over the pre-processed data_scaled, k = 2..5
range_n_clusters = range(2, 6)

# Silhouette score per candidate k
silhouette_scores = []

for n_clusters in range_n_clusters:
    clusterer = AgglomerativeClustering(n_clusters=n_clusters).fit(data_scaled)
    cluster_labels = clusterer.labels_

    silhouette_avg = silhouette_score(data_scaled, cluster_labels)
    silhouette_scores.append(silhouette_avg)
    print("For n_clusters =", n_clusters, "The average silhouette_score is :", silhouette_avg)

# Silhouette score against the number of clusters
plt.figure(figsize=(12, 6))
plt.plot(range_n_clusters, silhouette_scores, 'ro-')
for set_text, text in ((plt.xlabel, 'Number of Clusters'),
                       (plt.ylabel, 'Silhouette Score'),
                       (plt.title, 'Silhouette Score For Optimal k')):
    set_text(text)
plt.show()


In [None]:
# Fit a 3-cluster agglomerative model on the daily features and store each
# row's label on the original DataFrame
clustering = AgglomerativeClustering(n_clusters=3)
all_data['cluster_labels'] = clustering.fit_predict(data_for_clustering)

# Sanity checks: both frames should have the same number of rows and every
# row should carry a label
for checked_frame in (all_data, data_for_clustering):
    print(checked_frame.shape)
print("Clustering labels added:", all_data['cluster_labels'].notna().all())

In [None]:
fig, ax = plt.subplots(figsize=(14, 7))

# Close price line
ax.plot(all_data.index, all_data['Close'], label='Close Price', color='blue')

# One shading colour per cluster label (0, 1, 2)
colors = ['red', 'green', 'blue']

# Shade the full close-price range behind the line wherever a row belongs
# to the given cluster
close_low, close_high = all_data['Close'].min(), all_data['Close'].max()
for i, color in enumerate(colors):
    ax.fill_between(all_data.index, close_low, close_high,
                    where=all_data['cluster_labels'].eq(i),
                    facecolor=color, alpha=0.2, label=f'Cluster {i}')

# Pad the y-axis by 5% around the close-price range (adjust as needed)
ax.set_ylim(close_low * 0.95, close_high * 1.05)

# Monthly ticks labelled year-month
ax.xaxis.set_major_locator(mdates.MonthLocator())
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))

# Title, axis labels and legend
ax.set(title='Close Price with Cluster Label Shading', xlabel='Date', ylabel='Close Price')
ax.legend(loc='upper left')

plt.tight_layout()
plt.show()


In [None]:
# Tag every row with its calendar week (kept as-is if the index is already periods)
if isinstance(all_data.index, pd.PeriodIndex):
    all_data['Week'] = all_data.index
else:
    all_data['Week'] = all_data.index.to_period('W')

# Verify 'Week' column exists
print(all_data['Week'].head())

# Select only numeric columns for aggregation
numeric_data = all_data.select_dtypes(include=[np.number])

# Weekly means smooth out day-to-day noise
weekly_data = numeric_data.groupby(all_data['Week']).mean()

# Features to keep for weekly aggregated data, will optimize this
features = ['ATR_Ratio', 'MACD', 'ADX_15', 'consolidation_score', 'smoothed_trend_score', 'volume_score']
weekly_selected = weekly_data[features]

# Handling missing values
imputer = SimpleImputer(strategy='mean')
weekly_imputed = imputer.fit_transform(weekly_selected)

# Scaling the data
scaler = StandardScaler()
weekly_scaled = scaler.fit_transform(weekly_imputed)

# Cluster the weeks into 4 groups
clustering = AgglomerativeClustering(n_clusters=4)
clustering.fit(weekly_scaled)

# Assign each daily row the label of its week. ngroup() numbers the weeks in
# the same sorted order as the rows of weekly_data, so the lookup is correct
# even if the daily rows are not sorted by date (np.repeat was not).
week_position = all_data.groupby('Week').ngroup().to_numpy()
all_data['Cluster'] = clustering.labels_[week_position]

# Plotting setup
fig, ax = plt.subplots(figsize=(14, 7))
ax.plot(all_data.index, all_data['Close'], label='Close Price', color='blue')

close_low, close_high = all_data['Close'].min(), all_data['Close'].max()
colors = ['red', 'green', 'blue', 'purple']  # one colour per cluster label
for i, color in enumerate(colors):
    ax.fill_between(all_data.index, close_low, close_high,
                    where=(all_data['Cluster'] == i),
                    facecolor=color, alpha=0.2, label=f'Cluster {i}')

ax.set_ylim(close_low * 0.95, close_high * 1.05)
ax.legend()
plt.show()


In [None]:
# Inspect the first 60 rows, including the warm-up rows of the rolling indicators
all_data.head(60)

In [None]:
# Looped version (for when there are multiple tickers):
# scrape recent finviz headlines per ticker, score them with VADER, and
# export the mean sentiment per ticker to a spreadsheet.
import datetime
finviz_url = 'https://finviz.com/quote.ashx?t='
news_tables = {}
daynum = 60
tod = datetime.datetime.now()
d = datetime.timedelta(days = daynum)
a = tod - d

# NOTE(review): `mid` is not defined in the visible part of this notebook;
# it must exist (a DataFrame indexed by ticker) before this cell runs.
df_new = mid.rename_axis('Ticker').head(24)
tickers = df_new.index.tolist()

charts = []
chart_by_ticker = {}  # keeps each chart URL tied to its own ticker
n = 3  # headlines to print per ticker
for ticker in tickers:
    url = finviz_url + ticker
    req = Request(url=url, headers={'user-agent': 'my-app/0.0.1'})
    try:
        # The context manager closes the HTTP response once it is parsed
        with urlopen(req, timeout=30) as resp:
            html = BeautifulSoup(resp, features="lxml")
    except Exception as exc:
        # Best effort: report the failure and carry on with the other tickers
        print('Could not fetch news for {}: {}'.format(ticker, exc))
        continue
    news_table = html.find(id='news-table')
    if news_table is None:
        print('No news table found for {}'.format(ticker))
        continue
    news_tables[ticker] = news_table
    charts.append(str(url))
    chart_by_ticker[ticker] = str(url)

# Show the raw table and the first n headlines for every ticker that has news.
# A ticker without news is skipped on its own instead of ending the whole loop.
for ticker in tickers:
    ticker_table = news_tables.get(ticker)
    if ticker_table is None:
        continue
    df_tr = ticker_table.find_all('tr')
    display (ticker_table)
    display (df_tr)
    print ('\n')
    print ('Recent News Headlines for {}: '.format(ticker))
    shown = 0
    for table_row in df_tr:
        if table_row.a is None or table_row.td is None:
            continue  # rows without a headline link or a timestamp cell
        a_text = table_row.a.text
        td_text = table_row.td.text.strip()
        print(a_text,'(',td_text,')')
        shown += 1
        if shown == n:
            break

# Iterate through the news and flatten it into [ticker, date, time, headline] rows
parsed_news = []
for file_name, news_table in news_tables.items():
    ticker = file_name.split('_')[0]
    headline_date = None  # a date cell applies to the rows after it until the next date
    for x in news_table.find_all('tr'):
        if x.a is None or x.td is None:
            continue
        text = x.a.get_text()
        date_scrape = x.td.text.split()
        if not date_scrape:
            continue

        # Named headline_time so the imported `time` module is not overwritten
        if len(date_scrape) == 1:
            headline_time = date_scrape[0]
        else:
            headline_date = date_scrape[0]
            headline_time = date_scrape[1]
            # NOTE(review): finviz appears to label same-day headlines
            # 'Today', which pd.to_datetime cannot parse -- confirm the
            # '%b-%d-%y' format against the live page.
            if headline_date == 'Today':
                headline_date = tod.strftime('%b-%d-%y')

        parsed_news.append([ticker, headline_date, headline_time, text])

# Sentiment Analysis
analyzer = SentimentIntensityAnalyzer()

columns = ['Ticker', 'Date', 'Time', 'Headline']
news = pd.DataFrame(parsed_news, columns=columns)
scores = news['Headline'].apply(analyzer.polarity_scores).tolist()

df_scores = pd.DataFrame(scores)
news = news.join(df_scores, rsuffix='_right')

# View Data
news['Date'] = pd.to_datetime(news['Date']).dt.date

unique_ticker = news['Ticker'].unique().tolist()
news_dict = {name: news.loc[news['Ticker'] == name] for name in unique_ticker}

# Mean compound score per ticker. Each row carries its own ticker and chart
# URL, so one failed download can no longer shift values onto the wrong ticker.
values = []
sentiment_rows = []
for ticker in tickers:
    if ticker not in news_dict or ticker not in chart_by_ticker:
        continue
    dataframe = news_dict[ticker].set_index('Ticker').drop(columns = ['Headline'])
    print ('\n')
    print (dataframe.head())
    mean = round(dataframe['compound'].mean(), 2)
    values.append(mean)
    sentiment_rows.append((ticker, mean, chart_by_ticker[ticker]))

df1 = pd.DataFrame(sentiment_rows, columns =['Ticker', 'Mean Sentiment', 'Chart'])
df1 = df1.sort_values(by=['Mean Sentiment'], ascending=False)
print(df1)
# float64 instead of float16: half precision visibly distorted the 2-decimal means
df1 = df1.astype({"Mean Sentiment": np.float64})
# Same mapping as before: 1 + x for x >= 0 and 1 - x for x < 0, i.e. 1 + |x|.
# NOTE(review): negative sentiment therefore raises the score as much as
# positive sentiment does -- confirm this is intended (1 + x would penalise it).
df1['Mean Sentiment'] = 1 + df1['Mean Sentiment'].abs()
print ('\n')
print (df_new)

new = pd.merge(df_new, df1, on = 'Ticker', how = 'outer')
new = new.sort_values(by=['Mean Sentiment'], ascending=False)

print (new)
final_scores = new['Mean Sentiment'].tolist()
final_tickers = new.Ticker.tolist()
stocks = dict(zip(final_tickers, final_scores))

new.to_excel( r'C:\Users\amoog\Desktop\Stock_Notifier\Stock_Notifier\spreadsheets\{fdate}.xlsx'.format(fdate = 'Sentiment ' + tod.strftime("%d-%m-%Y")), sheet_name = 'today', index=False)
