In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd  # Import pandas library

base_url = "https://www.tradingview.com/symbols/NASDAQ-AAPL/ideas/"
sort_option = "?sort=recent"
max_pages = 10  # Adjust the number of pages you want to scrape

data_list = []

for page in range(1, max_pages + 1):
    url = f"{base_url}page-{page}/{sort_option}"
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')
    
    timestamp_elements = soup.find_all("span", class_="tv-card-stats__time", attrs={"data-timestamp": True})
    a_elements = soup.find_all("a", class_="tv-widget-idea__cover-link js-widget-idea__popup")
    info_rows = soup.find_all("div", class_="tv-widget-idea__info-row")
    username_elements = soup.find_all("span", class_="tv-card-user-info__name")
    
    if not timestamp_elements or not a_elements or not info_rows or not username_elements:
        break
    
    for timestamp_element, a_element, row, username_element in zip(timestamp_elements, a_elements, info_rows, username_elements):
        timestamp = timestamp_element["data-timestamp"]
        img_element = a_element.find("img")
        alt_text = img_element["alt"]
        data_href = a_element["data-href"]
        
        label_element = row.find("span", class_="badge-PlSmolIm size-medium-PlSmolIm badge-idea-ZleujXqe type-short-ZleujXqe size-medium-ZleujXqe variant-primary-ZleujXqe with-icon-ZleujXqe tv-widget-idea__label") or \
                        row.find("span", class_="badge-PlSmolIm size-medium-PlSmolIm badge-idea-ZleujXqe type-long-ZleujXqe size-medium-ZleujXqe variant-primary-ZleujXqe with-icon-ZleujXqe tv-widget-idea__label")
        
        if label_element:
            label_text = label_element.find("span", class_="content-PlSmolIm badge-idea-content-ZleujXqe").get_text()
        else:
            label_text = "N/A"
        
        username = username_element.get_text()
        
        chart_url = f"https://www.tradingview.com{data_href}"
        response = requests.get(chart_url)
        soup = BeautifulSoup(response.content, 'html.parser')
        description_element = soup.find("div", class_="tv-chart-view__description")
        if description_element:
            description_text = description_element.get_text()
        else:
            description_text = "Description not found on the page."
        
        data_list.append((timestamp, alt_text, label_text, description_text, username))

# Create a DataFrame from the collected data
df = pd.DataFrame(data_list, columns=["Timestamp", "Alt Text", "Label", "Description", "Username"])

# Print the DataFrame
print(df)


        Timestamp                                           Alt Text  Label  \
0    1692129257.0            AAPL: Apple -> Correction Already Over?   Long   
1    1691660740.0  AAPL: AAPL, Crucial Trend-Dynamics, Timeframes...    N/A   
2    1692036887.0                    AAPL: ✅APPLE LONG FROM SUPPORT🚀   Long   
3    1691951238.0                                         AAPL: Aapl   Long   
4    1691352868.0          AAPL: Apple -> Massive Breakdown And Now?  Short   
..            ...                                                ...    ...   
175  1689956352.0                                        AAPL: $AAPL  Short   
176  1689932186.0                                         AAPL: AAPL    N/A   
177  1689929130.0                             AAPL: AAPL still solid   Long   
178  1689919112.0                                   AAPL: AAPL Short  Short   
179  1689886577.0  AAPL: Part 2 of 7 Mega cap Tech stocks | QQQ S...  Short   

                                           Descript

In [2]:
   
# Create a DataFrame from the collected data
df = pd.DataFrame(data_list, columns=["Timestamp", "Alt Text", "Label", "Description", "Username"])

# Convert Timestamp to datetime
df["Timestamp"] = pd.to_datetime(df["Timestamp"], unit="s")

# Assign appropriate data types to other columns
data_types = {
    "Alt Text": str,
    "Label": str,
    "Description": str,
    "Username": str
}

df = df.astype(data_types)

  df["Timestamp"] = pd.to_datetime(df["Timestamp"], unit="s")


In [3]:
df.head()

Unnamed: 0,Timestamp,Alt Text,Label,Description,Username
0,2023-08-15 19:54:40,AAPL: Apple -> Correction Already Over?,Long,"Hello Traders and Investors, \r\nmy name is Ph...",basictradingtv
1,2023-08-10 09:46:40,"AAPL: AAPL, Crucial Trend-Dynamics, Timeframes...",,Hello There! \n\r\nWelcome to my video idea a...,VincePrince
2,2023-08-14 18:14:24,AAPL: ✅APPLE LONG FROM SUPPORT🚀,Long,\n\n\n\n\n\n✅APPLE is trading in an \r\nUptren...,ProSignalsFx
3,2023-08-13 18:27:12,AAPL: Aapl,Long,"Weekly chart is hideous, definitely expectin...",ContraryTrader
4,2023-08-06 20:13:52,AAPL: Apple -> Massive Breakdown And Now?,Short,"Hello Traders and Investors, \r\nmy name is Ph...",basictradingtv


In [4]:
from textblob import TextBlob


# Function to perform sentiment analysis using TextBlob
def analyze_sentiment(description):
    analysis = TextBlob(description)
    sentiment_score = analysis.sentiment.polarity
    if sentiment_score > 0:
        return "Positive"
    elif sentiment_score < 0:
        return "Negative"
    else:
        return "Neutral"

# Apply sentiment analysis to the 'Description' column
df['Sentiment'] = df['Description'].apply(analyze_sentiment)

# Encode sentiment labels into numerical values
sentiment_mapping = {"Positive": 1, "Neutral": 0, "Negative": -1}
df['Sentiment_Encoded'] = df['Sentiment'].map(sentiment_mapping)

# Encode label values into numerical values
label_mapping = {"Long": 1, "N/A": 0, "Short": -1}
df['Label_Encoded'] = df['Label'].map(label_mapping)

# Calculate correlation between 'Label_Encoded' and 'Sentiment_Encoded'
correlation = df['Label_Encoded'].corr(df['Sentiment_Encoded'])

# Print the correlation value
print("Correlation between Label and Sentiment:", correlation)

Correlation between Label and Sentiment: 0.106818226795702


In [5]:
df.groupby(['Label', 'Sentiment'])['Timestamp'].count()

Label  Sentiment
Long   Negative     11
       Neutral       4
       Positive     43
N/A    Negative     12
       Neutral       8
       Positive     47
Short  Negative     16
       Neutral       5
       Positive     34
Name: Timestamp, dtype: int64

In [6]:
pd.set_option('display.max_colwidth', None)

df[df['Label'] == 'Short']['Description']

4                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                       

In [7]:
import pandas as pd
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from collections import Counter

# Set display option to show full content of columns
pd.set_option('display.max_colwidth', None)

# Load NLTK stopwords
nltk.download('punkt')
nltk.download('stopwords')
stop_words = set(stopwords.words('english'))



# Tokenize and preprocess the description text
def preprocess_text(text):
    words = word_tokenize(text.lower())
    filtered_words = [word for word in words if word.isalnum() and word not in stop_words]
    return filtered_words

# Apply preprocessing to the Description column
df['Processed_Description'] = df['Description'].apply(preprocess_text)

# Count keywords
keyword_counter = Counter()
for keywords in df['Processed_Description']:
    keyword_counter.update(keywords)

# Display the most common keywords
most_common_keywords = keyword_counter.most_common(10)
print("Most common keywords:", most_common_keywords)


Most common keywords: [('price', 165), ('apple', 118), ('market', 103), ('aapl', 94), ('stock', 81), ('see', 76), ('time', 76), ('trading', 68), ('wave', 64), ('support', 61)]


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Chris\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Chris\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [13]:
df[['Description','Processed_Description']].head()

Unnamed: 0,Description,Processed_Description
0,"Hello Traders and Investors, \r\nmy name is Philip and today I will provide a free and educational multi-timeframe technical analysis of Apple💪 \n\r\nStarting on the monthly timeframe you can see that after Apple broke out of the clear triangle formation in confluence with the bullish moving averages, Apple created a strong rally of 30% towards the upside, breaking major resistance. \n\r\nOn the weekly timeframe you can see that Apple is already approching previous resistance which could be acting as support and after the retest of the 0.382 fibonacci retracement level we could see at least a short term bullish bounce. \n\r\nBut Apple stock is still creating bearish market structure so there is also a chance that Apple stock will just break below the current support level - If you are looking for longs though I would simply wait for a break aboce the bearish daily trendline and then enter a long position. \n\nKeep in mind: Don't get caught up in short term moves and always look at the long term picture; building wealth is a marathon and not a quick sprint📈 \n\r\nThank you for watching and I will see you tomorrow! \n\nMy previous analysis of this asset: \n\n\n\n\n03:42\n\n","[hello, traders, investors, name, philip, today, provide, free, educational, technical, analysis, starting, monthly, timeframe, see, apple, broke, clear, triangle, formation, confluence, bullish, moving, averages, apple, created, strong, rally, 30, towards, upside, breaking, major, resistance, weekly, timeframe, see, apple, already, approching, previous, resistance, could, acting, support, retest, fibonacci, retracement, level, could, see, least, short, term, bullish, bounce, apple, stock, still, creating, bearish, market, structure, also, chance, apple, stock, break, current, support, level, looking, longs, though, would, simply, wait, break, aboce, bearish, daily, trendline, enter, long, position, keep, mind, get, caught, short, term, moves, always, look, long, term, picture, building, wealth, marathon, ...]"
1,"Hello There! \n\r\nWelcome to my video idea about AAPL on the 4-hour-, daily-, and weekly timeframe perspectives. AAPL is a major stock asset in the stock market with almost nearly $3 Billion in market-cap being the biggest market-cap stock within the stock market. \n\r\nIn my video I lift the secret behind the recent massive dump towards the downside and what should be considered with the dynamics in correlation with the levels that Apple approaches now. Apple is now in a crucial decisive movement with the bearish trend to either continue or reverse. \n\r\nWe will watch the symbol closely and cover any important changes in the dynamics. Especially within the next times and major changes within the market ongoing it will be an exciting dynamic when also considering a potential positioning in the market in the long- or short direction. \n\nIn this manner, it will be greatly appreciated when you support my video. Have a good day and many lucks in trading. \n\r\nVP","[hello, welcome, video, idea, aapl, weekly, timeframe, perspectives, aapl, major, stock, asset, stock, market, almost, nearly, 3, billion, biggest, stock, within, stock, market, video, lift, secret, behind, recent, massive, dump, towards, downside, considered, dynamics, correlation, levels, apple, approaches, apple, crucial, decisive, movement, bearish, trend, either, continue, reverse, watch, symbol, closely, cover, important, changes, dynamics, especially, within, next, times, major, changes, within, market, ongoing, exciting, dynamic, also, considering, potential, positioning, market, short, direction, manner, greatly, appreciated, support, video, good, day, many, lucks, trading, vp]"
2,\n\n\n\n\n\n✅APPLE is trading in an \r\nUptrend and the stock \r\nWas making a bearish correction \r\nBut now the price has reached a \r\nSupport level of 175.00$ from \r\nWhere I think we will see \r\nA bullish rebound \nLONG🚀 \n\n\r\n✅Like and subscribe to never miss a new idea!✅ \n,"[trading, uptrend, stock, making, bearish, correction, price, reached, support, level, think, see, bullish, rebound, subscribe, never, miss, new, idea]"
3,"Weekly chart is hideous, definitely expecting aapl to test 169 by mid sept \n\n\n\n\n\n\r\nBut in the mean time , the 4hour chart is way to oversold . The RSI and MFI hasn't been this oversold since Dec 2022. \n\r\nLooking for a rebound back up to 182.50 -185.00","[weekly, chart, hideous, definitely, expecting, aapl, test, 169, mid, sept, mean, time, 4hour, chart, way, oversold, rsi, mfi, oversold, since, dec, looking, rebound, back]"
4,"Hello Traders and Investors, \r\nmy name is Philip and today I will provide a free and educational multi-timeframe technical analysis of Apple💪 \n\r\nStarting on the monthly timeframe you can see that after Apple broke out of the clear triangle formation in confluence with the bullish moving averages, Apple created a strong rally of 30% towards the upside, breaking major resistance. \n\r\nAs I mentioned over and over again the weekly timeframe looked quite overextended so I do expect even more short term bearish pressure before a reversal will be quite likely. \n\r\nWith Apple's gap down of -5% on Friday my last analysis, linked below, perfectly played out but there is no reason why Apple stock should reverse immediately so be careful and don't jump into longs too early. \n\r\nKeep in mind: Don't get caught up in short term moves and always look at the long term picture; building wealth is a marathon and not a quick sprint📈 \n\r\nThank you for watching and I will see you tomorrow! \n\r\nMy previous analysis of this asset:","[hello, traders, investors, name, philip, today, provide, free, educational, technical, analysis, starting, monthly, timeframe, see, apple, broke, clear, triangle, formation, confluence, bullish, moving, averages, apple, created, strong, rally, 30, towards, upside, breaking, major, resistance, mentioned, weekly, timeframe, looked, quite, overextended, expect, even, short, term, bearish, pressure, reversal, quite, likely, apple, gap, friday, last, analysis, linked, perfectly, played, reason, apple, stock, reverse, immediately, careful, jump, longs, early, keep, mind, get, caught, short, term, moves, always, look, long, term, picture, building, wealth, marathon, quick, thank, watching, see, tomorrow, previous, analysis, asset]"


In [31]:
import yfinance as yf 
from datetime import datetime
# Define the ticker symbol (AAPL for Apple)
ticker = "AAPL"

# Fetch historical data using yfinance
APPL=yf.download(ticker, start=df['Timestamp'].min(),end=df['Timestamp'].max())

APPL

[*********************100%%**********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2023-07-20,195.089996,196.470001,192.5,193.130005,192.869553,59581200
2023-07-21,194.100006,194.970001,191.229996,191.940002,191.681168,71917800
2023-07-24,193.410004,194.910004,192.25,192.75,192.490067,45377800
2023-07-25,193.330002,194.440002,192.919998,193.619995,193.358887,37283200
2023-07-26,193.669998,195.639999,193.320007,194.5,194.237701,47471900
2023-07-27,196.020004,197.199997,192.550003,193.220001,192.959427,47460200
2023-07-28,194.669998,196.630005,194.139999,195.830002,195.565918,48291400
2023-07-31,196.059998,196.490005,195.259995,196.449997,196.185074,38824100
2023-08-01,196.240005,196.729996,195.279999,195.610001,195.346207,35175100
2023-08-02,195.039993,195.179993,191.850006,192.580002,192.320297,50389300


In [59]:
merged_df = df.merge(APPL, left_index=True, right_index=True, how='left')
merged_df.head()

Unnamed: 0_level_0,Alt Text,Label,Description,Username,Sentiment,Sentiment_Encoded,Label_Encoded,Processed_Description,Open,High,Low,Close,Adj Close,Volume
Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2023-07-20,AAPL: Part 2 of 7 Mega cap Tech stocks | QQQ Sp500 Analysis,Short,Hourly downtrends confirm on all mega cap tech and QQQ/ SPY \r\nwill see if bears are serious about confirming daily and 4h downtrend in the next couple trading days.,ArcadiaTrading,Negative,-1,-1,"[hourly, downtrends, confirm, mega, cap, tech, spy, see, bears, serious, confirming, daily, 4h, downtrend, next, couple, trading, days]",195.089996,196.470001,192.5,193.130005,192.869553,59581200.0
2023-07-21,AAPL: AAPL Pullback,Short,\n\n\nAAPL\n\n\n \r\nApple due for a pullback. \n\r\nBumping against the underside of the median line. \n\r\nVolume divergence. \n\r\nRSI divergence. \n\r\nLook for a retrace to .382 or .50 Fibs. \n\n \r\nFat_Fat \n,Cousin_Fat_Fat,Negative,-1,-1,"[aapl, apple, due, pullback, bumping, underside, median, line, volume, divergence, rsi, divergence, look, retrace, fibs]",194.100006,194.970001,191.229996,191.940002,191.681168,71917800.0
2023-07-21,AAPL: Sell 193 sl 200 tp 100,Short,Expect huge drop on way pls like for support thank you,I_AM_ADRENALINE,Positive,1,-1,"[expect, huge, drop, way, pls, like, support, thank]",194.100006,194.970001,191.229996,191.940002,191.681168,71917800.0
2023-07-21,AAPL: $AAPL,Short,"sum rather large distribution, sittin around 186+/- unrealized, this will b thu last shoe to fall off. we already did a number on META NVDA AMZN TSLA scamz thu past week.",nobullshytrader0,Negative,-1,-1,"[sum, rather, large, distribution, sittin, around, unrealized, b, thu, last, shoe, fall, already, number, meta, nvda, amzn, tsla, scamz, thu, past, week]",194.100006,194.970001,191.229996,191.940002,191.681168,71917800.0
2023-07-21,AAPL: AAPL,,close 2 days above 195 will go to 200 \r\nclose 2 days down 195 will go to 180 \n\n\r\nthis is a training chart.,alharby89,Negative,-1,0,"[close, 2, days, 195, go, 200, close, 2, days, 195, go, 180, training, chart]",194.100006,194.970001,191.229996,191.940002,191.681168,71917800.0


In [65]:

# Assuming you have the 'merged_df' DataFrame with the stock price data
window_size = 5

# Calculate rolling mean and rolling standard deviation

a=merged_df['Close'].rolling(window=window_size)

a

Rolling [window=5,center=False,axis=0,method=single]

In [66]:
a= merged_df['Close'].rolling(window=window_size).mean()
a

Timestamp
2023-07-20           NaN
2023-07-21           NaN
2023-07-21           NaN
2023-07-21           NaN
2023-07-21    192.178003
                 ...    
2023-08-14    179.460007
2023-08-14    179.460007
2023-08-15    179.058005
2023-08-15    178.656003
2023-08-17           NaN
Name: Close, Length: 180, dtype: float64