In [6]:
import os
import django

# Allow Django ORM to be used in async context (Jupyter)
os.environ["DJANGO_ALLOW_ASYNC_UNSAFE"] = "true"

# Set the Django settings module
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "StockSanket.settings")
django.setup()

from stocks.models import Stock


In [7]:
# Load all NEPSE stock symbols
stock_symbols = list(Stock.objects.values_list('symbol', 'company'))

# Preview a few
print(stock_symbols[:5])

[('NABIL', 'Nabil Bank Limited'), ('NIMB', 'Nepal Investment Mega Bank Limited'), ('SCB', 'Standard Chartered Bank  Nepal Limited'), ('HBL', 'Himalayan Bank Limited'), ('SBI', 'Nepal SBI Bank Limited')]


In [13]:
pip install selenium webdriver-manager






[notice] A new release of pip is available: 24.3.1 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [12]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager

chrome_options = Options()
chrome_options.add_argument("--headless")
chrome_options.add_argument("--disable-gpu")

service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=service, options=chrome_options)
driver.get("https://www.google.com")
print("✅ Page title:", driver.title)
driver.quit()


✅ Page title: Google


In [23]:
import os
import time
import pandas as pd
import django
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, TimeoutException
from webdriver_manager.chrome import ChromeDriverManager

# Django setup
os.environ["DJANGO_ALLOW_ASYNC_UNSAFE"] = "true"
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "StockSanket.settings")
django.setup()
from stocks.models import Stock

# Load stock symbols and company names
stock_symbols = list(Stock.objects.values_list('symbol', 'company'))
print("🔢 Total stocks loaded:", len(stock_symbols))

def scrape_news_for_all_stocks():
    chrome_options = Options()
    # chrome_options.add_argument("--headless")  # disable for debugging
    chrome_options.add_argument("--disable-gpu")
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument("--window-size=1920,1080")

    service = Service(ChromeDriverManager().install())
    driver = webdriver.Chrome(service=service, options=chrome_options)
    wait = WebDriverWait(driver, 10)
    all_news = []

    for idx, (symbol, company) in enumerate(stock_symbols):
        print(f"\n🔍 [{idx + 1}/{len(stock_symbols)}] Scraping: {symbol} ({company})")
        try:
            driver.get("https://merolagani.com/NewsList.aspx?catid=all")

            # Wait for the autosuggest input
            input_id = "ctl00_ContentPlaceHolder1_ASCompanyFilter_txtAutoSuggest"
            wait.until(EC.presence_of_element_located((By.ID, input_id)))
            search_input = driver.find_element(By.ID, input_id)
            search_input.clear()
            search_input.send_keys(symbol)
            print(f"✅ Entered symbol: {symbol}")

            # Wait a bit for autosuggest to populate (simulate user pause)
            time.sleep(1.5)

            # Click Search
            search_btn = driver.find_element(By.ID, "ctl00_ContentPlaceHolder1_lbtnSearch")
            driver.execute_script("arguments[0].click();", search_btn)
            print("✅ Search clicked")

            # Wait for either news or "Could not find news"
            try:
                wait.until(
                    EC.any_of(
                        EC.presence_of_element_located((By.CSS_SELECTOR, ".media-news")),
                        EC.presence_of_element_located((By.XPATH, "//div[@class='panel-body' and contains(text(), 'Could not find news')]"))
                    )
                )
            except TimeoutException:
                print("⚠️ Timeout waiting for search results.")
                continue

            # If no news found, skip
            if driver.find_elements(By.XPATH, "//div[@class='panel-body' and contains(text(), 'Could not find news')]"):
                print(f"❌ No news found for {symbol}")
            else:
                seen_links = set()
                load_attempts = 0
                MAX_ATTEMPTS = 15

                while True:
                    news_blocks = driver.find_elements(By.CSS_SELECTOR, ".media-news")
                    new_found = False

                    for block in news_blocks:
                        try:
                            title_el = block.find_element(By.CSS_SELECTOR, "h4.media-title a")
                            link = title_el.get_attribute("href")
                            if link in seen_links:
                                continue

                            image_el = block.find_element(By.CSS_SELECTOR, ".media-wrap img")
                            image = image_el.get_attribute("src")
                            if not image:
                                continue  # skip if image missing

                            title = title_el.text
                            date = block.find_element(By.CSS_SELECTOR, "span.media-label").text

                            all_news.append({
                                "symbol": symbol,
                                "company": company,
                                "title": title,
                                "link": link,
                                "date": date,
                                "image": image
                            })
                            seen_links.add(link)
                            new_found = True
                        except:
                            continue

                    try:
                        load_more = driver.find_element(By.XPATH, "//a[contains(text(),'Load More')]")
                        if load_more.is_displayed():
                            driver.execute_script("arguments[0].scrollIntoView(true);", load_more)
                            driver.execute_script("arguments[0].click();", load_more)
                            print("🔁 Clicked 'Load More'")
                            time.sleep(2)
                            load_attempts += 1
                            if load_attempts >= MAX_ATTEMPTS:
                                print("⚠️ Max 'Load More' attempts reached.")
                                break
                        else:
                            break
                    except NoSuchElementException:
                        break

                    if not new_found:
                        print("✅ No more new articles.")
                        break

        except Exception as e:
            print(f"❌ Error processing {symbol}: {e}")

        # Always click "Clear" before next search
        try:
            clear_btn = driver.find_element(By.ID, "ctl00_ContentPlaceHolder1_lbtnClear")
            driver.execute_script("arguments[0].click();", clear_btn)
            print("🧹 Cleared search input")
            time.sleep(1)
        except:
            print("⚠️ Could not clear input for", symbol)

    driver.quit()

    df = pd.DataFrame(all_news)
    df.drop_duplicates(subset=["link"], inplace=True)
    df.to_csv("stock_sentiment_news.csv", index=False, encoding="utf-8-sig")
    print(f"\n✅ Scraping complete. Total unique news: {len(df)}")
    return df

if __name__ == "__main__":
    scrape_news_for_all_stocks()


🔢 Total stocks loaded: 582

🔍 [1/582] Scraping: NABIL (Nabil Bank Limited)
✅ Entered symbol: NABIL
✅ Search clicked
🔁 Clicked 'Load More'
🔁 Clicked 'Load More'
🔁 Clicked 'Load More'
🔁 Clicked 'Load More'
🔁 Clicked 'Load More'
✅ No more new articles.
🧹 Cleared search input

🔍 [2/582] Scraping: NIMB (Nepal Investment Mega Bank Limited)
✅ Entered symbol: NIMB
✅ Search clicked
🔁 Clicked 'Load More'
🔁 Clicked 'Load More'
✅ No more new articles.
🧹 Cleared search input

🔍 [3/582] Scraping: SCB (Standard Chartered Bank  Nepal Limited)
✅ Entered symbol: SCB
✅ Search clicked
🔁 Clicked 'Load More'
🔁 Clicked 'Load More'
✅ No more new articles.
🧹 Cleared search input

🔍 [4/582] Scraping: HBL (Himalayan Bank Limited)
✅ Entered symbol: HBL
✅ Search clicked
🔁 Clicked 'Load More'
🔁 Clicked 'Load More'
✅ No more new articles.
🧹 Cleared search input

🔍 [5/582] Scraping: SBI (Nepal SBI Bank Limited)
✅ Entered symbol: SBI
✅ Search clicked
🔁 Clicked 'Load More'
🔁 Clicked 'Load More'
✅ No more new articles.


In [3]:
import pandas as pd
df_news = pd.read_csv("stock_sentiment_news.csv", header=0)
df_news.columns = df_news.columns.str.strip()  # Clean up whitespace if any

# Rename for clarity if needed
df_news.rename(columns={"Column1": "title", "Column2": "link", "Column3": "date"}, inplace=True)


In [4]:
print(df_news.columns)
print(df_news.head())

Index(['symbol', 'company', 'title', 'link', 'date', 'image'], dtype='object')
  symbol             company  \
0  NABIL  Nabil Bank Limited   
1  NABIL  Nabil Bank Limited   
2  NABIL  Nabil Bank Limited   
3  NABIL  Nabil Bank Limited   
4  NABIL  Nabil Bank Limited   

                                               title  \
0            नबिल बैंकले ल्यायो ‘नबिल दिगो बचत खाता’   
1  नबिल एसएसई फेलोसिप कार्यक्रमको आवेदन खुला, आगा...   
2  अन्तर्राष्ट्रिय नारी दिवसमा नबिल बैंकले गरेको ...   
3  कोशी प्रदेशमा नबिल बैंकको ‘नबिल एसएसई कनेक्टः ...   
4       नबिल बैंकद्वारा नबिल एसएसई कार्यक्रम सम्पन्न   

                                                link                   date  \
0  https://merolagani.com/NewsDetail.aspx?newsID=...  Apr 08, 2025 11:13 AM   
1  https://merolagani.com/NewsDetail.aspx?newsID=...  Mar 10, 2025 06:09 PM   
2  https://merolagani.com/NewsDetail.aspx?newsID=...  Mar 09, 2025 01:59 PM   
3  https://merolagani.com/NewsDetail.aspx?newsID=...  Feb 23, 2025 12:59 PM

In [10]:
from deep_translator import GoogleTranslator
from textblob import TextBlob

def translate_and_analyze(text):
    try:
        translated = GoogleTranslator(source='auto', target='en').translate(text)
        blob = TextBlob(translated)
        return pd.Series([translated, blob.sentiment.polarity])
    except:
        return pd.Series(["", 0.0])


  from scipy.stats import fisher_exact


In [5]:
def match_stock_symbol(title):
    matched = []
    for symbol, company in stock_symbols:
        words = company.lower().split()
        if sum(word in title.lower() for word in words) >= 1:
            matched.append(symbol)
    return matched


In [6]:
print(df_news.columns)
print(df_news.head())

Index(['symbol', 'company', 'title', 'link', 'date', 'image'], dtype='object')
  symbol             company  \
0  NABIL  Nabil Bank Limited   
1  NABIL  Nabil Bank Limited   
2  NABIL  Nabil Bank Limited   
3  NABIL  Nabil Bank Limited   
4  NABIL  Nabil Bank Limited   

                                               title  \
0            नबिल बैंकले ल्यायो ‘नबिल दिगो बचत खाता’   
1  नबिल एसएसई फेलोसिप कार्यक्रमको आवेदन खुला, आगा...   
2  अन्तर्राष्ट्रिय नारी दिवसमा नबिल बैंकले गरेको ...   
3  कोशी प्रदेशमा नबिल बैंकको ‘नबिल एसएसई कनेक्टः ...   
4       नबिल बैंकद्वारा नबिल एसएसई कार्यक्रम सम्पन्न   

                                                link                   date  \
0  https://merolagani.com/NewsDetail.aspx?newsID=...  Apr 08, 2025 11:13 AM   
1  https://merolagani.com/NewsDetail.aspx?newsID=...  Mar 10, 2025 06:09 PM   
2  https://merolagani.com/NewsDetail.aspx?newsID=...  Mar 09, 2025 01:59 PM   
3  https://merolagani.com/NewsDetail.aspx?newsID=...  Feb 23, 2025 12:59 PM

In [7]:
pip install "httpcore<0.15"


Collecting httpcore<0.15Note: you may need to restart the kernel to use updated packages.

  Using cached httpcore-0.14.7-py3-none-any.whl.metadata (15 kB)
Collecting h11<0.13,>=0.11 (from httpcore<0.15)
  Using cached h11-0.12.0-py3-none-any.whl.metadata (8.1 kB)
Using cached httpcore-0.14.7-py3-none-any.whl (68 kB)
Using cached h11-0.12.0-py3-none-any.whl (54 kB)
Installing collected packages: h11, httpcore
  Attempting uninstall: h11
    Found existing installation: h11 0.14.0
    Uninstalling h11-0.14.0:
      Successfully uninstalled h11-0.14.0
  Attempting uninstall: httpcore
    Found existing installation: httpcore 1.0.7
    Uninstalling httpcore-1.0.7:
      Successfully uninstalled httpcore-1.0.7
Successfully installed h11-0.12.0 httpcore-0.14.7


ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
httpx 0.27.2 requires httpcore==1.*, but you have httpcore 0.14.7 which is incompatible.


In [8]:
pip uninstall googletrans -y


Note: you may need to restart the kernel to use updated packages.




In [9]:
pip install deep-translator


Note: you may need to restart the kernel to use updated packages.


In [10]:
pip install vaderSentiment


Note: you may need to restart the kernel to use updated packages.


In [11]:
from deep_translator import GoogleTranslator
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import pandas as pd

# Load your CSV
df_news = pd.read_csv("stock_sentiment_news.csv")
df_news.columns = df_news.columns.str.strip()  # Remove whitespace from column names

# Setup sentiment analyzer
analyzer = SentimentIntensityAnalyzer()

# Translation + sentiment function
def safe_translate_and_analyze(title):
    try:
        translated = GoogleTranslator(source='auto', target='en').translate(title)
        score = analyzer.polarity_scores(translated)
        polarity = score['compound']
        print(f"📝 {title} → {translated} → polarity: {polarity}")
        return translated, polarity
    except Exception as e:
        print(f"⚠️ Error for '{title}': {e}")
        return "", 0.0

# Apply function to all titles
translated_polarity_df = df_news["title"].apply(safe_translate_and_analyze).apply(pd.Series)
translated_polarity_df.columns = ["translated", "polarity"]

# Merge results into original DataFrame
df_news = pd.concat([df_news, translated_polarity_df], axis=1)

# Optional: Add sentiment label
def get_sentiment_label(p):
    if p >= 0.05:
        return "positive"
    elif p <= -0.05:
        return "negative"
    else:
        return "neutral"

df_news["sentiment"] = df_news["polarity"].apply(get_sentiment_label)

# Save to CSV
df_news.to_csv("stock_sentiment_with_translation.csv", index=False, encoding="utf-8-sig")
print("✅ CSV saved: stock_sentiment_with_translation.csv")


📝 नबिल बैंकले ल्यायो ‘नबिल दिगो बचत खाता’ → Nabil Bank brought 'Nabil Save Account' → polarity: 0.4939
📝 नबिल एसएसई फेलोसिप कार्यक्रमको आवेदन खुला, आगामी चैत २५ गतेसम्म आवेदन दिन सक्ने → Nabille is the application of the application of the SSE Fellowocope program open, to apply → polarity: 0.0
📝 अन्तर्राष्ट्रिय नारी दिवसमा नबिल बैंकले गरेको वाकाथन सम्पन्न → Bank's Bank's Bank's Bank's Bank's Bank's Wicker completed in International Woman Day. → polarity: 0.0
📝 कोशी प्रदेशमा नबिल बैंकको ‘नबिल एसएसई कनेक्टः प्रदेशभर उद्यमशीलताको प्रबर्द्धन’ कार्यशाला सम्पन्न → Nabil SSE Connect 'Nabin Bank' Nabin Bank 'Nabin Bank' Nabin Bank 'Nabil SSE Connect: Porthestant Promotionality Prespective Enterprise Enterprise In Housewartal → polarity: 0.0
📝 नबिल बैंकद्वारा नबिल एसएसई कार्यक्रम सम्पन्न → Nabil Bank concludes SSE Program Program → polarity: 0.0
📝 नबिल बैंकद्वारा कतारमा ‘घर त नेपाल’ नामक बैंकिङ सेवाबारे जनचेतना कार्यक्रम सम्पन्न → Popicing program has been completed with the 'home' banking serv

In [15]:
import json
import pandas as pd
from collections import defaultdict
import os

# Load your full sentiment CSV
df = pd.read_csv("stock_sentiment_with_translation.csv")
df["date"] = pd.to_datetime(df["date"], errors="coerce")
df["sentiment"] = df["sentiment"].fillna("neutral")
df["date_str"] = df["date"].dt.strftime("%Y-%m-%d")

# Output structures
all_sentiment_data = {}
histogram = defaultdict(int)
timeline = defaultdict(int)

# Loop through each stock
for symbol in df["symbol"].unique():
    df_stock = df[df["symbol"] == symbol].copy()
    if df_stock.empty:
        continue

    total_articles = len(df_stock)
    sentiment_counts = df_stock["sentiment"].value_counts().to_dict()

    # Percent breakdown
    positive_percent = round((sentiment_counts.get("positive", 0) / total_articles) * 100, 2)
    neutral_percent = round((sentiment_counts.get("neutral", 0) / total_articles) * 100, 2)
    negative_percent = round((sentiment_counts.get("negative", 0) / total_articles) * 100, 2)

    # News list + chart prep
    articles = []
    bar_polarities = defaultdict(lambda: {"positive": 0, "neutral": 0, "negative": 0})
    line_labels = []
    line_scores = []

    for _, row in df_stock.iterrows():
        date_str = row["date_str"]
        sentiment = row["sentiment"]
        polarity = row["polarity"]

        articles.append({
            "title": row["title"],
            "link": row["link"],
            "date": date_str,
            "sentiment": sentiment,
            "polarity": polarity
        })

        # Bar polarity score (instead of count)
        if sentiment in ["positive", "neutral", "negative"]:
            bar_polarities[date_str][sentiment] = polarity

        # Line chart (all polarity scores)
        line_labels.append(date_str)
        line_scores.append(polarity)

        # Global sentiment breakdowns
        histogram[sentiment] += 1
        timeline[date_str] += 1

    # Top positive/negative
    df_sorted = df_stock.sort_values("polarity", ascending=False)
    top_positive = df_sorted.iloc[0][["title", "link", "polarity", "date_str"]].to_dict()
    top_negative = df_sorted.iloc[-1][["title", "link", "polarity", "date_str"]].to_dict()

    # Final bar chart data
    bar_labels = sorted(bar_polarities.keys())
    bar_data = {
        "positive": [bar_polarities[d]["positive"] for d in bar_labels],
        "neutral": [bar_polarities[d]["neutral"] for d in bar_labels],
        "negative": [bar_polarities[d]["negative"] for d in bar_labels],
    }

    # Add to master JSON
    all_sentiment_data[symbol] = {
        "symbol": symbol,
        "company": df_stock["company"].iloc[0],
        "total_articles": total_articles,
        "positive_percent": positive_percent,
        "neutral_percent": neutral_percent,
        "negative_percent": negative_percent,
        "top_positive_news": top_positive,
        "top_negative_news": top_negative,
        "articles": articles,
        "bar_labels": bar_labels,
        "bar_data": bar_data,
        "line_sentiment_labels": line_labels,
        "line_sentiment_scores": line_scores
    }

# Wrap everything together
combined_sentiment_json = {
    "stocks": all_sentiment_data,
    "charts": {
        "histogram": dict(histogram),
        "timeline": {
            "dates": list(timeline.keys()),
            "counts": list(timeline.values())
        }
    }
}

# Save to one JSON file
os.makedirs("sentiment_data", exist_ok=True)
with open("sentiment_data/sentiment_all_stocks.json", "w", encoding="utf-8") as f:
    json.dump(combined_sentiment_json, f, ensure_ascii=False, indent=2)

print("✅ All-in-one sentiment JSON enriched and saved: sentiment_data/sentiment_all_stocks.json")


✅ All-in-one sentiment JSON enriched and saved: sentiment_data/sentiment_all_stocks.json
