# In [None]:
import os
import yfinance as yf
import pandas as pd
import requests
from bs4 import BeautifulSoup
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import csv
from PIL import Image, ImageDraw

def get_sp500_tickers():
    """Scrape the S&P 500 constituent ticker symbols from Wikipedia.

    Reads the HTML table whose id is ``constituents`` and collects the
    stripped text of the first cell of every row after the header row.

    Returns:
        list[str]: Ticker symbols in table order. An empty list is returned
        (after printing a message) when the request fails, the response
        status is not 200, or the table cannot be found.
    """
    url = "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies"
    try:
        # A timeout keeps a stalled connection from hanging the whole run.
        response = requests.get(url, timeout=30)
    except requests.RequestException as exc:
        # Same failure convention as a non-200 status: report and return [].
        print(f"Failed to fetch S&P 500 tickers: {exc}")
        return []

    if response.status_code != 200:
        print(f"Failed to fetch S&P 500 tickers: {response.status_code}")
        return []

    soup = BeautifulSoup(response.text, 'html.parser')
    table = soup.find('table', {'id': 'constituents'})
    if table is None:
        # The page layout changed or the response was not the expected page.
        print("Failed to fetch S&P 500 tickers: constituents table not found")
        return []

    tickers = []
    for row in table.find_all('tr')[1:]:
        cells = row.find_all('td')
        # Skip rows that carry no data cells (e.g. extra header rows).
        if cells:
            tickers.append(cells[0].text.strip())
    return tickers

def get_nasdaq_tickers():
    """Scrape the NASDAQ-100 constituent ticker symbols from Wikipedia.

    Reads the HTML table whose id is ``constituents`` and collects the
    stripped text of the second cell of every row after the header row.

    Returns:
        list[str]: Ticker symbols in table order. An empty list is returned
        (after printing a message) when the request fails, the response
        status is not 200, or the table cannot be found.
    """
    url = "https://en.wikipedia.org/wiki/NASDAQ-100"
    try:
        # A timeout keeps a stalled connection from hanging the whole run.
        response = requests.get(url, timeout=30)
    except requests.RequestException as exc:
        # Same failure convention as a non-200 status: report and return [].
        print(f"Failed to fetch NASDAQ tickers: {exc}")
        return []

    if response.status_code != 200:
        print(f"Failed to fetch NASDAQ tickers: {response.status_code}")
        return []

    soup = BeautifulSoup(response.text, 'html.parser')
    table = soup.find('table', {'id': 'constituents'})
    if table is None:
        # The page layout changed or the response was not the expected page.
        print("Failed to fetch NASDAQ tickers: constituents table not found")
        return []

    tickers = []
    for row in table.find_all('tr')[1:]:
        cells = row.find_all('td')
        # Ticker symbol in second column; skip rows too short to have one.
        if len(cells) > 1:
            tickers.append(cells[1].text.strip())
    return tickers

def get_nse_tickers():
    """Return the fixed list of ten ``.NS``-suffixed ticker symbols.

    Returns:
        list[str]: A new list on every call, always in the same order.
    """
    return [
        'RELIANCE.NS',
        'TCS.NS',
        'INFY.NS',
        'HDFCBANK.NS',
        'ICICIBANK.NS',
        'HINDUNILVR.NS',
        'SBIN.NS',
        'BHARTIARTL.NS',
        'HCLTECH.NS',
        'ITC.NS',
    ]

def get_data(ticker, start, end):
    """Download price history for ``ticker`` through ``yf.download``.

    Args:
        ticker: Symbol forwarded unchanged to ``yf.download``.
        start: Start date forwarded as the ``start`` keyword.
        end: End date forwarded as the ``end`` keyword.

    Returns:
        The DataFrame produced by ``yf.download``. When that frame has
        two-level columns describing a single ticker, the columns are
        flattened to the first level so that ``df['Close']`` is a Series.
    """
    df = yf.download(ticker, start=start, end=end)

    # Newer yfinance releases may label columns as (field, ticker) pairs even
    # for one symbol; the rest of this file indexes plain field names.
    # NOTE(review): assumes the field name is the first level (yfinance's
    # default column grouping) -- confirm against the installed version.
    columns = df.columns
    if isinstance(columns, pd.MultiIndex) and columns.nlevels == 2:
        # Only flatten when no information is lost (at most one ticker).
        if columns.get_level_values(1).nunique() <= 1:
            df.columns = columns.get_level_values(0)
    return df

def calculate_bollinger_bands(df, window=20, no_of_std=2):
    """Add rolling-mean and band columns derived from ``df['Close']``.

    The frame is modified in place: ``SMA`` and ``STD`` hold the rolling mean
    and rolling standard deviation over ``window`` rows, and ``Upper`` /
    ``Lower`` sit ``no_of_std`` standard deviations above / below ``SMA``.

    Args:
        df: DataFrame with a ``Close`` column.
        window: Number of rows in each rolling window.
        no_of_std: Multiplier applied to the rolling standard deviation.

    Returns:
        The same ``df`` object, with the four columns added.
    """
    rolling_close = df['Close'].rolling(window)
    df['SMA'] = rolling_close.mean()
    df['STD'] = rolling_close.std()

    # Distance from the moving average to either band.
    band_offset = df['STD'] * no_of_std
    df['Upper'] = df['SMA'] + band_offset
    df['Lower'] = df['SMA'] - band_offset
    return df

def find_signals(df):
    """Flag rows where the close crosses outside a band.

    Adds two boolean columns to ``df`` in place:

    * ``Buy_Signal``: the close is below ``Lower`` on this row and was above
      ``Lower`` on the previous row.
    * ``Sell_Signal``: the close is above ``Upper`` on this row and was below
      ``Upper`` on the previous row.

    Args:
        df: DataFrame with ``Close``, ``Lower`` and ``Upper`` columns.

    Returns:
        The same ``df`` object, with the two columns added.
    """
    close = df['Close']
    lower = df['Lower']
    prev_close = close.shift(1)

    was_above_lower = prev_close > lower.shift(1)
    df['Buy_Signal'] = was_above_lower & (close < lower)

    upper = df['Upper']
    was_below_upper = prev_close < upper.shift(1)
    df['Sell_Signal'] = was_below_upper & (close > upper)
    return df

def _signal_boxes(ax, signals, img_height, label, box_size=40):
    """Return ``[xmin, ymin, xmax, ymax, label]`` pixel boxes, one per signal row."""
    half = box_size // 2
    boxes = []
    for _, row in signals.iterrows():
        date_num = mdates.date2num(row['Date'])
        x, y = ax.transData.transform((date_num, row['Close']))
        # Matplotlib display coordinates start at the bottom-left, image
        # coordinates at the top-left, so the vertical axis is flipped.
        y = img_height - y
        boxes.append(
            [int(x - half), int(y - half), int(x + half), int(y + half), label]
        )
    return boxes


def plot_bollinger_bands_yearly(ticker, df, year, output_dir):
    """Render one year of price and bands to a PNG and locate its signals.

    The chart is written to ``<output_dir>/<ticker>_<year>.png``. For every
    row flagged in ``Buy_Signal`` or ``Sell_Signal`` a 40x40 pixel box centred
    on that row's (date, close) point is computed in image coordinates.

    Args:
        ticker: Symbol used in the output filename.
        df: DataFrame with ``Date`` (datetime), ``Close``, ``Upper``,
            ``Lower``, ``SMA``, ``Buy_Signal`` and ``Sell_Signal`` columns.
        year: Calendar year to select from ``df['Date']``.
        output_dir: Existing directory the PNG is written into.

    Returns:
        tuple: ``(bounding_boxes, filename)`` where ``bounding_boxes`` is a
        list of ``[xmin, ymin, xmax, ymax, label]`` entries (buy signals
        first, then sell signals) and ``label`` is ``"buy_signal"`` or
        ``"sell_signal"``. ``(None, None)`` when ``df`` has no rows for
        ``year``.
    """
    df_year = df[df['Date'].dt.year == year]
    if df_year.empty:
        return None, None

    filename = f"{ticker}_{year}.png"
    filepath = os.path.join(output_dir, filename)

    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        ax.plot(df_year['Date'], df_year['Close'], label='Close Price')
        ax.plot(df_year['Date'], df_year['Upper'], label='Upper Band')
        ax.plot(df_year['Date'], df_year['Lower'], label='Lower Band')
        ax.plot(df_year['Date'], df_year['SMA'], label='SMA')

        # Save at the figure's own DPI so that ax.transData pixel coordinates
        # line up with the pixels of the file on disk.
        fig.savefig(filepath, dpi='figure')

        # Only the image height is needed; close the file straight away.
        with Image.open(filepath) as img:
            img_height = img.size[1]

        # Transforms are read while the figure is still open and after the
        # save has drawn it, so the axis limits are final.
        bounding_boxes = _signal_boxes(
            ax, df_year[df_year['Buy_Signal']], img_height, "buy_signal"
        )
        bounding_boxes += _signal_boxes(
            ax, df_year[df_year['Sell_Signal']], img_height, "sell_signal"
        )
    finally:
        # Always release the figure, even if plotting or saving failed;
        # this function is called once per ticker per year.
        plt.close(fig)

    return bounding_boxes, filename

def visualize_bounding_boxes(image_dir, annotations_file, num_images=5):
    """Show the first few annotated images with their boxes outlined.

    Args:
        image_dir: Directory containing the image files.
        annotations_file: CSV with ``image_filename``, ``xmin``, ``ymin``,
            ``xmax``, ``ymax`` and ``label`` columns.
        num_images: How many distinct filenames (in order of first
            appearance in the CSV) to display.

    Each selected image is opened, every box listed for it is outlined in red
    for ``"buy_signal"`` and blue for any other label, and the result is
    displayed with ``Image.show``. Nothing is returned or written to disk.
    """
    table = pd.read_csv(annotations_file)
    chosen = table['image_filename'].unique()[:num_images]

    for name in chosen:
        picture = Image.open(os.path.join(image_dir, name))
        pen = ImageDraw.Draw(picture)

        rows_for_image = table[table['image_filename'] == name]
        for _, entry in rows_for_image.iterrows():
            corners = [entry['xmin'], entry['ymin'], entry['xmax'], entry['ymax']]
            if entry['label'] == "buy_signal":
                outline = "red"
            else:
                outline = "blue"
            pen.rectangle(corners, outline=outline)

        picture.show()

# --- Driver: collect tickers, render yearly charts, save and preview annotations ---
tickers = []
try:
    sp500_tickers = get_sp500_tickers()
    nasdaq_tickers = get_nasdaq_tickers()
    nse_tickers = get_nse_tickers()

    additional_tickers = [
        'AAPL', 'MSFT', 'GOOGL', 'AMZN', 'FB', 'BRK-B', 'JNJ', 'V', 'WMT', 'PG',
        'XOM', 'JPM', 'UNH', 'DIS', 'NVDA', 'HD', 'MA', 'PYPL', 'CMCSA', 'VZ',
        'NFLX', 'KO', 'PFE', 'MRK', 'PEP', 'T', 'ABT', 'ABBV', 'CVX', 'MCD',
        'WFC', 'NKE', 'INTC', 'TMO', 'MDT', 'ORCL', 'COST', 'DHR', 'TMUS', 'ADBE'
    ]

    # Scraped share-class symbols are written with a dot (e.g. "BRK.B") while
    # the hand-written list above uses the hyphen form ("BRK-B") expected by
    # the data source. Only the S&P 500 list is rewritten; the ".NS" suffix on
    # the NSE symbols must keep its dot.
    sp500_tickers = [symbol.replace('.', '-') for symbol in sp500_tickers]

    combined = sp500_tickers + nasdaq_tickers + nse_tickers + additional_tickers
    # The sources overlap heavily. Drop repeats (keeping first-seen order) so
    # no ticker is downloaded twice and no annotation row is duplicated.
    tickers = list(dict.fromkeys(combined))[:1100]

    print(f"Number of tickers: {len(tickers)}")
    print(f"First few tickers: {tickers[:5]}")
except Exception as e:
    print(f"An error occurred while fetching tickers: {e}")

if tickers:
    output_dir = 'bollinger_band_images'
    os.makedirs(output_dir, exist_ok=True)
    print(f"Directory '{output_dir}' created or already exists.")

    annotations = []
    years = range(2000, 2020)

    for ticker in tickers:
        print(f"Processing ticker: {ticker}")
        # Errors are handled per ticker so one bad symbol does not stop the
        # remaining tickers from being processed.
        try:
            # yfinance treats ``end`` as exclusive, so 2020-01-01 is needed to
            # include the final trading day of 2019.
            df = get_data(ticker, start='2000-01-01', end='2020-01-01')
            if df.empty:
                print(f"No data for {ticker}. Skipping.")
                continue
            df['Date'] = pd.to_datetime(df.index)
            df = calculate_bollinger_bands(df)
            df = find_signals(df)

            for year in years:
                bounding_boxes, filename = plot_bollinger_bands_yearly(ticker, df, year, output_dir)
                if filename and bounding_boxes:
                    # One CSV row per box: filename followed by the box fields.
                    annotations.extend([filename, *bbox] for bbox in bounding_boxes)
        except Exception as e:
            print(f"An error occurred during processing: {e}")

    annotations_file = 'annotations.csv'
    try:
        with open(annotations_file, 'w', newline='') as file:
            writer = csv.writer(file)
            writer.writerow(["image_filename", "xmin", "ymin", "xmax", "ymax", "label"])
            writer.writerows(annotations)
        print(f"Annotations saved to {annotations_file}.")
    except Exception as e:
        print(f"An error occurred while saving annotations: {e}")
    else:
        # Only preview when the CSV was actually written; otherwise reading it
        # back would fail or show stale data from an earlier run.
        visualize_bounding_boxes(output_dir, annotations_file)
