In [103]:
import kagglehub
import csv
from datetime import datetime
from decimal import Decimal, getcontext

In [104]:

# Download latest version
path = kagglehub.dataset_download("dgawlik/nyse")

print("Path to dataset files:", path)

Path to dataset files: C:\Users\USER\.cache\kagglehub\datasets\dgawlik\nyse\versions\3


In [105]:

# Set the precision for decimal operations
getcontext().prec = 28  # A high precision level for financial calculations

# Reads stock data from the CSV file for a specific symbol
def read_stock_data(filepath, symbol):
    stock_data = []
    with open(filepath, mode='r') as file:
        reader = csv.reader(file)
        next(reader)  # Skip the header
        for row in reader:
            # Filter by symbol
            if row[1] == symbol:
                date = datetime.strptime(row[0], '%Y-%m-%d')
                close_price = Decimal(row[3])
                stock_data.append((date, close_price))
    return stock_data

In [106]:
# Calculates the daily price changes
def calculate_price_changes(stock_data):
    changes = []
    for i in range(1, len(stock_data)):
        date = stock_data[i][0]
        price_change = stock_data[i][1] - stock_data[i-1][1]
        changes.append((price_change))
    return changes


In [107]:
# Max profit using Divide and Conquer (DAC)
def max_crossing_profit(prices, low, mid, high):
    left_profit = Decimal('-inf')
    right_profit = Decimal('-inf')
    left_min_idx = right_max_idx = mid
    total_left = total_right =Decimal(0)

    for i in range(mid, low-1, -1):
        total_left += prices[i]
        if total_left > left_profit:
            left_profit = total_left
            left_min_idx = i

    for i in range(mid+1, high+1):
        total_right += prices[i]
        if total_right > right_profit:
            right_profit = total_right
            right_max_idx = i

    return left_min_idx, right_max_idx, left_profit + right_profit

In [108]:
def max_profit_dac(prices, low, high):
    if low == high:
        return low, high, Decimal(0)

    mid = (low + high) // 2
    left_min, left_max, left_profit = max_profit_dac(prices, low, mid)
    right_min, right_max, right_profit = max_profit_dac(prices, mid+1, high)
    cross_min, cross_max, cross_profit = max_crossing_profit(
        prices, low, mid, high)

    if left_profit >= right_profit and left_profit >= cross_profit:
        return left_min, left_max, left_profit
    elif right_profit >= left_profit and right_profit >= cross_profit:
        return right_min, right_max, right_profit
    else:
        return cross_min, cross_max, cross_profit
  

In [109]:
# Finds the best buy and sell days for a specific stock using DAC
def find_best_buy_and_sell(filepath, symbol):
    stock_data = read_stock_data(filepath, symbol)  # Get stock data

    if len(stock_data) < 2:
        print(f"Not enough data for symbol: {symbol}")
        return

    price_changes = calculate_price_changes(
        stock_data)  # Calculate price changes

    buy_idx, sell_idx, max_profit = max_profit_dac(
        price_changes, 0, len(price_changes) - 1)  # Apply DAC

    # Results
    print(f"{symbol} Profit: {max_profit} buy on day: {buy_idx} ({stock_data[buy_idx][0].strftime('%Y-%m-%d')}) "
          f"and sell on day: {sell_idx} ({stock_data[sell_idx][0].strftime('%Y-%m-%d')})")


# Test for AAPL
filepath = "C:/Users/USER/.cache/kagglehub/datasets/dgawlik/nyse/versions/3/prices-split-adjusted.csv"
symbol = "AAPL"  # Specify the stock symbol to be checked
find_best_buy_and_sell(filepath, symbol)

AAPL Profit: 105.5642852857 buy on day: 22 (2010-02-04) and sell on day: 1291 (2015-02-20)


In [110]:
# Reads the securities file to get all the stock symbols and company names
def read_securities(filepath):
    securities = {}
    with open(filepath, mode='r') as file:
        reader = csv.reader(file)
        next(reader)  # Skip the header
        for row in reader:
            symbol = row[0]
            company_name = row[1]
            securities[symbol] = company_name
    return securities

In [111]:
# Finding the best stock by applying DAC to all stock symbols
def find_best_stock(prices_filepath, securities_filepath):
    securities = read_securities(securities_filepath)
    best_profit = Decimal('-inf')
    best_company = None
    best_buy_date = best_sell_date = None

    for symbol, company in securities.items():
        stock_data = read_stock_data(prices_filepath, symbol)

        if len(stock_data) < 2:
            # Skip stocks with no data
            continue

        # Calculate price changes
        changes = calculate_price_changes(stock_data)

        if len(changes) < 2:
            continue

        # Divide and Conquer algorithm on price changes
        buy_idx, sell_idx, max_profit = max_profit_dac(
            changes, 0, len(changes) - 1)

        if max_profit > best_profit:
            best_profit = max_profit
            best_company = company
            best_buy_date = stock_data[buy_idx][0]
            best_sell_date = stock_data[sell_idx][0]

    print(f"Best stock to buy: \"{best_company}\" on {best_buy_date} and sell on {
          best_sell_date} with profit of {best_profit}")


In [112]:

# Test to find best stock
prices_filepath = "C:/Users/USER/.cache/kagglehub/datasets/dgawlik/nyse/versions/3/prices-split-adjusted.csv"
securities_filepath = "C:/Users/USER/.cache/kagglehub/datasets/dgawlik/nyse/versions/3/securities.csv"
find_best_stock(prices_filepath, securities_filepath)

Best stock to buy: "Priceline.com Inc" on 2010-06-09 00:00:00 and sell on 2016-11-07 00:00:00 with profit of 1402.940003
