In [None]:
# Cell 1: Setup
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import sys
import os

# Add the parent directory to the module search path so that modules located
# one level above the working directory can be imported. The entry is a
# relative path, so it resolves against the kernel's current working directory.
sys.path.append('..')

# Third-party market-data client (yfinance); no project-local classes are imported in this cell
import yfinance as yf

In [None]:
# Cell 2: Download data directly
symbols = ['AAPL', 'GOOGL', 'MSFT', 'TSLA', 'NVDA']

# Make sure the output folder exists. Without it every to_csv call below fails
# on a fresh checkout and is misreported as a per-symbol download error.
os.makedirs('../data/raw', exist_ok=True)

for symbol in symbols:
    try:
        stock = yf.Ticker(symbol)
        df = stock.history(period="1y")
        # An empty frame means no price rows came back (presumably an unknown
        # or delisted symbol - verify against the yfinance docs). Writing it
        # would leave a header-only CSV that looks like a successful download.
        if df.empty:
            print(f"❌ Error with {symbol}: no data returned")
            continue
        df.to_csv(f'../data/raw/{symbol}_1y.csv')
        print(f"✅ Downloaded {len(df)} rows for {symbol}")
    except Exception as e:
        # Best-effort loop: report the failure and carry on with the next symbol.
        print(f"❌ Error with {symbol}: {e}")

In [None]:
# Cell 3: Feature engineering
def prepare_features(df, horizon=5, threshold=0.02):
    """Build technical-indicator features and a binary target from price data.

    The input frame is never modified: all derived columns are written to a
    copy, so the function can be re-run on the same raw frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'Close' and 'Volume' columns. Rolling windows and shifts
        are positional, so rows are assumed to be in chronological order
        (TODO confirm against the data source).
    horizon : int, default 5
        Number of rows to look ahead when computing the future price.
    threshold : float, default 0.02
        Fractional price increase over ``horizon`` rows above which the
        target is 1.

    Returns
    -------
    pandas.DataFrame
        A new frame holding the original columns plus SMA_20, SMA_50, MACD,
        MACD_signal, MACD_diff, RSI, BB_middle, BB_upper, BB_lower,
        price_change_1d, price_change_5d, volume_ratio, future_price,
        price_change_future and target, with every row containing a NaN
        removed. Because of the 50-row moving average and the look-ahead,
        inputs shorter than ``50 + horizon`` rows yield an empty frame.
    """
    # Work on a copy so the caller's raw data stays intact.
    features = df.copy()
    close = features['Close']
    volume = features['Volume']

    # Moving averages
    features['SMA_20'] = close.rolling(window=20).mean()
    features['SMA_50'] = close.rolling(window=50).mean()

    # MACD: fast EMA minus slow EMA, plus its signal line and their difference
    ema_fast = close.ewm(span=12).mean()
    ema_slow = close.ewm(span=26).mean()
    features['MACD'] = ema_fast - ema_slow
    features['MACD_signal'] = features['MACD'].ewm(span=9).mean()
    features['MACD_diff'] = features['MACD'] - features['MACD_signal']

    # RSI from 14-row simple averages of gains and losses
    delta = close.diff()
    gain = delta.where(delta > 0, 0).rolling(window=14).mean()
    loss = (-delta.where(delta < 0, 0)).rolling(window=14).mean()
    # A window with no losses gives an infinite ratio and therefore RSI == 100;
    # a completely flat window gives 0/0 -> NaN, and that row is dropped below.
    rs = gain / loss
    features['RSI'] = 100 - (100 / (1 + rs))

    # Bollinger Bands: the middle band is the same 20-row mean as SMA_20
    features['BB_middle'] = features['SMA_20']
    bb_std = close.rolling(window=20).std()
    features['BB_upper'] = features['BB_middle'] + (bb_std * 2)
    features['BB_lower'] = features['BB_middle'] - (bb_std * 2)

    # Price patterns
    features['price_change_1d'] = close.pct_change(1)
    features['price_change_5d'] = close.pct_change(5)
    features['volume_ratio'] = volume / volume.rolling(window=20).mean()

    # Target variable: 1 when the close `horizon` rows ahead is more than
    # `threshold` (fractional) above the current close, else 0.
    features['future_price'] = close.shift(-horizon)
    features['price_change_future'] = (features['future_price'] - close) / close
    features['target'] = np.where(features['price_change_future'] > threshold, 1, 0)

    # The trailing `horizon` rows have no future price (NaN) and are removed
    # here together with the indicator warm-up rows.
    return features.dropna()

In [None]:
# Get ticker and time period from user.
# Normalise the raw input so that stray whitespace or a different letter case
# still matches the "<SYMBOL>_<period>.csv" names written by the download cell.
ticker = input("Enter the stock ticker (e.g., AAPL): ").strip().upper()
time_period = input("Enter the time period (e.g., 1y, 5y, 1mo): ").strip().lower()

# Construct the file path using user input
file_path = f"../data/raw/{ticker}_{time_period}.csv"
print(f"Loading data from: {file_path}")

# Load and process the data
try:
    df = pd.read_csv(file_path, index_col=0, parse_dates=True)

    # parse_dates does not produce a DatetimeIndex when the timestamps carry
    # mixed UTC offsets (e.g. tz-aware data spanning a DST change). Converting
    # to UTC gives one consistent datetime index in that case.
    if not isinstance(df.index, pd.DatetimeIndex):
        try:
            df.index = pd.to_datetime(df.index, utc=True)
        except (ValueError, TypeError):
            # Index is not date-like at all; keep it exactly as loaded.
            pass

    df_features = prepare_features(df)

    if df_features.empty:
        # Feature preparation drops every row that has a NaN, and the 50-row
        # moving average alone needs 50 rows, so short periods leave nothing.
        print(f"❌ Error: Not enough rows in '{file_path}' to compute features. Please choose a longer time period.")
        df_features = None  # Downstream cells treat None as "nothing to show"
    else:
        print(f"✅ Processed {len(df_features)} rows with features for {ticker} over {time_period}")
except FileNotFoundError:
    print(f"❌ Error: File not found for ticker '{ticker}' and period '{time_period}'. Please check the inputs and ensure the file exists.")
    df_features = None  # Set df_features to None to handle the error gracefully

In [None]:
import matplotlib.pyplot as plt

# Visualise the closing price and the class balance of the target.
# Guard both failure modes first: no frame at all, or a frame with no rows
# (plotting the value counts of an empty column fails).
if df_features is None:
    print("Cannot visualize data: An error occurred during the data loading step.")
elif df_features.empty:
    print("Cannot visualize data: No rows remain after feature preparation.")
else:
    # Explicit figure/axes instead of the pyplot state machine, so each panel
    # is addressed directly.
    fig, (ax_price, ax_target) = plt.subplots(2, 1, figsize=(15, 10))

    # Plot 1: Stock Price
    ax_price.plot(df_features.index, df_features['Close'])
    ax_price.set_title(f"{ticker} Stock Price over {time_period}")
    ax_price.set_ylabel("Price")
    ax_price.set_xlabel("Date")

    # Plot 2: Target Distribution
    # sort_index keeps the bars in class order (0, 1) regardless of which
    # class is more frequent.
    df_features['target'].value_counts().sort_index().plot(kind='bar', ax=ax_target)
    ax_target.set_title(f"Target Distribution for {ticker} over {time_period}")
    # The target marks a rise above 2% five rows ahead, not simply up vs. down.
    ax_target.set_xlabel("Target (1: >2% rise 5 rows ahead, 0: otherwise)")
    ax_target.set_ylabel("Count")

    fig.tight_layout()
    plt.show()