<a href="https://colab.research.google.com/github/harshita20022002/gtete/blob/main/LSTM_Stock_Price_Prediction_Model_(using_pandas_ta).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Uninstall existing pandas_ta and numpy
print("Uninstalling existing pandas_ta and numpy...")
!pip uninstall -y pandas_ta numpy
print("Uninstallation complete.")

# Install a compatible numpy version that works with pandas_ta and common Colab packages
# numpy 1.26.4 is a good balance for current Colab environments as of mid-2024
print("Installing numpy version 1.26.4...")
!pip install numpy==1.26.4
print("Numpy installed.")

# Install pandas_ta (it should now use the compatible numpy)
print("Installing pandas_ta...")
!pip install pandas_ta
print("pandas_ta installed.")

Uninstalling existing pandas_ta and numpy...
[0mFound existing installation: numpy 1.23.5
Uninstalling numpy-1.23.5:
  Successfully uninstalled numpy-1.23.5
Uninstallation complete.
Installing numpy version 1.26.4...
Collecting numpy==1.26.4
  Downloading numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.0/61.0 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (18.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m18.3/18.3 MB[0m [31m75.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: numpy
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
thinc 8.3.6 requires numpy<3.0.0,>=2.0.0, but you have numpy 1.26.4 which is incompatible.[

Numpy installed.
Installing pandas_ta...
Collecting pandas_ta
  Using cached pandas_ta-0.3.14b0-py3-none-any.whl
Installing collected packages: pandas_ta
Successfully installed pandas_ta-0.3.14b0
pandas_ta installed.


In [4]:
import yfinance as yf
import pandas as pd
import pandas_ta as ta

def get_stock_data_and_indicators(ticker, start_date, end_date):
    """
    Fetches historical stock data and applies various technical indicators.

    Args:
        ticker (str): Stock ticker symbol (e.g., 'AAPL').
        start_date (str): Start date for data fetching (YYYY-MM-DD).
        end_date (str): End date for data fetching (YYYY-MM-DD).

    Returns:
        pandas.DataFrame: DataFrame with stock data and technical indicators.
    """
    print(f"Fetching historical data for {ticker} from {start_date} to {end_date}...")
    try:
        # Fetch data using yfinance
        # FutureWarning: YF.download() has changed argument auto_adjust default to True
        # This warning means 'auto_adjust' is now True by default, which is generally desired
        # as it adjusts for splits and dividends.
        data = yf.download(ticker, start=start_date, end=end_date)
        print("Data fetched successfully. Head of the data:")
        print(data.head())

        # Handle MultiIndex columns if present
        if isinstance(data.columns, pd.MultiIndex):
            print("Detected MultiIndex columns. Attempting to flatten them...")
            # Flatten MultiIndex columns by joining level names
            data.columns = ['_'.join(col).strip() for col in data.columns.values]
            print("MultiIndex columns flattened.")

        # Convert column names to lowercase for consistency
        data.columns = [col.lower() for col in data.columns]
        print("Column names converted to lowercase.")

        # Select core features. Adjust these based on your actual column names after flattening.
        # Common expected columns are 'open', 'high', 'low', 'close', 'volume'
        # Ensure 'adj close' is handled if 'close' isn't adjusted
        required_cols = ['open', 'high', 'low', 'close', 'volume']
        # If 'adj close' is the primary price, use it for calculations.
        # pandas_ta usually uses 'close' by default, which should ideally be adjusted.
        if 'adj close' in data.columns:
            data.rename(columns={'adj close': 'close'}, inplace=True)
            print("Renamed 'adj close' to 'close' for consistency.")

        data = data[[col for col in required_cols if col in data.columns]].copy()

        print("\nDataFrame after robust column handling and selecting core features:")
        print(data.head())

        print("\nApplying technical indicators and adding custom features using pandas_ta...")

        # --- Custom Features ---
        data['daily_range'] = data['high'] - data['low']
        print("Added 'daily_range' column.")

        # Using .loc to avoid SettingWithCopyWarning
        data.loc[:, 'is_up_day'] = (data['close'] > data['open']).astype(int)
        print("Added 'is_up_day' column (0 or 1).")

        # Price volatility (e.g., 5-day rolling standard deviation of daily returns)
        data.loc[:, 'daily_return'] = data['close'].pct_change()
        data.loc[:, 'price_volatility_5d'] = data['daily_return'].rolling(window=5).std()
        print("Added 'price_volatility_5d' column.")


        # --- pandas_ta Technical Indicators ---

        # Add all common indicators to the DataFrame. This is a quick way to add many.
        # The 'append=True' argument directly adds the indicators to your DataFrame.
        # This will add columns like 'SMA_10', 'EMA_20', 'RSI_14', 'MACD_12_26_9', 'MACDH_12_26_9', 'MACDS_12_26_9' etc.
        data.ta.strategy("All", append=True)
        print("Added various indicators using ta.strategy('All').")

        # Specifically for MACD, if you want to ensure the columns are present and named correctly,
        # you can call it directly and inspect the resulting DataFrame.
        # macd_output = ta.macd(data['close'], append=True)
        # print(f"MACD columns generated: {macd_output.columns.tolist()}")

        # Verification and Renaming (if necessary for MACD, though 'All' strategy should include it)
        # Check if MACD columns exist and rename if pandas_ta produces different names
        expected_macd_cols = ['MACD_12_26_9', 'MACDH_12_26_9', 'MACDS_12_26_9']
        found_macd_cols = [col for col in expected_macd_cols if col in data.columns]

        if len(found_macd_cols) < len(expected_macd_cols):
            print("\nWarning: Not all expected MACD columns were found. Recalculating MACD explicitly.")
            # Recalculate MACD and ensure it's merged
            macd_series = ta.macd(data['close'], append=False) # append=False returns a DataFrame
            # If macd_series is not None and not empty, merge it
            if macd_series is not None and not macd_series.empty:
                # Ensure we don't have duplicate columns before joining
                for col in macd_series.columns:
                    if col in data.columns:
                        data.drop(columns=[col], inplace=True)
                data = data.join(macd_series)
                print("Explicitly added MACD, MACDH, MACDS columns.")
            else:
                print("Could not explicitly add MACD columns.")

        # After ensuring MACD columns are present, you can safely access them
        if 'MACDS_12_26_9' in data.columns:
            print("\n'MACDS_12_26_9' column successfully created and found.")
            print("Head of MACD related columns:")
            print(data[['MACD_12_26_9', 'MACDH_12_26_9', 'MACDS_12_26_9']].tail())
        else:
            print("\nError: 'MACDS_12_26_9' column still not found after attempts.")

        # Drop any NaN values introduced by rolling windows or indicators
        data.dropna(inplace=True)
        print("\nNaN values dropped.")
        print("\nFinal DataFrame head:")
        print(data.head())
        print("\nFinal DataFrame Info:")
        data.info()

        return data

    except Exception as e:
        print(f"An error occurred: {e}")
        return None

# Example usage:
ticker_symbol = 'AAPL'
start = '2015-01-01'
end = '2024-06-20'

df_aapl = get_stock_data_and_indicators(ticker_symbol, start, end)

if df_aapl is not None:
    print("\nData processing complete. DataFrame is ready for further analysis.")


  data = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed

Fetching historical data for AAPL from 2015-01-01 to 2024-06-20...
Data fetched successfully. Head of the data:
Price           Close       High        Low       Open     Volume
Ticker           AAPL       AAPL       AAPL       AAPL       AAPL
Date                                                             
2015-01-02  24.288580  24.757334  23.848706  24.746226  212818400
2015-01-05  23.604336  24.137516  23.417723  24.057539  257142000
2015-01-06  23.606560  23.866485  23.244440  23.668764  263188400
2015-01-07  23.937574  24.037545  23.704307  23.815387  160423600
2015-01-08  24.857313  24.915075  24.148627  24.266372  237458000
Detected MultiIndex columns. Attempting to flatten them...
MultiIndex columns flattened.
Column names converted to lowercase.

DataFrame after robust column handling and selecting core features:
Empty DataFrame
Columns: []
Index: [2015-01-02 00:00:00, 2015-01-05 00:00:00, 2015-01-06 00:00:00, 2015-01-07 00:00:00, 2015-01-08 00:00:00]

Applying technical indi




In [None]:
pip install pandas_ta