<a href="https://colab.research.google.com/github/jeva4a4556/coursera/blob/master/STOCKDATA.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import yfinance as yf
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
import time
import talib as ta
import itertools

def get_stock_data(ticker, start_date, end_date):
    """Download price history for ``ticker`` through yfinance.

    ``start_date`` and ``end_date`` are forwarded unchanged as the ``start``
    and ``end`` arguments of ``yf.download``; the progress bar is disabled.
    Returns whatever ``yf.download`` returns.
    """
    return yf.download(ticker, start=start_date, end=end_date, progress=False)


def generate_technical_indicators(data):
    """Return a copy of ``data`` with SMA, EMA, MACD and RSI columns added.

    All indicators are computed with TA-Lib from the ``'Close'`` column:

    * ``SMA`` / ``EMA`` / ``RSI`` use a 14-period window.
    * ``MACD`` holds the MACD line minus its signal line (12/26/9 periods).

    Rows containing missing values (including the indicator warm-up rows)
    are dropped.

    The caller's frame is not modified, and the returned frame is an
    independent copy, so later column assignments on it do not raise
    pandas' SettingWithCopyWarning.
    """
    # Work on a private copy so the input frame keeps its original columns.
    data = data.copy()
    close = data['Close']

    data['SMA'] = ta.SMA(close, timeperiod=14)
    data['EMA'] = ta.EMA(close, timeperiod=14)
    macd, macd_signal, _ = ta.MACD(close, fastperiod=12, slowperiod=26, signalperiod=9)
    data['MACD'] = macd - macd_signal
    data['RSI'] = ta.RSI(close, timeperiod=14)

    # dropna() may hand back an object pandas tracks as a slice of ``data``;
    # the explicit copy detaches it so callers can safely add columns.
    return data.dropna().copy()


def calculate_rsi(prices, window=14):
    """Compute a Relative Strength Index from simple rolling means.

    ``prices`` is a pandas Series. Period-to-period changes are split into
    gains (negative changes replaced by 0) and losses (positive changes
    replaced by 0, then negated). Each side is averaged over ``window``
    periods with ``rolling(window).mean()``, and the result is
    ``100 - 100 / (1 + avg_gain / avg_loss)``.
    """
    change = prices.diff()

    # Clipping keeps NaN as NaN, exactly like masking on a comparison does.
    ups = change.clip(lower=0)
    downs = -change.clip(upper=0)

    mean_up = ups.rolling(window).mean()
    mean_down = downs.rolling(window).mean()

    relative_strength = mean_up / mean_down
    return 100 - (100 / (1 + relative_strength))


def generate_labels(data, target_shift):
    """Return a labelled copy of ``data`` with a binary ``'Target'`` column.

    ``Target`` is 1 when the ``'Close'`` value ``target_shift`` rows later is
    strictly greater than the current ``'Close'``, otherwise 0.

    Rows whose future close is unknown (the trailing ``target_shift`` rows,
    or any row whose shifted close is NaN) are dropped instead of being
    labelled 0: a comparison against NaN is always False, so keeping them
    would add spurious "down" labels to the training set.

    The input frame is left untouched; a new frame is returned.
    """
    future_close = data['Close'].shift(-target_shift)

    # Positional boolean mask of rows for which the outcome is observable.
    known = future_close.notna().to_numpy()

    labelled = data.loc[known].copy()
    labelled['Target'] = np.where(
        future_close.to_numpy()[known] > labelled['Close'].to_numpy(), 1, 0
    )
    return labelled


def find_best_indicators(data, indicators):
    """Return the entries of ``indicators`` that are columns of ``data``.

    The order of ``indicators`` is preserved. Only presence in
    ``data.columns`` is checked; no performance ranking is done here.
    """
    columns = data.columns
    return [name for name in indicators if name in columns]


def preprocess_data(data, indicators):
    """Return a copy of ``data`` with missing indicator values mean-imputed.

    Each column named in ``indicators`` has its missing values replaced by
    that column's mean, using scikit-learn's ``SimpleImputer``.

    The input frame is not modified. If ``data`` has no rows or
    ``indicators`` is empty there is nothing to impute, so the copy is
    returned as-is without building an imputer.
    """
    # Copy first: assigning into the caller's (possibly sliced) frame is what
    # produces pandas' SettingWithCopyWarning.
    data = data.copy()

    if data.empty or len(indicators) == 0:
        return data

    imputer = SimpleImputer(strategy='mean')
    data.loc[:, indicators] = imputer.fit_transform(data[indicators])
    return data


def train_strategy_model(data, indicators, time_limit):
    """Fit a random forest on ``indicators`` and score it on a hold-out split.

    Parameters:
        data: frame containing the indicator columns and a ``'Target'`` column.
        indicators: names of the feature columns to train on.
        time_limit: kept for backward compatibility and otherwise unused.
            The split and the classifier both use ``random_state=42``, so
            refitting until a deadline produced the same model every time;
            the model is now fitted exactly once.

    Returns:
        ``(model, accuracy, rmse)`` measured on the 20% hold-out split, or
        ``(None, None, None)`` when ``data`` is empty, ``indicators`` is
        empty, or any part of the split is empty.
    """
    # Guard before touching scikit-learn so empty input is a cheap no-op.
    if data.empty or len(indicators) == 0:
        return None, None, None

    X_train, X_test, y_train, y_test = train_test_split(
        data[indicators], data['Target'], test_size=0.2, random_state=42
    )

    # Check that the features and target labels are not empty
    if X_train.empty or y_train.empty or X_test.empty or y_test.empty:
        return None, None, None

    strategy_model = RandomForestClassifier(random_state=42)
    strategy_model.fit(X_train, y_train)

    predictions = strategy_model.predict(X_test)
    accuracy = accuracy_score(y_test, predictions)
    rmse = np.sqrt(mean_squared_error(y_test, predictions))

    return strategy_model, accuracy, rmse


def evaluate_strategy_model(data, strategy_model, indicators):
    """Score ``strategy_model`` on every row of ``data``.

    Parameters:
        data: frame containing the ``indicators`` columns and ``'Target'``.
        strategy_model: a fitted model exposing ``predict``, or ``None``.
        indicators: names of the feature columns passed to ``predict``.

    Returns:
        ``(accuracy, rmse)`` of the predictions against ``data['Target']``,
        or ``(None, None)`` when ``strategy_model`` is ``None``. Both paths
        return a 2-tuple so callers can always unpack two values.
    """
    if strategy_model is None:
        return None, None

    X = data[indicators]
    y = data['Target']
    predictions = strategy_model.predict(X)
    accuracy = accuracy_score(y, predictions)
    rmse = np.sqrt(mean_squared_error(y, predictions))
    return accuracy, rmse


def find_best_strategy(data, strategies, time_limit, best_indicators):
    """Train one model per strategy and keep the best-scoring one.

    Each strategy is a sequence of flags paired positionally with
    ``best_indicators``; the indicators whose flag is truthy are passed to
    ``train_strategy_model``. A candidate wins when its accuracy is higher
    than the current best, or equal with a lower (non-None) RMSE.

    Returns ``(model, accuracy, rmse)`` for the winner, or ``(None, -1, -1)``
    when no strategy produced an accuracy.
    """
    best_strategy, best_accuracy, best_rmse = None, -1, -1

    for flags in strategies:
        selected = [name for name, enabled in zip(best_indicators, flags) if enabled]
        candidate, accuracy, rmse = train_strategy_model(data, selected, time_limit)

        # Strategies that could not be trained report no accuracy.
        if accuracy is None:
            continue

        if accuracy <= best_accuracy:
            # Not strictly better: only an exact tie with lower RMSE may win.
            if accuracy != best_accuracy or rmse is None or not rmse < best_rmse:
                continue

        best_strategy = candidate
        best_accuracy = accuracy
        best_rmse = rmse

    return best_strategy, best_accuracy, best_rmse


def main():
    """Run the end-to-end pipeline for one ticker and print the results.

    Steps: download prices, add technical indicators, label each row with
    the direction of the close ``target_shift`` rows ahead, search indicator
    subsets for the most accurate model, then train on all available
    indicators and print the predicted direction (1 = up, 0 = not up) for
    the most recent row that has indicator values.
    """
    # Define the parameters
    ticker = 'URBN'
    start_date = '2022-03-01'
    end_date = '2023-05-17'
    target_shift = 5
    time_limit = 15  # seconds

    # Get the stock data
    raw_data = get_stock_data(ticker, start_date, end_date)
    if raw_data.empty:
        print("No price data returned for", ticker)
        return

    # Generate technical indicators; keep this frame for the final prediction
    featured = generate_technical_indicators(raw_data)
    if featured.empty:
        print("Not enough price history to compute indicators for", ticker)
        return

    # Generate target labels
    data = generate_labels(featured, target_shift)

    # Find the available indicators
    indicators = ['SMA', 'EMA', 'MACD', 'RSI']
    best_indicators = find_best_indicators(data, indicators)
    print("Best Indicators:", best_indicators)

    # Preprocess the data
    data = preprocess_data(data, best_indicators)

    # Define strategies: every on/off combination of the available indicators
    strategies = list(itertools.product([0, 1], repeat=len(best_indicators)))

    # Find the best strategy
    best_strategy_model, best_accuracy, best_rmse = find_best_strategy(data, strategies, time_limit, best_indicators)

    print("Best Strategy Model:", best_strategy_model)
    print("Best Accuracy:", best_accuracy)
    print("Best RMSE:", best_rmse)

    # Train a model on all available indicators; its feature columns are
    # known, unlike those of the model returned by find_best_strategy.
    strategy_model, _, _ = train_strategy_model(data, best_indicators, time_limit)
    if strategy_model is None:
        print("Prediction: unavailable (model could not be trained)")
        return

    # Predict from the most recent row that has indicator values. A fresh
    # download with start == end cannot supply the history the 14/26-period
    # indicators need, and the model must see only the columns it was
    # trained on, in the same order.
    latest_features = featured[best_indicators].tail(1)

    # Predict the direction of the close target_shift rows ahead
    prediction = strategy_model.predict(latest_features)
    print("Prediction:", prediction)


if __name__ == "__main__":
    main()


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data.loc[:, 'Target'] = np.where(data['Close'].shift(-target_shift) > data['Close'], 1, 0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data.loc[:, indicators] = imputer.fit_transform(data[indicators])


Best Indicators: ['SMA', 'EMA', 'MACD', 'RSI']
