# Senior Project: Stock Market Analysis and Prediction

## Stock Market Data From Yahoo Finance API 

### Introduction

   Created by Hibah Agha, Jamie Serpico, Devyn Kipphut, and Brandon Le
   



In [None]:
import mercury as mr

In [None]:
import yfinance as yf
import pandas as pd
pd.options.mode.chained_assignment=None
import numpy as np
import math
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout
import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')
import logging
# Silence the "cmdstanpy" logger (presumably pulled in by Prophet, which is
# imported further down): give it a no-op handler, stop its records from
# reaching ancestor loggers, and only let CRITICAL records through.
logger = logging.getLogger("cmdstanpy")
logger.addHandler(logging.NullHandler())
# The attribute is spelled `propagate`. A misspelled name is accepted silently
# by Python (it just creates a new attribute) and leaves propagation enabled.
logger.propagate = False
logger.setLevel(logging.CRITICAL)

In [None]:
def print_info(yf_tickers):
    """Print every ``info`` field of one ticker, or of each ticker in a group.

    For each ticker a separator line and the ticker symbol are printed,
    followed by one ``--> key : value`` line per entry of its ``info`` mapping.

    Args:
        yf_tickers: A ``yf.Ticker`` or a ``yf.Tickers`` object. Any other
            type is ignored and nothing is printed.

    Raises:
        KeyError: If a ticker's ``info`` has no ``'symbol'`` entry.
    """
    if isinstance(yf_tickers, yf.Ticker):
        tickers = [yf_tickers]
    elif isinstance(yf_tickers, yf.Tickers):
        members = yf_tickers.tickers
        # `Tickers.tickers` may be a symbol -> Ticker dict; iterating a dict
        # directly would yield the symbol strings rather than Ticker objects.
        tickers = members.values() if isinstance(members, dict) else members
    else:
        return

    for ticker in tickers:
        info = ticker.info  # read once, reused for the header and every row
        print(f"\n{'='*80}")
        print(f"{' ' * 33}{info['symbol']}\n")
        for key, value in info.items():
            print(f"--> {key:>29} : {value}")

def print_table(yf_tickers):
    """Print one fixed-width table row per ticker.

    Each row shows the symbol, sector, currency, quote type and short name
    read from the ticker's ``info`` mapping; a field that is absent from the
    mapping is shown as ``NONE``.

    Args:
        yf_tickers: A ``yf.Ticker`` or a ``yf.Tickers`` object. Any other
            type is ignored and nothing is printed.
    """
    if isinstance(yf_tickers, yf.Ticker):
        tickers = [yf_tickers]
    elif isinstance(yf_tickers, yf.Tickers):
        members = yf_tickers.tickers
        # `Tickers.tickers` may be a symbol -> Ticker dict; iterating a dict
        # directly would yield the symbol strings rather than Ticker objects.
        tickers = members.values() if isinstance(members, dict) else members
    else:
        return

    for ticker in tickers:
        info = ticker.info  # read once instead of once per column
        print(
            f"| {info.get('symbol', 'NONE'):<5} | {info.get('sector', 'NONE'):>25} | "
            f"{info.get('currency', 'NONE'):>4} | {info.get('quoteType', 'NONE'):>6} | "
            f"{info.get('shortName', 'NONE'):<35} |"
        )

In [None]:
# Mercury app shell and input widgets, created in the order of the numbered
# steps shown in their labels.
# Tip: for cryptocurrencies, add -USD at the end of the ticker name.
show_code = mr.Checkbox(label="Show Code", value=False)
app = mr.App(
    title="Senior Project",
    description="Stock Market Analysis and Prediction using Yahoo Finance",
    show_code=show_code.value,
)
tickerNameBox = mr.Text(label="Step 1: Choose a Ticker")
startDateBox = mr.Text(label="Step 2: Pick start date in this format: YYYY-MM-DD")
endDateBox = mr.Text(label="Step 3: Pick end date in this format: YYYY-MM-DD")
epochBox = mr.Slider(
    label="Step 4: Choose a number of Epochs. (Higher is more accurate)",
    value=5,
    min=1,
    max=10,
    step=1,
)

# Snapshot the current widget values for the cells that follow.
tickerName, startDate, endDate = tickerNameBox.value, startDateBox.value, endDateBox.value
epochCount = epochBox.value

# Halt here while any of the three text inputs is still empty.
if "" in (startDate, endDate, tickerName):
    mr.Stop()

In [None]:
# Confirm the selection to the user, then fetch the price history for it.
mr.Markdown(f"""## You have chosen the stock:  {tickerName}!
We will show you info and predictions from {startDate} to {endDate}
""")
df = yf.download(tickerName, start=startDate, end=endDate)

# Every later cell (scaling, windowing, training, forecasting) needs rows to
# work with. If the download came back empty -- e.g. an unknown ticker or a
# date range with no data -- say so and halt instead of failing further down.
if df.empty:
    mr.Markdown(
        f"No price data was returned for {tickerName} between {startDate} and {endDate}. "
        "Please check the ticker symbol and the dates."
    )
    mr.Stop()


In [None]:
# Display the downloaded price history.
df
# Close is the raw closing price; Adj Close is the close adjusted for other
# stock attributes (presumably dividends and splits -- confirm against Yahoo Finance).
# df.shape

In [None]:
# Show the dimensions of the downloaded data.
df.shape

In [None]:
# Section heading. A space after "##" makes this a proper Markdown heading,
# consistent with the "## You have chosen the stock" banner.
mr.Md(f"""
## Showing Graph of History for {tickerName}
""")

# Plot the closing price over the downloaded date range.
plt.figure(figsize=(16, 8))
plt.title(f'Close Price History of {tickerName}')
plt.plot(df['Close'])
plt.xlabel('Date', fontsize=18)
plt.ylabel('Close Price USD ($)', fontsize=18)
plt.show()

In [None]:
# Keep only the 'Close' column; the model below is trained on it alone.
data = df.filter(items=['Close'])
# Same values as a NumPy array, one row per row of `data`.
dataset = data.to_numpy()
# Number of leading rows used for training: 80% of the data, rounded up.
training_data_len = math.ceil(0.8 * len(dataset))

training_data_len

In [None]:
# Rescale the closing prices into the range [0, 1].
# NOTE(review): the scaler is fitted on all of `dataset`, not only on the
# leading rows that are used for training.
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(dataset)
scaled_data = scaler.transform(dataset)


In [None]:
# Training portion of the scaled series: the first `training_data_len` rows.
train_data = scaled_data[:training_data_len, :]

# Sliding windows over the training rows: each sample holds 60 consecutive
# scaled closes, and its target is the scaled close that follows them.
x_train = [train_data[i - 60:i, 0] for i in range(60, len(train_data))]
y_train = [train_data[i, 0] for i in range(60, len(train_data))]

In [None]:
# Turn the lists of windows and targets into NumPy arrays.
x_train = np.array(x_train)
y_train = np.array(y_train)


In [None]:
# Add a trailing axis of length 1, giving (samples, window length, 1); the
# first LSTM layer below is declared with input_shape=(window length, 1).
x_train = x_train.reshape((x_train.shape[0], x_train.shape[1], 1))
x_train.shape

In [None]:
# Network: two stacked 50-unit LSTM layers (the first returns the full
# sequence so the second can consume it), then a 25-unit ReLU dense layer
# and a single sigmoid output unit.
model = Sequential([
    LSTM(50, return_sequences=True, input_shape=(x_train.shape[1], 1)),
    LSTM(50, return_sequences=False),
    Dense(25, activation='relu'),
    Dense(1, activation='sigmoid'),
])

In [None]:
# Loss: mean squared error, a standard choice for regression problems like this.
# Extra metric: mean absolute error, the average of |predicted - actual|.
model.compile(
    optimizer='adam',
    loss='mean_squared_error',
    metrics=['mean_absolute_error'],
)


In [None]:
# Train the network on the windowed data: one sample per batch, for the
# number of epochs selected on the slider.
model.fit(x_train, y_train, batch_size=1, epochs=epochCount)

In [None]:
# Scaled rows for evaluation, starting 60 rows before the train/test split
# so that the first test window is already full.
test_data = scaled_data[training_data_len - 60:, :]
# Targets are taken from `dataset`, i.e. in original (unscaled) units.
y_test = dataset[training_data_len:, :]
# One 60-row window per row after the split.
x_test = [test_data[i - 60:i, 0] for i in range(60, len(test_data))]

In [None]:
# Turn the list of test windows into a NumPy array.
x_test = np.array(x_test)


In [None]:
# Add a trailing axis of length 1, the same layout used for x_train.
x_test = x_test.reshape((x_test.shape[0], x_test.shape[1], 1))

In [None]:
# Get the model's predicted values for the test windows and map them from
# the scaled range back to original units with the fitted scaler.
predictions = scaler.inverse_transform(model.predict(x_test))

In [None]:
# Root mean squared error (RMSE) between predictions and actual values.
rmse = np.sqrt(np.mean(np.square(predictions - y_test)))
rmse

In [None]:
# Split the closing prices at the same row used for the train/test split.
train = data[:training_data_len]
# Take an explicit copy before adding a column: assigning to a slice of
# `data` would otherwise depend on the chained-assignment warning being
# silenced globally (pd.options.mode.chained_assignment = None).
valid = data[training_data_len:].copy()
valid['Predictions'] = predictions

# Plot training prices, actual validation prices and predicted prices.
plt.figure(figsize=(16, 8))
plt.title('Model')
plt.xlabel('Date', fontsize=18)
plt.ylabel('Close Price USD ($)', fontsize=18)
plt.plot(train['Close'])
plt.plot(valid[['Close', 'Predictions']])
# Legend entries follow plot order: train line, then the two `valid` columns.
plt.legend(['Train', 'Val', 'Predictions'], loc='lower right')
plt.show()

In [None]:
# Show the actual ('Close') and predicted ('Predictions') prices side by side.
valid

In [None]:
from prophet import Prophet

# Reshape the price history into the two columns handed to Prophet:
# the date as 'ds' and the closing price as 'y'.
df = df.reset_index().rename(columns={'Date': 'ds', 'Close': 'y'})[['ds', 'y']]

# Create the Prophet model and fit it to the history.
m = Prophet()
m.fit(df)

# Extend the date range by 365 periods and forecast over all of it.
future = m.make_future_dataframe(periods=365)
forecast = m.predict(future)

# Plot the forecast; the black dots are the actual prices.
fig = m.plot(forecast)
plt.title(f'Prediction of the {tickerName} Stock Price using Prophet')
plt.xlabel('Date')
plt.ylabel('Close Stock Price')
plt.show()
#black spots are the actual prices

In [None]:
import datetime

# Today's date (not used by the filter below).
today = datetime.date.today()
# End date entered by the user, parsed from YYYY-MM-DD.
userDate = datetime.datetime.strptime(endDate, '%Y-%m-%d').date()

# Lift pandas' display limits so the whole table is shown.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

# Dates and predicted closing prices from the forecast.
forecast_dates = pd.to_datetime(forecast['ds'])
forecast_prices = forecast['yhat']

# Put both into one table with readable column names.
predicted_prices = pd.DataFrame({'Date': forecast_dates, 'Predicted Close': forecast_prices})

# Keep only the rows dated after the user's end date.
predicted_prices = predicted_prices[predicted_prices['Date'].dt.date > userDate]

# Show the predicted prices with their dates.
predicted_prices