In [None]:
!pip install hmmlearn

!pip install pandas-DataReader

In [None]:
!pip install yfinance

In [None]:
!pip install pandas numpy matplotlib seaborn yfinance pandas_datareader

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('whitegrid')
plt.style.use("fivethirtyeight")
%matplotlib inline

# For reading stock data from yahoo
from pandas_datareader.data import DataReader
import yfinance as yf
from pandas_datareader import data as pdr

# `pdr_override()` only patches pandas_datareader to fetch through yfinance.
# Recent yfinance releases no longer provide it (calling it raises
# AttributeError), and the visible cells download with `yf.download` directly,
# so apply the patch only when the installed version still has it.
if hasattr(yf, "pdr_override"):
    yf.pdr_override()

# For time stamps
from datetime import datetime


# Single source of truth for the supported companies: display name -> ticker.
# The prompt text, the validation, the download list and the labels are all
# derived from it so they cannot drift apart.
ticker_by_company = {
    'APPLE': 'AAPL',
    'GOOGLE': 'GOOG',
    'MICROSOFT': 'MSFT',
    'AMAZON': 'AMZN',
    'NVIDIA': 'NVDA',
    'ADOBE': 'ADBE',
    'SALESFORCE': 'CRM',
    'ACCENTURE': 'ACN',
    'CISCO': 'CSCO',
    'MASTERCARD': 'MA',
}
company_names = list(ticker_by_company)

# The tech stocks we'll use for this analysis
tech_list = list(ticker_by_company.values())


def _prompt_date(prompt):
    """Ask until the user types a valid YYYY-MM-DD date.

    Returns a ``(raw_text, parsed_datetime)`` tuple.
    """
    while True:
        raw = input(prompt).strip()
        try:
            return raw, datetime.strptime(raw, "%Y-%m-%d")
        except ValueError:
            print("Invalid date. Please use the format YYYY-MM-DD.")


# Accept user input for the start and end dates
start_input, start = _prompt_date("Enter the start date (YYYY-MM-DD): ")
end_input, end = _prompt_date("Enter the end date (YYYY-MM-DD): ")

# A reversed or empty range would only produce empty downloads, so re-ask.
while end <= start:
    print("The end date must be after the start date.")
    end_input, end = _prompt_date("Enter the end date (YYYY-MM-DD): ")

# Accept user input for company name (case and surrounding spaces are ignored)
company_options = ", ".join(company_names)
company_name = input(f"Enter the company name from the following list: {company_options}: ").strip().upper()

# Validate user input for company name
while company_name not in ticker_by_company:
    print(f"Invalid company name. Please choose from the following list: {company_options}")
    company_name = input("Enter the company name: ").strip().upper()

# Download every ticker for the chosen date range. Each frame is also exposed
# as a global named after its ticker (AAPL, GOOG, ...) because later cells
# reference those names directly.
for stock in tech_list:
    # auto_adjust=False keeps the separate 'Adj Close' column that later cells use.
    frame = yf.download(stock, start=start, end=end, auto_adjust=False)
    # NOTE(review): recent yfinance versions appear to return (field, ticker)
    # MultiIndex columns even for a single ticker; flatten to plain field names.
    if isinstance(frame.columns, pd.MultiIndex):
        frame.columns = frame.columns.get_level_values(0)
    globals()[stock] = frame

company_list = [globals()[stock] for stock in tech_list]

for company, com_name in zip(company_list, company_names):
    company["company_name"] = com_name

# Take an independent copy of the selected company's frame so that columns
# added to it later do not write into a slice of another DataFrame.
selected_company_df = company_list[company_names.index(company_name)].copy()

selected_company_df.tail(10)

In [None]:
# Summary statistics for the selected company's data
selected_company_df.describe()

In [None]:
# Column names, dtypes and non-null counts for the selected company's data
selected_company_df.info()

In [None]:
# Day-over-day percentage change of the closing price
selected_company_df['Daily_Return'] = selected_company_df['Close'].pct_change()

# Line chart of the daily return over time
fig, ax = plt.subplots(figsize=(12, 6))
sns.lineplot(x=selected_company_df.index, y=selected_company_df['Daily_Return'], ax=ax)
ax.set_title(f'Daily Return of {company_name} Stock on Average')
ax.set_xlabel('Date')
ax.set_ylabel('Daily Return')
plt.show()

In [None]:
# NOTE(review): this cell repeats the preceding daily-return cell verbatim.
# Day-over-day percentage change of the closing price
selected_company_df['Daily_Return'] = selected_company_df['Close'].pct_change()

# Line chart of the daily return over time
plt.figure(figsize=(12, 6))
ax = sns.lineplot(x=selected_company_df.index, y=selected_company_df['Daily_Return'])
ax.set(
    title=f'Daily Return of {company_name} Stock on Average',
    xlabel='Date',
    ylabel='Daily Return',
)
plt.show()

In [None]:
# Closing price over the selected date range
fig, ax = plt.subplots(figsize=(12, 6))
sns.lineplot(x=selected_company_df.index, y=selected_company_df['Close'], ax=ax)
ax.set_title(f'Closing Price of {company_name} from {start:%Y-%m-%d} to {end:%Y-%m-%d}')
ax.set_xlabel('Date')
ax.set_ylabel('Closing Price')
plt.show()

In [None]:
# Traded volume per day for the selected company
fig, ax = plt.subplots(figsize=(12, 6))
sns.lineplot(x=selected_company_df.index, y=selected_company_df['Volume'], ax=ax)
ax.set_title(f'Total Volume of Stock Trade for {company_name}')
ax.set_xlabel('Date')
ax.set_ylabel('Volume')
plt.show()


In [None]:
# Rolling-window lengths (in rows) for the moving averages
ma_day = [10, 20, 50]
ma_columns = [f"MA for {window} days" for window in ma_day]

# Add one moving-average column per window to every company's frame
for company in company_list:
    for ma, column_name in zip(ma_day, ma_columns):
        company[column_name] = company['Adj Close'].rolling(ma).mean()

# One panel per company on a 5x2 grid, filled row by row in company_list order
fig, axes = plt.subplots(nrows=5, ncols=2, figsize=(30, 20))

for ax, company, com_name in zip(axes.flat, company_list, company_names):
    company[['Adj Close', *ma_columns]].plot(ax=ax)
    ax.set_title(com_name)

fig.tight_layout()

In [None]:
# Day-over-day percentage change of the closing price
selected_company_df['Daily_Return'] = selected_company_df['Close'].pct_change()

# Line chart of the daily return over time. The title previously read
# "Total Volume of Stock Trade", copied from the volume chart.
plt.figure(figsize=(12, 6))
sns.lineplot(x=selected_company_df.index, y=selected_company_df['Daily_Return'])
plt.title(f'Daily Return of {company_name}')

plt.xlabel('Date')
plt.ylabel('Daily Return')
plt.show()

In [None]:
# Histogram (with KDE overlay) of the daily returns of the adjusted close.
# The title and x label are corrected: this is a distribution of daily
# returns, not traded volume and not an average.
plt.figure(figsize=(10, 6))
sns.histplot(selected_company_df['Adj Close'].pct_change().dropna(), bins=30, kde=True)
plt.title(f'Distribution of Daily Returns for {company_name}')
plt.xlabel('Daily Return')
plt.ylabel('Frequency')
plt.show()


# Prediction

In [None]:
# Use the selected company's frame under a shorter name
# (plain assignment: `df` is the same object, not a copy)
df = selected_company_df
# Show the data
df

In [None]:
# Full closing-price history of the selected company
fig, ax = plt.subplots(figsize=(16, 6))
ax.set_title('Close Price History')
ax.plot(df['Close'])
ax.set_xlabel('Date', fontsize=18)
ax.set_ylabel('Close Price USD ($)', fontsize=18)
plt.show()

In [None]:
# Keep only the 'Close' column, as a one-column DataFrame
data = df.filter(items=['Close'])
# Same values as a NumPy array
dataset = data.to_numpy()
# The first 95% of the rows (rounded up) are used for training
training_data_len = int(np.ceil(0.95 * len(dataset)))

training_data_len

In [None]:
# Scale the data
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler(feature_range=(0,1))
# Fit the min/max on the training rows only, so the price range of the
# held-out rows does not leak into the scaling the model is trained with.
# Held-out values can therefore fall slightly outside [0, 1].
scaler.fit(dataset[:training_data_len])
scaled_data = scaler.transform(dataset)

scaled_data

In [None]:
# Number of consecutive past closes fed to the model for each prediction
lookback = 60

# Scaled training rows
train_data = scaled_data[:training_data_len, :]

# Without this check a too-short series fails later with an opaque IndexError
# when the empty window array is reshaped.
if len(train_data) <= lookback:
    raise ValueError(
        f"Need more than {lookback} training rows to build input windows, "
        f"got {len(train_data)}; choose a longer date range."
    )

# Each sample is the `lookback` values preceding row i; its target is row i
x_train = np.array([train_data[i - lookback:i, 0] for i in range(lookback, len(train_data))])
y_train = train_data[lookback:, 0].copy()

# Reshape to (samples, lookback, 1), the layout passed to the LSTM's input_shape
x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
# x_train.shape

In [None]:
from keras.models import Sequential
from keras.layers import Dense, LSTM

# Two stacked LSTM layers followed by two dense layers; the last one outputs
# a single value per input window
model = Sequential([
    LSTM(128, return_sequences=True, input_shape=(x_train.shape[1], 1)),
    LSTM(64, return_sequences=False),
    Dense(25),
    Dense(1),
])

# Adam optimizer, mean squared error loss
model.compile(loss='mean_squared_error', optimizer='adam')

# One pass over the training windows, one sample per batch
model.fit(x_train, y_train, epochs=1, batch_size=1)

In [None]:
# Held-out part of the scaled series, prefixed with the 60 rows before the
# split so the first held-out row has a full input window
test_data = scaled_data[training_data_len - 60:, :]
# Unscaled values the predictions are compared against
y_test = dataset[training_data_len:, :]

# One 60-step window per held-out row
x_test = np.array([test_data[stop - 60:stop, 0] for stop in range(60, len(test_data))])

# Reshape to (samples, 60, 1)
x_test = x_test.reshape(x_test.shape[0], x_test.shape[1], 1)

# Predict in scaled space, then map back to the original scale
predictions = scaler.inverse_transform(model.predict(x_test))

# Root mean squared error (RMSE) between predictions and actual values
squared_errors = (predictions - y_test) ** 2
rmse = np.sqrt(squared_errors.mean())
rmse

In [None]:
# Split the 'Close' frame at the same row used for the train/test split
train = data[:training_data_len]
# Copy the held-out slice before adding a column: assigning into a slice of
# `data` triggers pandas' SettingWithCopyWarning.
valid = data[training_data_len:].copy()
valid['Predictions'] = predictions
# Visualize training prices, held-out prices and the model's predictions
plt.figure(figsize=(16,6))
plt.title('Model')
plt.xlabel('Date', fontsize=18)
plt.ylabel('Close Price USD ($)', fontsize=18)
plt.plot(train['Close'])
plt.plot(valid[['Close', 'Predictions']])
plt.legend(['Train', 'Val', 'Predictions'], loc='lower right')
plt.show()

In [None]:
# Show the held-out closing prices next to the model's predictions
valid