In [42]:
#Import Libraries
import os
import pandas as pd
import yfinance as yf
from datetime import datetime
import plotly.express as px
import kaleido
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import plotly.graph_objects as go
import numpy as np

In [43]:
#Get the current working directory
codes_dir = os.getcwd()
print(codes_dir)

#Get the parent directory
parent_dir = os.path.dirname(codes_dir)
print(parent_dir)

#Get the assets directory
assets_dir = os.path.join(parent_dir, 'Assets')
print(assets_dir)

#Get the data directory
data_dir = os.path.join(parent_dir, 'Data Set')
print(data_dir)

#Get the Stocks directory
stocks_dir = os.path.join(data_dir, 'Stocks')
print(stocks_dir)

E:\Git\Data_Science_Portfolio\Stock Market Performance Analysis using Python\Codes
E:\Git\Data_Science_Portfolio\Stock Market Performance Analysis using Python
E:\Git\Data_Science_Portfolio\Stock Market Performance Analysis using Python\Assets
E:\Git\Data_Science_Portfolio\Stock Market Performance Analysis using Python\Data Set
E:\Git\Data_Science_Portfolio\Stock Market Performance Analysis using Python\Data Set\Stocks


In [44]:
#Set a Date Range for 6 months before
start_date = datetime.now() - pd.DateOffset(months=6)
end_date = datetime.now()

#List of Tickers for Apple, Microsoft, Netflix, and Google
tickers = ['AAPL', 'MSFT', 'NFLX', 'GOOG']

#Define an empty list 
ds_list = []

#Download the data from Yahoo Finance
for ticker in tickers:
    data_set = yf.download(ticker, start=start_date, end=end_date)
    ds_list.append(data_set)

ds = pd.concat(ds_list, keys=tickers, names=['Ticker', 'Date'])
print(ds.head())

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

                         Open        High         Low       Close   Adj Close  \
Ticker Date                                                                     
AAPL   2023-12-28  194.139999  194.660004  193.169998  193.580002  193.071426   
       2023-12-29  193.899994  194.399994  191.729996  192.529999  192.024185   
       2024-01-02  187.149994  188.440002  183.889999  185.639999  185.152283   
       2024-01-03  184.220001  185.880005  183.429993  184.250000  183.765930   
       2024-01-04  182.149994  183.089996  180.880005  181.910004  181.432098   

                     Volume  
Ticker Date                  
AAPL   2023-12-28  34049900  
       2023-12-29  42628800  
       2024-01-02  82488700  
       2024-01-03  58414500  
       2024-01-04  71983600  





In [45]:
#reset the index in the dataframe
ds.reset_index(inplace=True)
print(ds.head())

  Ticker       Date        Open        High         Low       Close  \
0   AAPL 2023-12-28  194.139999  194.660004  193.169998  193.580002   
1   AAPL 2023-12-29  193.899994  194.399994  191.729996  192.529999   
2   AAPL 2024-01-02  187.149994  188.440002  183.889999  185.639999   
3   AAPL 2024-01-03  184.220001  185.880005  183.429993  184.250000   
4   AAPL 2024-01-04  182.149994  183.089996  180.880005  181.910004   

    Adj Close    Volume  
0  193.071426  34049900  
1  192.024185  42628800  
2  185.152283  82488700  
3  183.765930  58414500  
4  181.432098  71983600  


In [46]:
#Save the data as csv file
ds.to_csv(os.path.join(stocks_dir, 'Stock Market Data.csv'), index=False)

In [47]:
#Performance analysis of in the stock market for past 6 months
px.defaults.template = "plotly_dark"

#Create a line chart
figStckMktAn = px.line(ds, x='Date', 
               y='Close', 
              color='Ticker', 
              title="Stock Market Performance for the Last 6 Months")
figStckMktAn.show()

#Save the chart as png file using keleido in assets directory
figStckMktAn.write_image(os.path.join(assets_dir, "Stock Market Performance.png"))

In [48]:
#Faceted area chart for six months
figFctArCht = px.area(ds, x='Date', y='Close', color='Ticker',
              facet_col='Ticker',
              labels={'Date':'Date', 'Close':'Closing Price', 'Ticker':'Company'},
              title='Stock Prices for Apple, Microsoft, Netflix, and Google')
figFctArCht.show()

#Save the chart as png file using keleido in assets directory
figFctArCht.write_image(os.path.join(assets_dir, "Faceted Area Chart.png"))

In [49]:
#Analyze moving averages
ds['MA10'] = ds.groupby('Ticker')['Close'].rolling(window=10).mean().reset_index(0, drop=True)
ds['MA20'] = ds.groupby('Ticker')['Close'].rolling(window=20).mean().reset_index(0, drop=True)

for ticker, group in ds.groupby('Ticker'):
    print(f'Moving Averages for {ticker}')
    print(group[['MA10', 'MA20']])

Moving Averages for AAPL
           MA10        MA20
0           NaN         NaN
1           NaN         NaN
2           NaN         NaN
3           NaN         NaN
4           NaN         NaN
..          ...         ...
120  208.509000  200.224999
121  209.634000  201.287999
122  211.229001  202.242500
123  211.839001  203.405499
124  211.942001  204.596000

[125 rows x 2 columns]
Moving Averages for GOOG
           MA10        MA20
375         NaN         NaN
376         NaN         NaN
377         NaN         NaN
378         NaN         NaN
379         NaN         NaN
..          ...         ...
495  177.864000  176.897001
496  178.348000  177.183501
497  179.242999  177.646001
498  179.960999  178.013500
499  180.690999  178.486501

[125 rows x 2 columns]
Moving Averages for MSFT
           MA10        MA20
125         NaN         NaN
126         NaN         NaN
127         NaN         NaN
128         NaN         NaN
129         NaN         NaN
..          ...         ...
245  439.

In [50]:
#Visualize moving averages
for ticker, group in ds.groupby('Ticker'):
    figMovAvg = px.line(group, x='Date', y=['Close', 'MA10', 'MA20'], 
                  color_discrete_sequence=['red', 'blue', 'green'],
                  title=f"{ticker} Moving Averages")
    figMovAvg.show()

#Save multiple charts as png files using keleido
    figMovAvg.write_image(f"{ticker} Moving Averages.png")


In [51]:
#Analyze volatility of the companies
ds['Volatility'] = ds.groupby('Ticker')['Close'].pct_change().rolling(window=10).std().reset_index(0, drop=True)
figVol = px.line(ds, x='Date', y='Volatility', 
              color='Ticker', 
              title='Volatility of All Companies')
figVol.show()

#Save the chart as png file in assets directory using keleido
figVol.write_image(os.path.join(assets_dir, "Volatility of All Companies.png"))


In [52]:
#Analyze the correlation between the companies (Netflix and Microsoft)

# create a DataFrame with the stock prices of Netflix and Microsoft
netflix = ds.loc[ds['Ticker'] == 'NFLX', ['Date', 'Close']].rename(columns={'Close': 'NFLX'})
microsft = ds.loc[ds['Ticker'] == 'MSFT', ['Date', 'Close']].rename(columns={'Close': 'MSFT'})
ds_corr = pd.merge(netflix, microsft, on='Date')

# create a scatter plot to visualize the correlation
figCorNFLXnMSFT = px.scatter(ds_corr, x='NFLX', y='MSFT', 
                 trendline='ols', 
                 title='Correlation between Netflix and Microsoft')
figCorNFLXnMSFT.show()

#Save the chart as png file in assets directory using keleido
figCorNFLXnMSFT.write_image(os.path.join(assets_dir, "Correlation between Netflix and Microsoft.png"))

In [53]:
# Define the target column you want to predict
target_column = 'Close'

# Define the feature columns you want to use for prediction
feature_columns = ['Open', 'High', 'Low', 'Adj Close', 'Volume']

In [54]:
# Define the number of future days to predict
num_future_days = 5

In [55]:
# List to store the future predictions for all tickers
all_ticker_predictions = []

In [56]:
# Iterate over the unique tickers in the dataset
unique_tickers = ds['Ticker'].unique()

In [57]:
for ticker in unique_tickers:
    # Filter the data for the current ticker
    ticker_data = ds[ds['Ticker'] == ticker]

    # Split the data into training and testing sets
    X = ticker_data[feature_columns].values
    y = ticker_data[target_column].values

    # Initialize and train a Linear Regression model
    model = LinearRegression()
    model.fit(X, y)

    # Predict future stock prices for the next 'num_future_days' days
    last_data_point = X[-1].reshape(1, -1)  # Reshape for prediction
    future_predictions = []

    for _ in range(num_future_days):
        future_prediction = model.predict(last_data_point)
        future_predictions.append(future_prediction)
        # Update the last_data_point with the predicted value for the next iteration
        last_data_point[0][-1] = future_prediction

    # Create a DataFrame for future predictions
    future_dates = pd.date_range(start=ticker_data['Date'].iloc[-1], periods=num_future_days, freq='D')
    future_results_df = pd.DataFrame({'Date': future_dates, 'Predicted': np.array(future_predictions).flatten()})
    future_results_df['Ticker'] = ticker  # Add the ticker column

    # Append the future predictions for the current ticker to the list
    all_ticker_predictions.append(future_results_df)


Conversion of an array with ndim > 0 to a scalar is deprecated, and will error in future. Ensure you extract a single element from your array before performing this operation. (Deprecated NumPy 1.25.)


Conversion of an array with ndim > 0 to a scalar is deprecated, and will error in future. Ensure you extract a single element from your array before performing this operation. (Deprecated NumPy 1.25.)


Conversion of an array with ndim > 0 to a scalar is deprecated, and will error in future. Ensure you extract a single element from your array before performing this operation. (Deprecated NumPy 1.25.)


Conversion of an array with ndim > 0 to a scalar is deprecated, and will error in future. Ensure you extract a single element from your array before performing this operation. (Deprecated NumPy 1.25.)


Conversion of an array with ndim > 0 to a scalar is deprecated, and will error in future. Ensure you extract a single element from your array before performing this operation. (Deprecated

In [58]:
# Concatenate the future predictions for all tickers into a single DataFrame
all_predictions_ds = pd.concat(all_ticker_predictions)

In [59]:
# Concatenate the future predictions for all tickers into a single DataFrame
all_predictions_ds = pd.concat(all_ticker_predictions)

# Create time series graphs for future predictions for all tickers
figs = []

In [60]:
for ticker in unique_tickers:
    ticker_predictions = all_predictions_ds[all_predictions_ds['Ticker'] == ticker]
    
    # Create a time series graph for future predictions
    fig = go.Figure()

    # Add future predicted values as a line
    fig.add_trace(go.Scatter(x=ticker_predictions['Date'], y=ticker_predictions['Predicted'], mode='lines', name='Predicted'))

    # Customize the layout for future predictions
    fig.update_layout(
        title=f'Predicted Stock Prices for {ticker} (Next {num_future_days} Days)',
        xaxis_title='Date',
        yaxis_title='Price',
    )

    figs.append(fig)

In [61]:
# Display the individual time series graphs for future predictions
for fig in figs:
    fig.show()