# NASDAQ 1962-2024 - Stock Price Analysis

Import necessary libraries:

In [1]:
# Import the necessary libraries
# Importing library for environment access (configurable data path)
import os

# To ignore the warnings
import warnings

# Importing library for date manipulation
from datetime import datetime

# Importing libraries for data manipulation
import numpy as np

import pandas as pd

# Importing libraries for data visualization
import matplotlib.pylab as plt

# Importing an advanced library for data visualization
import seaborn as sns

# To calculate the MSE or RMSE
from sklearn.metrics import mean_squared_error

# Importing acf and pacf functions
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

# Importing models from statsmodels library
from statsmodels.tsa.ar_model import AutoReg

from statsmodels.tsa.arima.model import ARIMA

# Importing interactive widgets
import ipywidgets as widgets
from ipywidgets import interact

# Silence all warnings for the rest of the notebook
warnings.filterwarnings('ignore')

## Basic Data Analysis and Cleanup

In [49]:
# import data
# The CSV location is machine-specific: set the NASDAQ_CSV environment variable to
# point at the file on another machine. When it is unset, the original local path
# is used, so existing runs behave exactly as before.
DATA_PATH = os.environ.get(
    "NASDAQ_CSV",
    "C:\\Users\\spase\\Desktop\\Projects\\Data_NASDAQ\\NASDAQ1962-2024.csv",
)
data = pd.read_csv(DATA_PATH)
df = data.copy() # make copy to not change data

# print first five rows
df.head()

Unnamed: 0,Date,Ticker,Exchange,Open,High,Low,Close,Adj Close,Volume
0,2008-01-29,AACG,NASDAQ,9.5,9.99,8.57,8.75,0.702589,1489000.0
1,2008-01-30,AACG,NASDAQ,8.75,9.15,8.3,8.5,0.682515,219000.0
2,2008-01-31,AACG,NASDAQ,8.49,10.3,8.49,9.55,0.766826,182300.0
3,2008-02-01,AACG,NASDAQ,9.93,9.94,9.5,9.51,0.763614,28200.0
4,2008-02-04,AACG,NASDAQ,9.5,9.71,9.5,9.5,0.762811,8300.0


In [50]:
# data shape: (number of rows, number of columns)
df.shape

(11470963, 9)

In [51]:
# data info: column names, dtypes and memory usage of the DataFrame
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11470963 entries, 0 to 11470962
Data columns (total 9 columns):
 #   Column     Dtype  
---  ------     -----  
 0   Date       object 
 1   Ticker     object 
 2   Exchange   object 
 3   Open       float64
 4   High       float64
 5   Low        float64
 6   Close      float64
 7   Adj Close  float64
 8   Volume     float64
dtypes: float64(6), object(3)
memory usage: 787.6+ MB


Date, Ticker and Exchange are objects, rest are floats.

In [55]:
# Convert Date to datetime data type
# `infer_datetime_format` is deprecated since pandas 2.0 (a consistent format is now
# inferred by default), so it is no longer passed. The column is addressed by label
# rather than by attribute access.
df['Date'] = pd.to_datetime(df['Date'])

In [56]:
# number of unique values in each column
# (nunique() skips NaN by default, so rows with a missing value are not counted)
df.nunique()

Date           15705
Ticker          3370
Exchange           1
Open          458220
High          484375
Low           473964
Close         464241
Adj Close    3362894
Volume        638125
dtype: int64

We can see that we have 3370 different stocks.

In [58]:
# number of null (missing) values in each column
df.isnull().sum()

Date           0
Ticker       470
Exchange       0
Open         130
High         130
Low          130
Close        130
Adj Close    130
Volume       130
dtype: int64

We have 470 rows with no Ticker label (unknown stock) and 130 rows with no stock prices.

In [59]:
# define array of stocks that have missing data.
# A ticker is flagged when any of its rows lacks a value in any price/volume column
# (not only 'Low'). Columns are selected by label so the cell does not depend on
# column order, and rows without a ticker label are excluded from the result.
value_columns = ['Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume']
rows_with_missing_values = df[value_columns].isna().any(axis=1)
stocks_with_missing_data = df.loc[rows_with_missing_values, 'Ticker'].dropna().unique()

In [60]:
# show the tickers collected in stocks_with_missing_data
print(stocks_with_missing_data)

['AMNB' 'CRVO' 'ELVN' 'ESGL' 'GAQ' 'JTAI' 'LDWY' 'LUXH' 'NUKK' 'RCAT'
 'RENB' 'SHPW' 'TLSI' 'TYGO' 'VSTE']


We can see that we have 15 stocks that have some missing values.

In [61]:
# show every row whose Ticker label is missing
ticker_is_missing = df['Ticker'].isna()
df[ticker_is_missing]

Unnamed: 0,Date,Ticker,Exchange,Open,High,Low,Close,Adj Close,Volume
6898133,2022-07-12,,NASDAQ,7.610,13.390,7.510,12.050,12.050,1486800.0
6898134,2022-07-13,,NASDAQ,9.770,12.000,8.800,8.800,8.800,264400.0
6898135,2022-07-14,,NASDAQ,8.010,8.260,6.321,7.350,7.350,134100.0
6898136,2022-07-15,,NASDAQ,7.640,7.820,6.800,6.800,6.800,47000.0
6898137,2022-07-18,,NASDAQ,6.450,8.490,6.300,6.700,6.700,69400.0
...,...,...,...,...,...,...,...,...,...
6898598,2024-05-16,,NASDAQ,0.590,0.600,0.570,0.590,0.590,134700.0
6898599,2024-05-17,,NASDAQ,0.619,0.663,0.590,0.610,0.610,532000.0
6898600,2024-05-20,,NASDAQ,0.592,0.609,0.575,0.595,0.595,110000.0
6898601,2024-05-21,,NASDAQ,0.621,0.650,0.581,0.584,0.584,335900.0


The unlabelled rows start on 12 July 2022. The rows immediately before and after this block may reveal which stock they belong to.

In [62]:
# find the stock name before the missing values
# The row position is computed from the data instead of being hard-coded, so the cell
# still points at the right row if the file changes. This relies on the unlabelled
# rows forming one contiguous block, as the listing of those rows shows.
missing_ticker_positions = np.flatnonzero(df['Ticker'].isna())
df.iloc[missing_ticker_positions[0] - 1]

Date         2024-05-22 00:00:00
Ticker                      MYSZ
Exchange                  NASDAQ
Open                        4.53
High                         4.6
Low                          4.1
Close                       4.25
Adj Close                   4.25
Volume                   37800.0
Name: 6898132, dtype: object

In [63]:
# find the stock name after the missing values
# The row position is computed from the data instead of being hard-coded, so the cell
# still points at the right row if the file changes. This relies on the unlabelled
# rows forming one contiguous block, as the listing of those rows shows.
missing_ticker_positions = np.flatnonzero(df['Ticker'].isna())
df.iloc[missing_ticker_positions[-1] + 1]

Date         2017-10-20 00:00:00
Ticker                      NAAS
Exchange                  NASDAQ
Open                       80.25
High                   89.300003
Low                         80.0
Close                  83.050003
Adj Close              83.050003
Volume                 2308940.0
Name: 6898603, dtype: object

The rows on either side belong to two different stocks (MYSZ before, NAAS after), so we can't be sure which stock the unlabelled rows are supposed to be. For this project, we will simply drop this unknown stock and also drop any stock that has missing values.

In [64]:
# remove every row that contains at least one missing value
df = df.dropna(axis='index', how='any')

In [67]:
# recount the null values in each column after dropping rows
df.isnull().sum()

Date         0
Ticker       0
Exchange     0
Open         0
High         0
Low          0
Close        0
Adj Close    0
Volume       0
dtype: int64

In [72]:
# remove every row belonging to a stock listed in stocks_with_missing_data
mask = df['Ticker'].isin(stocks_with_missing_data)  # True for rows of incomplete stocks
df = df.loc[~mask]  # keep only the rows where the mask is False

In [73]:
# check that we have no null values and that we have removed the incomplete stocks
# The null counts alone do not show that the incomplete stocks are gone, so that part
# is asserted explicitly; the cell fails loudly if any of those tickers remain.
assert not df['Ticker'].isin(stocks_with_missing_data).any(), "incomplete stocks are still present"
df.isnull().sum()

Date         0
Ticker       0
Exchange     0
Open         0
High         0
Low          0
Close        0
Adj Close    0
Volume       0
dtype: int64

In [74]:
# number of unique values in each column after the cleanup
df.nunique()

Date           15705
Ticker          3355
Exchange           1
Open          457829
High          483869
Low           473409
Close         463953
Adj Close    3357173
Volume        637908
dtype: int64

We now have 3355 stocks, as expected (3370 − 15), and no null values.

In [75]:
# Collect names of the stocks in an array
# The column is selected by label rather than by position, so the result does not
# depend on where 'Ticker' sits in the DataFrame.
stocks = df['Ticker'].unique()

## Exploratory Data Analysis and Visualization