In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn import metrics
from sklearn.model_selection import train_test_split

In [None]:
!pip install kaggle



In [None]:
# Set up the Kaggle credentials.
# kaggle.json must already be present in the current working directory:
# it is copied into ~/.kaggle/ (created if missing) and its permissions are
# then restricted to owner read/write only (mode 600).
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

In [None]:
# Download the jacksoncrow/stock-market-dataset archive with the kaggle CLI.
# (In the recorded run it was saved as stock-market-dataset.zip under /content.)
!kaggle datasets download -d jacksoncrow/stock-market-dataset

Dataset URL: https://www.kaggle.com/datasets/jacksoncrow/stock-market-dataset
License(s): CC0-1.0
Downloading stock-market-dataset.zip to /content
 98% 513M/522M [00:05<00:00, 81.0MB/s]
100% 522M/522M [00:05<00:00, 92.2MB/s]


In [None]:
# Now that we have the zip file, extract the dataset out of it.
# extractall() with no arguments unpacks into the current working directory.
from zipfile import ZipFile

dataset_zip = '/content/stock-market-dataset.zip'

# Bind the open archive to `archive` rather than `zip`, so the built-in
# zip() is not shadowed for the rest of the notebook session.
with ZipFile(dataset_zip) as archive:
    archive.extractall()
    print("All the files are extracted !")

All the files are extracted !


In [None]:
# List the current working directory to check which files and directories
# are present after the extraction.
!ls

etfs  kaggle.json  sample_data	stock-market-dataset.zip  stocks  symbols_valid_meta.csv


In [None]:
# Load the symbols metadata CSV (symbols_valid_meta.csv) into a pandas DataFrame.
symbols_meta_path = '/content/symbols_valid_meta.csv'
stock_dataset = pd.read_csv(symbols_meta_path)

In [None]:
# (rows, columns) of the loaded metadata table
stock_dataset.shape

(8049, 12)

In [None]:
# preview the first five rows
stock_dataset.head()

Unnamed: 0,Nasdaq Traded,Symbol,Security Name,Listing Exchange,Market Category,ETF,Round Lot Size,Test Issue,Financial Status,CQS Symbol,NASDAQ Symbol,NextShares
0,Y,A,"Agilent Technologies, Inc. Common Stock",N,,N,100.0,N,,A,A,N
1,Y,AA,Alcoa Corporation Common Stock,N,,N,100.0,N,,AA,AA,N
2,Y,AAAU,Perth Mint Physical Gold ETF,P,,Y,100.0,N,,AAAU,AAAU,N
3,Y,AACG,ATA Creativity Global - American Depositary Sh...,Q,G,N,100.0,N,N,,AACG,N
4,Y,AADR,AdvisorShares Dorsey Wright ADR ETF,P,,Y,100.0,N,,AADR,AADR,N


In [None]:
# preview the last five rows
stock_dataset.tail()

Unnamed: 0,Nasdaq Traded,Symbol,Security Name,Listing Exchange,Market Category,ETF,Round Lot Size,Test Issue,Financial Status,CQS Symbol,NASDAQ Symbol,NextShares
8044,Y,ZUO,"Zuora, Inc. Class A Common Stock",N,,N,100.0,N,,ZUO,ZUO,N
8045,Y,ZVO,Zovio Inc. - Common Stock,Q,Q,N,100.0,N,N,,ZVO,N
8046,Y,ZYME,Zymeworks Inc. Common Shares,N,,N,100.0,N,,ZYME,ZYME,N
8047,Y,ZYNE,"Zynerba Pharmaceuticals, Inc. - Common Stock",Q,G,N,100.0,N,N,,ZYNE,N
8048,Y,ZYXI,"Zynex, Inc. - Common Stock",Q,S,N,100.0,N,N,,ZYXI,N


In [None]:
# Count missing (NaN) entries per column.
# NOTE: only NaN is counted; blank/whitespace-only strings are not treated
# as missing by this check.
stock_dataset.isna().sum()

Nasdaq Traded          0
Symbol                 0
Security Name          0
Listing Exchange       0
Market Category        0
ETF                    0
Round Lot Size         0
Test Issue             0
Financial Status    4666
CQS Symbol          3383
NASDAQ Symbol          0
NextShares             0
dtype: int64

In [None]:
# column dtypes, non-null counts and memory usage of the table
stock_dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8049 entries, 0 to 8048
Data columns (total 12 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Nasdaq Traded     8049 non-null   object 
 1   Symbol            8049 non-null   object 
 2   Security Name     8049 non-null   object 
 3   Listing Exchange  8049 non-null   object 
 4   Market Category   8049 non-null   object 
 5   ETF               8049 non-null   object 
 6   Round Lot Size    8049 non-null   float64
 7   Test Issue        8049 non-null   object 
 8   Financial Status  3383 non-null   object 
 9   CQS Symbol        4666 non-null   object 
 10  NASDAQ Symbol     8049 non-null   object 
 11  NextShares        8049 non-null   object 
dtypes: float64(1), object(11)
memory usage: 754.7+ KB


In [None]:
# Frequency of each value in 'Nasdaq Traded'
# (recorded run: a single value, Y, for all 8049 rows).
stock_dataset['Nasdaq Traded'].value_counts()

Nasdaq Traded
Y    8049
Name: count, dtype: int64

In [None]:
# Frequency of each value in 'Symbol'
# (recorded run: 8049 distinct symbols, i.e. one per row).
stock_dataset['Symbol'].value_counts()

Symbol
A       1
OUSM    1
OXLC    1
OXFD    1
OXBR    1
       ..
FLTB    1
FLT     1
FLSW    1
FLSP    1
ZYXI    1
Name: count, Length: 8049, dtype: int64

In [None]:
# Frequency of each value in 'Security Name'
# (recorded run: 8045 distinct names; a few names occur twice).
stock_dataset['Security Name'].value_counts()

Security Name
Urstadt Biddle Properties Inc. Common Stock     2
iShares Edge MSCI Intl Size Factor ETF          2
MFS Municipal Income Trust Common Stock         2
iShares Morningstar Mid-Cap ETF                 2
Agilent Technologies, Inc. Common Stock         1
                                               ..
FleetCor Technologies, Inc. Common Stock        1
Franklin FTSE Switzerland ETF                   1
Franklin Liberty Systematic Style Premia ETF    1
Franklin FTSE Saudi Arabia ETF                  1
Zynex, Inc. - Common Stock                      1
Name: count, Length: 8045, dtype: int64

In [None]:
# Frequency of each exchange code in 'Listing Exchange'
# (recorded run: five codes — Q, N, P, Z, A).
stock_dataset['Listing Exchange'].value_counts()

Listing Exchange
Q    3383
N    2520
P    1542
Z     351
A     253
Name: count, dtype: int64

In [None]:
# Frequency of each value in 'Market Category'
# (recorded run: 4666 rows carry a blank label; the rest are Q, S or G).
stock_dataset['Market Category'].value_counts()

Market Category
     4666
Q    1531
S     952
G     900
Name: count, dtype: int64

In [None]:
# Frequency of each value in the 'ETF' flag (Y/N in the recorded run).
stock_dataset['ETF'].value_counts()

ETF
N    5884
Y    2165
Name: count, dtype: int64

In [None]:
# Frequency of each value in 'Round Lot Size'
# (recorded run: 100.0 for all but five rows).
stock_dataset['Round Lot Size'].value_counts()

Round Lot Size
100.0    8044
10.0        4
1.0         1
Name: count, dtype: int64

In [None]:
# Frequency of each value in 'Test Issue'
# (recorded run: a single value, N, for all 8049 rows).
stock_dataset['Test Issue'].value_counts()

Test Issue
N    8049
Name: count, dtype: int64

In [None]:
# Frequency of each value in 'Financial Status'.
# value_counts() skips NaN by default, so the counts only cover the
# non-null rows of this column.
stock_dataset['Financial Status'].value_counts()

Financial Status
N    3180
D     191
E       8
H       4
Name: count, dtype: int64

In [None]:
# Frequency of each value in 'CQS Symbol'.
# value_counts() skips NaN by default; in the recorded run every non-null
# value is distinct (4666 of them).
stock_dataset['CQS Symbol'].value_counts()

CQS Symbol
A       1
PBDM    1
PAGP    1
PAG     1
PACK    1
       ..
FNGS    1
FNGO    1
FNGD    1
FNF     1
ZYME    1
Name: count, Length: 4666, dtype: int64

In [None]:
# Frequency of each value in 'NASDAQ Symbol'
# (recorded run: 8049 distinct symbols, i.e. one per row).
stock_dataset['NASDAQ Symbol'].value_counts()

NASDAQ Symbol
A       1
OUSM    1
OXLC    1
OXFD    1
OXBR    1
       ..
FLTB    1
FLT     1
FLSW    1
FLSP    1
ZYXI    1
Name: count, Length: 8049, dtype: int64

In [None]:
# Frequency of each value in the 'NextShares' flag
# (recorded run: N for all but three rows).
stock_dataset['NextShares'].value_counts()

NextShares
N    8046
Y       3
Name: count, dtype: int64

In [None]:
# pre processing the dataset
#
# The previous call, `stock_dataset.drop([''])`, raised
# KeyError: "[''] not found in axis": without `columns=` (or `axis=1`),
# drop() looks for a *row label* '' — and the label itself was left blank.
#
# Drop by column name instead. The columns removed here are those holding a
# single value for every row (NaN counted as a value), since they carry no
# information; in the recorded run these are 'Nasdaq Traded' and 'Test Issue'.
# NOTE(review): the original left the intended column list empty — confirm
# that dropping the constant columns is what was meant.
constant_columns = [
    column
    for column in stock_dataset.columns
    if stock_dataset[column].nunique(dropna=False) <= 1
]

# drop() returns a new frame; the raw table in `stock_dataset` is left intact
# so this cell can be re-run safely.
stock_dataset_clean = stock_dataset.drop(columns=constant_columns)
stock_dataset_clean.head()

KeyError: "[''] not found in axis"