# 1. import all modules and define scope

### Scope: observe NASDAQ-100 behavior over the last month to gain market insights.

In [1]:
import os
import re
import csv
import time
import urllib
import numpy as np
import pandas as pd
import requests as req
import multiprocessing as mp
from bs4 import BeautifulSoup as bs
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import bar_chart_race as bcr
import ffmpeg

#custom modules
from nn_downloads import nasdaq_downloads

# 2. begin with BeautifulSoup and web scraping
♠ standard web scrape of the constituents table on Wikipedia: https://en.wikipedia.org/wiki/NASDAQ-100

♠ save ticker names as list

♠ save table as dataframe

                                                    Set up

In [2]:
# Download the raw HTML of the NASDAQ-100 Wikipedia article.
# The timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() stops here on an HTTP error instead of letting the next
# cells parse an error page.
wiki_response = req.get('https://en.wikipedia.org/wiki/NASDAQ-100', timeout=30)
wiki_response.raise_for_status()
# NOTE: despite its name, wiki_url holds the response body (bytes), not a URL.
wiki_url = wiki_response.content

In [3]:
# Parse the downloaded page with Python's built-in HTML parser.
wiki_sopa = bs(markup=wiki_url, features='html.parser')

In [4]:
# get the full table (the <table> element whose id is "constituents")
wiki_tabla = wiki_sopa.find('table', id='constituents')
# find() returns None when nothing matches; fail here with a clear message
# rather than with an AttributeError in a later cell.
if wiki_tabla is None:
    raise ValueError('No <table id="constituents"> found on the NASDAQ-100 Wikipedia page; '
                     'the page layout may have changed.')

                                                    Get Links

In [5]:
# Collect every <td> cell of the constituents table and keep the cells at the
# odd positions (1, 3, 5, ...); in the two-column Company/Ticker layout shown
# by this notebook's outputs those are the ticker cells.
proto_tickers = wiki_tabla.find_all('td')
proto_tickers_list = proto_tickers[1::2]
proto_tickers_list[:2]

[<td>ATVI
 </td>,
 <td>ADBE
 </td>]

In [6]:
# Read the ticker symbol straight from each <td> element instead of running a
# regex over the printed form of the list. get_text(strip=True) trims the
# newline inside the cell, still works when the symbol is wrapped in a nested
# tag (e.g. a link), and yields an empty list for an empty input rather than
# raising.
tickers = [cell.get_text(strip=True) for cell in proto_tickers_list]
tickers[:4]

['ATVI', 'ADBE', 'AMD', 'ALXN']

In [7]:
# Build one quote-page URL per ticker by appending the symbol to the common
# prefix https://www.nasdaq.com/market-activity/stocks/
semi_url = 'https://www.nasdaq.com/market-activity/stocks/'
full_url = [f'{semi_url}{symbol}' for symbol in tickers]
full_url[:3]

['https://www.nasdaq.com/market-activity/stocks/ATVI',
 'https://www.nasdaq.com/market-activity/stocks/ADBE',
 'https://www.nasdaq.com/market-activity/stocks/AMD']

                                                    Create Dataframe

In [8]:
# The header cells (<th>) supply the column names for the dataframe;
# newlines inside each header text are removed.
wiki_thead = wiki_tabla.find_all('th')
wiki_thead_txt = [header.get_text().replace('\n', '') for header in wiki_thead]
wiki_thead_txt

['Company', 'Ticker']

In [9]:
# The body cells (<td>) supply the dataframe values, as one flat list of
# texts with newlines removed.
wiki_tbody = wiki_tabla.find_all('td')
wiki_tbody_txt = [cell.get_text().replace('\n', '') for cell in wiki_tbody]
wiki_tbody_txt[-4:]

['Xilinx, Inc.', 'XLNX', 'Zoom Video Communications', 'ZM']

In [10]:
# Group the flat list of cell texts into rows of len(wiki_thead_txt) items
# (2 here: company name and ticker).
# Every row is the slice that ENDS at offset `end`, and `end` stops short of
# len(wiki_tbody_txt), so the final table row is NOT produced here; the
# notebook adds that row to the dataframe in the cells that follow.
# Starting `end` at one full row means no empty placeholder row is created,
# so no try/except clean-up is needed (an empty input simply gives no rows).
row_len = len(wiki_thead_txt)
tbody_txt_as_lst = [
    wiki_tbody_txt[end - row_len:end]
    for end in range(row_len, len(wiki_tbody_txt), row_len)
]

# dataframe of the companies listed in the NASDAQ-100 table (minus the last row, see above)
nasdaq_df = pd.DataFrame(tbody_txt_as_lst, columns=wiki_thead_txt)
nasdaq_df

Unnamed: 0,Company,Ticker
0,Activision Blizzard,ATVI
1,Adobe Inc.,ADBE
2,Advanced Micro Devices,AMD
3,Alexion Pharmaceuticals,ALXN
4,Align Technology,ALGN
...,...,...
97,"Walgreen Boots Alliance, Inc.",WBA
98,"Workday, Inc.",WDAY
99,Western Digital,WDC
100,"Xcel Energy, Inc.",XEL


In [11]:
# The row-grouping cell leaves out the final table row, so build a one-row
# dataframe from the last two cell texts (company name, ticker).
last_row = wiki_tbody_txt[-2:]
df2 = pd.DataFrame(data=[last_row], columns=['Company', 'Ticker'])
df2

Unnamed: 0,Company,Ticker
0,Zoom Video Communications,ZM


In [12]:
# no longer missing
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, so the row
# is added with pd.concat. drop_duplicates keeps the cell idempotent:
# re-running it no longer stacks a second copy of the same row.
nasdaq_df = (
    pd.concat([nasdaq_df, df2], ignore_index=True)
    .drop_duplicates(ignore_index=True)
)
nasdaq_df

Unnamed: 0,Company,Ticker
0,Activision Blizzard,ATVI
1,Adobe Inc.,ADBE
2,Advanced Micro Devices,AMD
3,Alexion Pharmaceuticals,ALXN
4,Align Technology,ALGN
...,...,...
98,"Workday, Inc.",WDAY
99,Western Digital,WDC
100,"Xcel Energy, Inc.",XEL
101,"Xilinx, Inc.",XLNX


# 3. continue with selenium

                                                    Set up

In [13]:
# NOTE: this whole cell is commented out, so no browser is started when the
# notebook runs; uncomment it to drive Edge through Selenium.
# # open selenium
# navegador = webdriver.Edge()
# # define " wait " to be used later along with ExpectedConditions
# wait = WebDriverWait(navegador,15)

                                                    Iterate

In [14]:
# NOTE: commented-out helper. As written it would call nasdaq_downloads(navegador, link)
# once per link and print a message when a call raises, then move on to the next link.
# NOTE(review): the bare `except:` also catches KeyboardInterrupt/SystemExit; prefer
# `except Exception:` if this is re-enabled.
# def get_all_info(navegador, links):
#     for i in links:
#         try:
#             nasdaq_downloads(navegador, i)
#         except:
#             print(f'There was an unexpected issue at\n{i}\nresuming endeavour')
#             pass

                                                    Download

In [15]:
# The timed download call is commented out, so nothing is downloaded here.
# The print below only echoes a hard-coded string; it is not a live measurement
# (presumably copied from an earlier real run - confirm).
#%%time
#get_all_info(navegador, full_url)
print("Wall time: 31min 54s")

Wall time: 31min 54s


# 4. Merge files and ready DataFrame to plot

                                                   Rename files

In [16]:
# Folder that holds the downloaded CSV files.
# NOTE(review): absolute, machine-specific path; adjust it before running elsewhere.
path = 'c:\\users\\dxoco\\desktop\\ironhack\\datamex_082020\\module-1\\web-project\\data'

files = []
# r=root, d=directories, names = file names inside r
for r, d, names in os.walk(path):
    for file in names:
        # endswith() accepts real CSV files only; a substring test would also
        # match names such as 'data.csv.bak'.
        if file.endswith('.csv'):
            files.append(os.path.join(r, file))
# source:
# https://mkyong.com/python/python-how-to-list-all-files-in-a-directory/

# Later cells index `f` and `files` in parallel, so derive `f` from `files`
# instead of relying on the loop variable left over from the last os.walk step
# (which holds only the last directory's entries, including non-CSV files).
f = [os.path.basename(csv_path) for csv_path in files]

In [17]:
# Rename the i-th downloaded file to "<i-th ticker>.csv".
# The pairing is purely positional, so running it again on files that already
# carry ticker names would relabel the data. The original relied on
# FileExistsError to stop that, but os.rename only raises it on Windows; on
# POSIX an existing destination is silently overwritten. Hence the explicit checks.
ticker_files = {f'{tick}.csv' for tick in tickers}
if any(name in ticker_files for name in f):
    # at least one file is already named after a ticker: do not rename again
    print('already done!\nplease check variable "f":')
elif len(f) > len(tickers):
    # refuse up front instead of failing half-way through the renames
    raise ValueError(f'{len(f)} files but only {len(tickers)} tickers; cannot pair them up')
else:
    for old_name, tick in zip(f, tickers):
        target = os.path.join(path, f'{tick}.csv')
        if os.path.exists(target):
            raise FileExistsError(target)
        os.rename(os.path.join(path, old_name), target)
f[:3]
# source:
# https://datatofish.com/rename-file-python/

already done!
please check variable "f":


['AAPL.csv', 'ADBE.csv', 'ADI.csv']

                                                    Conglomo

In [18]:
# origin
# Load the first CSV. `dat` keeps every column because later cells reuse
# dat.columns to select columns from the other files.
dat = pd.read_csv(files[0])
# Frame used as the left side of the merges: everything except the price and
# volume columns (presumably just 'Date' - confirm against the CSV layout).
# errors='ignore' replaces the former bare `except`: labels that are absent
# are simply skipped instead of aborting the whole drop.
df_origin = dat.drop(
    columns=[' Close/Last', ' Volume', ' Open', ' High', ' Low'],
    errors='ignore',
)
# Independent copy, so in-place changes to df_merge_col cannot alter df_origin.
df_merge_col = df_origin.copy()

#### last 30 days of stock price

In [19]:
%%time
# iterative
# Build one closing-price column per ticker, joined on 'Date'.
# The merge restarts from df_origin each time, so re-running this cell does not
# merge the ticker columns onto an already merged frame.
df_merge_col = df_origin
for file_name, file_path in zip(f, files):
    # the ticker is the file name without its extension
    tick = os.path.splitext(file_name)[0]
    dat_iter = pd.read_csv(file_path)
    df_iter = (
        pd.DataFrame(dat_iter, columns=list(dat.columns))
        .rename(columns={' Close/Last': tick})
        .drop(columns=[' Volume', ' Open', ' High', ' Low'])
    )
    # default (inner) merge: only dates present in every file survive
    df_merge_col = pd.merge(df_merge_col, df_iter, on='Date')
# source to merge dataframes:
# https://www.datacamp.com/community/tutorials/joining-dataframes-pandas
# non-inplace set_index returns a new frame, so df_origin is never modified
df_merge_col = df_merge_col.set_index('Date')

Wall time: 1.15 s


In [20]:
# Remove the dollar sign from every cell and convert all columns to float64.
# This is done column by column with the vectorised .str accessor;
# DataFrame.applymap is deprecated in recent pandas releases.
# The result must be assigned back, otherwise the cleaned frame is discarded.
df_merge_col = (
    df_merge_col
    .astype(str)
    .apply(lambda column: column.str.replace('$', '', regex=False))
    .astype('float64')
)
# and to apply a function column by column we use .apply

In [75]:
# Preview the first three rows of the cleaned, all-numeric price frame.
df_merge_col.head(3)

Unnamed: 0_level_0,AAPL,ADBE,ADI,ADP,ADSK,ALGN,ALXN,AMAT,AMD,AMGN,...,ULTA,VRSK,VRSN,VRTX,WBA,WDAY,WDC,XEL,XLNX,ZM
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
09/04/2020,120.96,491.94,117.44,138.53,233.88,309.0,112.97,60.96,82.01,248.4,...,240.51,182.77,203.54,266.5,36.89,215.8,38.16,70.02,101.64,317.2
09/03/2020,120.88,507.8,117.12,139.14,242.09,314.52,111.52,61.46,82.54,247.91,...,238.04,187.28,207.78,267.42,37.09,223.0,37.71,70.65,103.1,312.8
09/02/2020,131.4,533.8,121.83,142.48,261.35,324.43,113.82,65.08,90.22,258.12,...,237.66,192.79,219.7,281.89,37.17,233.48,38.86,71.55,108.86,310.2


#### Last 30 days of stock price times volume

In [72]:
%%time
def _dollars_to_float(column):
    """Return `column` as float64 after removing any '$' characters."""
    return column.astype(str).str.replace('$', '', regex=False).astype('float64')


# One "<ticker>•Vol" column per file: closing price times traded volume.
# Only the two columns needed are cleaned, once per file, and the index is set
# once at the end. The original re-cleaned and re-indexed the whole accumulated
# frame on every iteration.
df_totals = df_origin
for file_name, file_path in zip(f, files):
    tick = os.path.splitext(file_name)[0]
    dat_iter = pd.read_csv(file_path)
    traded_value = _dollars_to_float(dat_iter[' Close/Last']) * _dollars_to_float(dat_iter[' Volume'])
    df_iter = pd.DataFrame({'Date': dat_iter['Date'], tick+'•Vol': traded_value})
    # default (inner) merge: only dates present in every file survive
    df_totals = pd.merge(df_totals, df_iter, on='Date')
df_totals = df_totals.set_index('Date')


Wall time: 3.41 s


In [87]:
# Preview the first three rows of the price-times-volume frame.
df_totals.head(3)

Unnamed: 0_level_0,AAPL•Vol,ADBE•Vol,ADI•Vol,ADP•Vol,ADSK•Vol,ALGN•Vol,ALXN•Vol,AMAT•Vol,AMD•Vol,AMGN•Vol,...,ULTA•Vol,VRSK•Vol,VRSN•Vol,VRTX•Vol,WBA•Vol,WDAY•Vol,WDC•Vol,XEL•Vol,XLNX•Vol,ZM•Vol
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
09/04/2020,39978740000.0,1917835000.0,442304900.0,405085800.0,632561400.0,258773600.0,360083100.0,893618736.0,6742298000.0,722778900.0,...,274205700.0,127530900.0,199173700.0,403706200.0,257877800.0,557026800.0,286204400.0,176364000.0,259492400.0,17114526.0
09/03/2020,31138640000.0,2964353000.0,688695900.0,282933100.0,604678100.0,255066000.0,286900900.0,676857750.8,7219170000.0,777865000.0,...,231704800.0,168564400.0,249384400.0,433115300.0,307031200.0,716320600.0,277723500.0,200237600.0,257199800.0,13764764.0
09/02/2020,26295640000.0,1485799000.0,628408000.0,316304500.0,620459800.0,282531800.0,170184700.0,703025398.4,4544104000.0,741217100.0,...,214893800.0,173649000.0,171901200.0,381221000.0,285181800.0,800706100.0,288270800.0,208090800.0,258035500.0,13784977.8


# 5. Plot as bar chart race

                                                    Fanciness

In [85]:
# bar_chart_race animates the rows in the order they appear. The preview of
# df_totals shows the newest date first, so the rows are put in chronological
# order to make the race run forward in time.
df_race = df_totals.iloc[pd.to_datetime(df_totals.index).argsort()]

bcr.bar_chart_race(df_race,
                   sort='desc',
                   n_bars=9,
                   fixed_order=False,
                   fixed_max=True,
                   title='NASDAQ Daily Stocks Trade',
                   title_size='',
                   # v holds the current period's values; v.sum() totals every
                   # column, however many tickers there are (was nlargest(103)).
                   period_summary_func=lambda v, r: {'x': .99, 'y': .18,
                                                     's': f'Total Daily Value: {v.sum():,.0f}',
                                                     'ha': 'right', 'size': 8, 'family': 'DejaVu Sans'},
                   period_length=2500,
                   steps_per_period=20)
# source
# https://medium.com/dunder-data/bar-chart-race-python-package-official-release-78a420e182a2

  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
 

