
<h2>This notebook scrapes Benzinga's premarket movers for stocks that meet the specified conditions, then uses their tickers to scrape prices from Yahoo Finance</h2>


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import requests
import bs4 as bs
import datetime as dt
import webbrowser
import time

In [None]:
#premarket movers on benzinga

def _to_float(text):
    """Convert scraped text such as '$1,234.50' or '21.05%' to a float."""
    # float() rejects currency signs, percent signs and thousands separators.
    return float(text.replace('$', '').replace('%', '').replace(',', ''))


def get_million(min_price=4, min_rank=10, min_volume=1):
    """Scrape Benzinga's premarket table and return rows with volume in the millions.

    Downloads https://www.benzinga.com/premarket/, reads the premarket stock
    table, keeps only the rows whose volume text ends in ``M`` and then applies
    the three filters below.

    Parameters
    ----------
    min_price : float, default 4
        Keep rows with ``Price$ >= min_price``.
    min_rank : int, default 10
        Keep rows with ``Rank <= min_rank``. Despite the name this is an upper
        bound: ``Rank`` is the 1-based position of the row among the parsed rows.
    min_volume : float, default 1
        Keep rows with ``Volume(M) >= min_volume`` (volume in millions).

    Returns
    -------
    pandas.DataFrame
        Columns ``Rank``, ``Ticker``, ``Name``, ``Price$``, ``Change%`` and
        ``Volume(M)``. The index is the 0-based position among the parsed rows.

    Raises
    ------
    requests.HTTPError
        If the page request returns an error status.
    ValueError
        If the premarket table is not present in the page, or a price/change
        cell cannot be converted to a number.
    """
    url = 'https://www.benzinga.com/premarket/'
    # A timeout keeps a stalled connection from hanging the notebook forever.
    resp = requests.get(url, timeout=30)
    resp.raise_for_status()
    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed.
    soup = bs.BeautifulSoup(resp.text, 'html.parser')
    table = soup.find('table', class_='premarket-stock-table premarket-stock-table--scrollable')
    if table is None:
        raise ValueError('premarket stock table not found at ' + url)

    records = []
    # Skip the first row (presumably the column titles).
    for row in table.find_all('tr')[1:]:
        cells = [cell.text.strip() for cell in row.find_all('td')]
        if len(cells) < 5:
            # Not a data row (e.g. a spacer); ignore it instead of crashing.
            continue
        ticker, name, price, change, volume = cells[:5]
        records.append({
            'Ticker': ticker,
            'Name': name,
            'Price$': _to_float(price),
            'Change%': _to_float(change),
            'Volume': volume,
        })

    benzinga = pd.DataFrame(records, columns=['Ticker', 'Name', 'Price$', 'Change%', 'Volume'])
    benzinga.insert(0, 'Rank', np.arange(1, len(benzinga) + 1))

    # Only volumes written as "<number>M" are in the millions; require the M
    # to be the last character because it is sliced off below.
    in_millions = benzinga['Volume'].str.endswith('M').astype(bool)
    million = (
        benzinga[in_millions]
        .assign(Volume=lambda frame: frame['Volume'].str[:-1]
                .str.replace(',', '', regex=False)
                .astype('float'))
        .rename(columns={'Volume': 'Volume(M)'})
    )

    keep = (
        (million['Price$'] >= min_price)
        & (million['Rank'] <= min_rank)
        & (million['Volume(M)'] >= min_volume)
    )
    return million[keep]

In [None]:
#Scrape Benzinga with the default filters and render the resulting table
display(get_million())

Unnamed: 0,Rank,Ticker,Name,Price$,Change%,Volume(M)
3,4,CLVS,Clovis Oncology,6.44,21.05,8.07
4,5,ZKIN,ZK International Gr Co,10.5,19.72,5.87


In [None]:
#Return tickers with volume greater than or equal to 1 million

def million_to_list(min_price=4, min_rank=10, min_volume=1):
    """Return the tickers selected by ``get_million`` as a plain list.

    The three filter arguments are forwarded unchanged to ``get_million``.
    Their defaults equal ``get_million``'s own defaults, so calling
    ``million_to_list()`` with no arguments behaves exactly as before.
    """
    movers = get_million(min_price=min_price, min_rank=min_rank, min_volume=min_volume)
    return movers['Ticker'].tolist()

million_to_list()

['CLVS', 'ZKIN']

In [None]:
#Open the Yahoo Finance chart page of every ticker returned by get_million()
#Base URL of the Yahoo Finance chart pages; a ticker symbol is appended to it
yahoo = 'https://finance.yahoo.com/chart/'
#One chart URL per ticker returned by get_million() at the time this cell runs
links = [yahoo + symbol for symbol in get_million()['Ticker']]

def open_finance():
    """Open each URL in the module-level ``links`` list in a new browser tab."""
    for chart_url in links:
        webbrowser.open_new_tab(chart_url)

open_finance()

In [None]:
#Collect Time and Stock Prices, with the Stock Tickers as column names

#Seconds to wait between polling rounds, so the sites are not hit in a tight loop
POLL_SECONDS = 1


def _fetch_price(ticker):
    """Return the price shown on the Yahoo Finance chart page for ``ticker``.

    Returns NaN when the price element is missing from the page, so that one
    failed lookup does not discard the samples already collected.
    """
    resp = requests.get(yahoo + ticker, timeout=30)
    soup = bs.BeautifulSoup(resp.text, 'html.parser')
    spans = soup.find_all('span', class_='Trsdu(0.3s) Fw(b) Fz(14px) Mb(-4px) D(ib)')
    if not spans or not spans[0].contents:
        return float('nan')
    #float() rejects thousands separators, so drop any commas first
    return float(str(spans[0].contents[0]).replace(',', ''))


#Scrape the ticker list once: re-scraping inside the loop could add or drop
#tickers between rounds, leaving columns of different lengths
tickers = million_to_list()

#One list of samples per column: the timestamp plus one list per ticker
stocks = {'Time': []}
for stock in tickers:
    stocks[stock] = []

#End time 7:30AM of current day
end = dt.datetime.now().replace(hour=7, minute=30, second=0, microsecond=0)

#Always record at least one round, then keep polling until the end time
while True:
    now = dt.datetime.now()
    stocks['Time'].append(now.strftime('%H:%M:%S'))
    for stock in tickers:
        stocks[stock].append(_fetch_price(stock))

    if now >= end:
        break

    time.sleep(POLL_SECONDS)

In [None]:
#Show the raw samples collected by the polling loop
stocks

{'CLVS': [7.68], 'Time': ['19:49:06'], 'ZKIN': [11.11]}

In [None]:
#Create the dataframe
#Turn the `stocks` dictionary into a table with one column per key
millions_dataframe = pd.DataFrame(data=stocks)

millions_dataframe

Unnamed: 0,Time,CLVS,ZKIN
0,18:50:20,7.88,11.32


In [None]:
#Save to CSV
#File name is stocks_<day>_<month>_<2-digit year>.csv, written to the working directory
date = dt.datetime.now().strftime('_%d_%m_%y')
name = ''.join(['stocks', date, '.csv'])

millions_dataframe.to_csv(name)