In [2]:
# Importing Libraries
import csv
import datetime,pytz
import os
from urllib.error import HTTPError
from urllib.error import URLError
from urllib.parse import urlencode
from urllib.request import urlopen

import pandas as pd
from bs4 import BeautifulSoup
from deta import Deta

In [69]:
def crawler(url='http://www.nepalstock.com'):
    """Scrape the NEPSE landing page and return its headline market figures.

    Args:
        url (str): Page to fetch. Defaults to the nepalstock.com home page.

    Returns:
        dict | None: ``None`` when the page could not be fetched (the error
        is printed). Otherwise a dict with these keys:

        * ``"NEPSE_INDEX"`` (str): first whitespace-separated token of the
          ``div.current-index`` text.
        * ``"market_status"`` (str): stripped text of the ``<b>`` element
          inside ``#top-notice-bar``.
        * ``"market_data"`` (dict): one entry per ``<span>`` in
          ``#market_info``, with the span text split on ``|`` into
          label and value (whitespace is kept as found on the page).
        * ``"market_summary"`` (dict): ``{"title": str, "data": dict}`` built
          from the first table inside ``#nepse-stats``.
        * ``"market_information"`` (dict): the remaining tables, keyed by the
          first column header of each table; every value is a list with one
          ``{header: cell text}`` dict per non-empty body row.

    Raises:
        AttributeError, IndexError: If the page no longer contains the
            elements looked up below (``find`` returns ``None`` for a missing
            element).
    """
    try:
        # The context manager closes the HTTP response once the body is read.
        with urlopen(url) as response:
            page = response.read()
    except HTTPError as e:
        # HTTPError is a subclass of URLError, so it must be handled first.
        print(e)
        return None
    except URLError:
        print('The server could not be found!')
        return None
    print('Website Successfully Crawled.')

    bs = BeautifulSoup(page, 'html.parser')

    index = bs.find('div', {'class': 'current-index'}).text.split()[0]
    market_status = bs.find('div', {'id': 'top-notice-bar'}).find('b').text.strip()

    market_data = {}
    for span in bs.find('div', {'id': 'market_info'}).find_all('span'):
        parts = span.text.split('|')
        # Spans without a "label|value" separator carry no data; skip them.
        if len(parts) > 1:
            market_data[parts[0]] = parts[1]

    # A string with several classes matches the exact value of the class attribute.
    tables = bs.find('div', {'id': 'nepse-stats'}).find_all(
        'table', {'class': 'table table-hover table-condensed'}
    )

    # Market summary: the first table holds one "label\nvalue" pair per row.
    summary_rows = {}
    for row in tables[0].find("tbody").find_all('tr'):
        cells = row.text.strip().split('\n')
        if len(cells) > 1:
            summary_rows[cells[0]] = cells[1]
    market_summary = {
        "title": tables[0].find("thead").find('td').text,
        "data": summary_rows,
    }

    # Remaining tables: the last header row supplies the column names.
    market_information = {}
    for table in tables[1:]:
        header_rows = table.find("thead").find_all('tr')
        if not header_rows:
            # No header: nothing to key this table by, and the previous
            # table's header must not be reused.
            continue
        t_head = [cell.text for cell in header_rows[-1].find_all('td') if cell.text != '']
        if not t_head:
            continue
        market_information[t_head[0]] = [
            {k: v.text.strip() for k, v in zip(t_head, row.find_all('td'))}
            for row in table.find("tbody").find_all('tr')
            if len(row.text.strip()) > 0
        ]

    return {
        "NEPSE_INDEX": index,
        "market_status": market_status,
        "market_data": market_data,
        "market_summary": market_summary,
        "market_information": market_information,
    }

# Run the crawler; as the cell's last expression, its return value is shown as the output.
crawler()


Website Successfully Crawled.


{'NEPSE_INDEX': '2,633.65',
 'market_status': 'Market Close',
 'market_info': {'Share Volume ': ' 5,310,215', 'Turnover ': ' 2,742,085,704'},
 'market_summary': {'title': 'Market Summary',
  'data': {'Total Turnover Rs:': '2,742,085,704',
   'Total Traded Shares': '5,310,215',
   'Total Transactions': '37,585',
   'Total Scrips Traded': '223',
   'Total Market Capitalization Rs.': '3,702,494.52 Millions',
   'Floated Market Capitalization Rs.': '1,293,085.05 Millions'}},
 'market_information': {'Index': [{'Index': 'NEPSE',
    'Current': '2,633.65',
    'Points Change': '5.28',
    '%Change': '0.2'},
   {'Index': 'Sensitive',
    'Current': '496.74',
    'Points Change': '0.89',
    '%Change': '0.18'},
   {'Index': 'Float',
    'Current': '181.50',
    'Points Change': '0.19',
    '%Change': '0.1'},
   {'Index': 'Sen. Float',
    'Current': '161.40',
    'Points Change': '0.16',
    '%Change': '0.1'}],
  'Sub-Indices': [{'Sub-Indices': 'Banking',
    'Current': '1,763.02',
    'Points 

In [5]:
def share_price_crawler(url="http://www.nepalstock.com/todaysprice", limit=300):
    """Download today's share prices, save them to a CSV file and upload it to Deta Drive.

    The page is requested with a POST body of ``_limit=<limit>``. Every table
    row with exactly 10 ``<td>`` cells is kept; the first such row is used as
    the CSV header and the rest as data rows. The file is written to the
    current working directory as ``share_data_<timestamp>.csv`` (timestamp in
    the Asia/Kathmandu time zone) and then uploaded to the ``csv_files`` Deta
    Drive.

    Args:
        url (str): Page to fetch. Defaults to the "today's price" page.
        limit (int): Value sent as the ``_limit`` form field. Defaults to 300.

    Returns:
        None. Progress and failures are reported with ``print``. Nothing is
        written when the page cannot be fetched or holds no price rows, and
        the upload is skipped when ``DETA_PROJECT_KEY`` is not set.
    """
    post_body = urlencode({"_limit": str(limit)}).encode('ascii')
    try:
        # The context manager closes the HTTP response once the body is read.
        with urlopen(url, post_body) as response:
            page = response.read()
    except HTTPError as e:
        # HTTPError is a subclass of URLError, so it must be handled first.
        print(e)
        return None
    except URLError:
        print('The server could not be found!')
        return None
    print('Website Successfully Crawled.')

    bs = BeautifulSoup(page, 'html.parser')
    # A string with several classes matches the exact value of the class attribute.
    table = bs.find('table', {'class': 'table table-condensed table-hover'})
    if table is None:
        print('No share price table found on the page.')
        return None

    # Only 10-cell rows are price rows; other rows are skipped.
    rows = [row for row in table.find_all('tr') if len(row.find_all('td')) == 10]
    if not rows:
        print('No share price rows found on the page.')
        return None

    t_head = [cell.text for cell in rows[0].find_all('td')]
    # Rows are kept as lists so that repeated header names cannot drop columns.
    t_body = [[cell.text.strip() for cell in row.find_all('td')] for row in rows[1:]]

    tz = pytz.timezone("Asia/Kathmandu")
    file_name = 'share_data_' + datetime.datetime.now(tz).strftime("%Y_%m_%d-%I_%M_%S_%p") + '.csv'
    # csv.writer quotes cells that contain commas, which a plain ','.join would
    # split into extra columns.
    with open(file_name, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f, lineterminator='\n')
        writer.writerow(t_head)
        writer.writerows(t_body)
    print("Datas written to file successfully.")

    # Upload only after the file is closed, so the complete content is on disk.
    # The project key comes from the environment instead of the source code.
    # NOTE(review): a key was previously embedded in this cell; treat it as
    # exposed and rotate it.
    project_key = os.environ.get("DETA_PROJECT_KEY")
    if not project_key:
        print("DETA_PROJECT_KEY is not set; skipping upload to Deta.")
        return None
    deta = Deta(project_key)  # configure your Deta project
    drive = deta.Drive("csv_files")  # access to your drive
    drive.put(file_name, path=f'./{file_name}')  # upload file
    print(f"File {file_name} uploaded to Deta successfully.")
    return None
            

# Run the price crawler; its progress messages are printed as the cell output.
share_price_crawler()

Website Successfully Crawled.
Datas written to file successfully.
File share_data_2021_12_01-02_29_40_PM.csv uploaded to Deta successfully.


In [113]:
# Load the share prices from the local "shares.csv" file and preview the first rows.
# NOTE(review): share_price_crawler() writes a timestamped share_data_*.csv, not
# "shares.csv" — confirm where this file comes from.
share_data=pd.read_csv("shares.csv")
share_data.head()

Unnamed: 0,S.N.,Traded Companies,No. Of Transaction,Max Price,Min Price,Closing Price,Traded Shares,Amount,Previous Closing,Difference Rs.
0,1,10% Prabhu Bank Debenture 2084,1,1020.0,1020.0,1020.0,55.0,56100.0,1028.0,-8.0
1,2,10.35% Agricultural Bank Debenture 2083,2,1015.1,1015.0,1015.0,260.0,263905.0,1032.0,-17.0
2,3,11% NIC Asia Debenture 082/83,1,1056.7,1056.7,1056.7,200.0,211340.0,1036.0,20.7
3,4,12 % Goodwill Finance Limited Debenture 2083,5,1140.0,1125.0,1140.0,671.0,759375.0,1140.0,0.0
4,5,12% ICFC Finance Limited Debenture 2083,9,1140.0,1118.0,1130.0,884.0,995770.0,1120.0,10.0


In [118]:
# (rows, columns) of the loaded share data.
share_data.shape

(218, 10)