In [6]:
from urllib.request import urlopen
from urllib.error import HTTPError
from bs4 import BeautifulSoup
import csv

def add_fund_data(filename, fdetail, rows):
    """Append one CSV line per holdings row of a fund to ``filename``.

    Each line written is ``name, fundid, date`` (taken from ``fdetail``)
    followed by the text of every cell of the table row except the first.
    Fields are written through ``csv.writer`` so that values containing
    commas, quotes or newlines are quoted instead of corrupting the file.

    Args:
        filename: Path of the CSV file to append to.
        fdetail: Mapping with the keys ``'name'``, ``'fundid'`` and ``'date'``.
        rows: Sequence of table-row objects exposing ``find_all("td")``;
            the returned cells must expose ``get_text()``. The first row is
            skipped (presumably a header row -- confirm against the page).
    """
    prefix = [fdetail['name'], fdetail['fundid'], fdetail['date']]
    with open(filename, "a", newline="") as myfile:
        # Rows end with '\n', matching the hand-written format used before.
        writer = csv.writer(myfile, lineterminator="\n")
        for row in rows[1:]:
            cells = row.find_all("td")
            # Rows without a second cell carry no holding data and would
            # otherwise raise IndexError on cells[1].
            if len(cells) < 2:
                continue
            # The 'Total Stock' row marks the end of the individual holdings.
            if 'Total Stock' in cells[1].get_text():
                break
            writer.writerow(prefix + [cell.get_text() for cell in cells[1:]])

def append_pf_details(pf_file, fundmaster_file):
    """Download each fund's portfolio page and append new holdings to ``pf_file``.

    A fund is identified by ``"<fundid>_<date>"``. Keys already present in
    ``pf_file`` are skipped, and keys added during this run are remembered
    too, so running the function again (or listing a fund twice in
    ``fundmaster_file``) does not duplicate data.

    Args:
        pf_file: Existing CSV file with at least the columns ``fundid`` and
            ``date``; new rows are appended to it via ``add_fund_data``.
        fundmaster_file: CSV file with a ``url`` column, one row per fund
            page to fetch.
    """
    # Keys of every fund/date combination already stored in pf_file.
    with open(pf_file, 'r') as myfile:
        data_exists = {
            row['fundid'] + '_' + row['date'] for row in csv.DictReader(myfile)
        }

    with open(fundmaster_file, 'r') as myfile:
        for row in csv.DictReader(myfile):
            try:
                # The context manager closes the HTTP response once read.
                with urlopen(row['url']) as response:
                    page = response.read()
            except HTTPError as err:
                # One unavailable page should not abort the remaining funds;
                # a later re-run picks it up because nothing was recorded.
                print('SKIPPING %s as the server returned HTTP %s' % (row['url'], err.code))
                continue

            soup = BeautifulSoup(page, "html.parser")
            fdetail = {
                'name': soup.find(id='ctl00_ContentPlaceHolder1_ucQuoteHeader_lblName').get_text(),
                'fundid': soup.find(id='ctl00_ContentPlaceHolder1_ucQuoteHeader_spnCode').get_text(),
                'date': soup.find(id='ctl00_ContentPlaceHolder1_lblPfSummaryDate').get_text(),
            }
            fundkey = fdetail['fundid'] + '_' + fdetail['date']
            if fundkey in data_exists:
                print ('SKIPPING as Data exists for %s %s'%(fdetail['name'], fdetail['date']))
                continue

            rows = soup.find("", {"class": "pf_detailed"}).find("tbody").find_all("tr")
            add_fund_data(pf_file, fdetail, rows)
            # Record the key so a duplicate entry later in this run is skipped.
            data_exists.add(fundkey)
            print ('Added for %s %s'%(fdetail['name'], fdetail['date']))
    

def generate_universe(pf_file, map_file, out_file):
    """Write the symbol list for every name that occurs in ``pf_file``.

    ``map_file`` maps a name (column ``Mstarname``) to its ``NSE`` and ``BSE``
    symbols. Every distinct value of the ``name`` column of ``pf_file`` is
    looked up in that map. If any name is unmapped, the unmapped names are
    printed and nothing is written. Otherwise ``out_file`` is (re)created with
    a ``Mstarname,NSE,BSE`` header and one row per distinct name.

    Rows are written in sorted name order so the output is identical from run
    to run, and through ``csv.writer`` so values containing commas or quotes
    are quoted correctly.

    Args:
        pf_file: CSV file with a ``name`` column.
        map_file: CSV file with the columns ``Mstarname``, ``NSE``, ``BSE``.
        out_file: Path of the CSV file to write; overwritten if it exists.

    Returns:
        None in all cases.
    """
    with open(map_file, 'r') as myfile:
        symbol_map = {
            row['Mstarname']: (row['NSE'], row['BSE'])
            for row in csv.DictReader(myfile)
        }

    missing, existing = set(), set()
    with open(pf_file, 'r') as myfile:
        for row in csv.DictReader(myfile):
            name = row['name']
            if name in symbol_map:
                existing.add(name)
            else:
                missing.add(name)

    # Refuse to produce a partial list: the map must be completed first.
    if missing:
        print ('Fix these:', missing)
        return

    with open(out_file, "w", newline="") as myfile:
        # Rows end with '\n', matching the hand-written format used before.
        writer = csv.writer(myfile, lineterminator="\n")
        writer.writerow(('Mstarname', 'NSE', 'BSE'))
        # Sets have no stable iteration order; sort for reproducible output.
        for name in sorted(existing):
            nse, bse = symbol_map[name]
            writer.writerow((name, nse, bse))
        
    
if __name__ == "__main__":
    # Input/output files, all given relative to the working directory.
    pf_file, fundmaster_file = "pf_details.csv", "fund_names.csv"
    map_file, out_file = "symbol_map.csv", "master_list.csv"

    # The download step is disabled; uncomment the next line to run it
    # before the universe file is generated.
    # append_pf_details(pf_file, fundmaster_file)
    generate_universe(pf_file=pf_file, map_file=map_file, out_file=out_file)
    

    



In [2]:
# Scratch check: a set built from two distinct strings has length 2.
a = {'hello', 'world'}
print('hello', len(a))

hello 2
