-
Notifications
You must be signed in to change notification settings - Fork 0
/
Extarct_tickers_from_stock_exchange_data.py
70 lines (44 loc) · 1.78 KB
/
Extarct_tickers_from_stock_exchange_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Apr 9 21:18:40 2018
Extracts the list of companies from Nasdaq and NYSE, filters out companies without data
and those companies that have a MarketCap less than specified
@author: dimm
"""
import pandas as pd
import os
# Root directory of the data set; the per-ticker folders are looked up
# under "<intraQuarterPath>/_KeyStats".
intraQuarterPath = "intraQuarter"
def Extract_Initial_Stock_List(base_path=None):
    """Build the initial ticker list from the key-statistics folders.

    Walks ``<base_path>/_KeyStats`` and records one ticker for every
    non-empty directory found below it. The ticker is the upper-cased name
    of the first path component underneath ``_KeyStats``, so the result does
    not depend on how deep ``base_path`` is or on the platform's path
    separator. A non-empty nested directory contributes its top-level
    folder's ticker again.

    Args:
        base_path: Root data directory. When omitted, the module-level
            ``intraQuarterPath`` is used.

    Returns:
        A ``pandas.DataFrame`` with a single ``Symbol`` column. It is empty
        when the stats directory is missing or holds no non-empty folders.
    """
    if base_path is None:
        base_path = intraQuarterPath
    statspath = os.path.join(base_path, "_KeyStats")

    walker = os.walk(statspath)
    # The first entry yielded by os.walk is the stats root itself, which is
    # not a ticker folder.
    next(walker, None)

    tickers = []
    for each_dir, sub_dirs, file_names in walker:
        if not (sub_dirs or file_names):
            continue  # empty folder: nothing was collected for this ticker
        relative = os.path.relpath(each_dir, statspath)
        tickers.append(relative.split(os.sep)[0].upper())

    df = pd.DataFrame(columns=['Symbol'], data=tickers)
    return df
def f(value):
    """Convert a market-cap string such as ``"$1.5B"`` into a float.

    The dollar sign, thousands separators and surrounding whitespace are
    ignored. A trailing ``K``, ``M``, ``B`` or ``T`` (any case) scales the
    number by a thousand, a million, a billion or a trillion respectively;
    a value without a suffix is taken as a plain dollar amount.

    Args:
        value: The market-cap text to convert.

    Returns:
        The amount as a ``float``, or ``None`` when the text does not
        contain a parsable number.
    """
    multipliers = {
        "K": 1000,
        "M": 1000000,
        "B": 1000000000,
        "T": 1000000000000,
    }
    text = str(value).replace('$', '').replace(',', '').strip().upper()
    if not text:
        return None

    multiplier = multipliers.get(text[-1])
    if multiplier is None:
        # No recognised suffix: treat the whole text as the amount.
        number, multiplier = text, 1
    else:
        number = text[:-1]

    try:
        return float(number) * multiplier
    except ValueError:
        # Unparsable text (e.g. "n/a") is reported as "no value" rather than
        # aborting the whole conversion.
        return None
# Smallest market capitalisation (in dollars) a listed company must have
# to be kept.
MIN_MARKET_CAP = 900000000

# NYSE listing: keep rows that have a market cap and turn it into a number.
df1 = pd.read_csv('companylist_nyse.csv')
df1 = df1[['Symbol', 'MarketCap']]
df1 = df1[df1['MarketCap'].notnull()]
df1['MarketCap'] = df1['MarketCap'].apply(f)

# NASDAQ listing: same treatment.
df2 = pd.read_csv('companylist_nasdaq.csv')
df2 = df2[['Symbol', 'MarketCap']]
df2 = df2[df2['MarketCap'].notnull()]
df2['MarketCap'] = df2['MarketCap'].apply(f)

# Stack both listings and drop everything below the threshold.
df_c = pd.concat([df1, df2], ignore_index=True, verify_integrity=True)
df_c = df_c[df_c['MarketCap'] >= MIN_MARKET_CAP]

# Start from the ticker file written by a previous run when there is one;
# otherwise build the list from the key-statistics folders.
if not os.path.exists('Tickers.csv'):
    tickers = Extract_Initial_Stock_List()
else:
    tickers = pd.read_csv("Tickers.csv")
tickers = tickers[['Symbol']]

# Outer-join the known tickers with the filtered listings on 'Symbol',
# keep only the symbol column, and write the result back to Tickers.csv.
caps_by_symbol = df_c.set_index('Symbol')
new_df = tickers.join(caps_by_symbol, how='outer', on='Symbol')
new_df = new_df[['Symbol']]
new_df.to_csv("Tickers.csv")