## Real-time Data of Companies

In [1]:
# see https://tse.ir/MarketWatch.html?cat=cash

import requests
import json
from datetime import datetime

now = int(datetime.now().timestamp()*1000)
r = requests.get(f'http://tse.ir/json/MarketWatch/data_1.json?_={now}')

In [5]:
translate = {
    'namad': 'نماد',
    'name': 'نام',
    'hajm': 'حجم',
    'arzesh': 'ارزش',
    'dm': 'دفعات معامله',
    'bish': 'بیشترین',
    'kam': 'کمترین',
    'pghey': 'مقدار قیمت پایانی',
    'ptagh': 'تغییر قیمت پایانی',
    'pdar': 'درصد تغییر قیمت پایانی',
    'aghey': 'مقدار قیمت آخرین',
    'atagh': 'تغییر قیمت آخرین',
    'adar': 'درصد تغییر قیمت آحرین',
    'rgh': 'روز قبل',
    'tdsh': 'تاثیر در شاخص',
    'pe': 'P/E',
    'eps': 'EPS',
    'ab': 'ارزش بازار',
    'ba': 'بهترین عرضه',
    'bt': 'بهترین تقاضا'
}

In [11]:
import numpy as np

def _process_value(key, value):
    if key != 'namad' and key != 'name':
        if ',' in value:
            value = value.replace(',', '')

        if not value or value == '_':
            value = 0
        elif 'B' in value:
            value = value.replace(' B', '')
            value = float(value)*np.power(10, 9)
        elif 'M' in value:
            value = value.replace(' M', '')
            value = float(value)*np.power(10, 6)
        
        value = float(value)
            
    return value

In [12]:
import pandas as pd

index=[]
data = {
    'bi': [],
    'namad': [],
    'name': [],
    'hajm': [],
    'arzesh': [],
    'dm': [],
    'bish': [],
    'kam': [],
    'pghey': [],
    'ptagh': [],
    'pdar': [],
    'aghey': [],
    'atagh': [],
    'adar': [],
    'rgh': [],
    'tdsh': [],
    'pe': [],
    'eps': [],
    'ab': []
}

res = r.json()
for row in res['bData']:
    if row['bi'] != 'b1':
        continue
    index.append(row['i'])
    data['bi'].append(row['bi'])
    for key_value in row['val']:
        key = key_value['t']
        if key == 'ba':
            continue
        value = _process_value(key, key_value['v'])
        data[key].append(value)
        
df = pd.DataFrame(data, columns=data.keys(), index=index)

In [13]:
df

Unnamed: 0,bi,namad,name,hajm,arzesh,dm,bish,kam,pghey,ptagh,pdar,aghey,atagh,adar,rgh,tdsh,pe,eps,ab
IRO1ALBZ0001,b1,والبر1,سرمايه‌ گذاري‌ البرز(هلدينگ‌,2030000.0,3.390000e+10,848.0,17100.0,16510.0,16730.0,-110.0,-0.65,16510.0,-330.0,-1.96,16840.0,-13.27,32.30,518.0,8.030570e+13
IRO1AMIN0001,b1,دامين1,داروسازي‌ امين‌,138984.0,1.900000e+09,71.0,13890.0,13890.0,14160.0,-10.0,-0.07,13890.0,-280.0,-1.98,14170.0,-0.34,27.44,516.0,1.911600e+13
IRO1ASIA0001,b1,آسيا1,بيمه آسيا,1980000.0,4.700000e+09,156.0,2360.0,2360.0,2400.0,0.0,0.00,2360.0,-40.0,-1.67,2400.0,0.00,80.00,30.0,5.791630e+13
IRO1AYEG0001,b1,پرديس1,سرمايه گذاري پرديس,4210000.0,2.730000e+10,758.0,6650.0,6440.0,6520.0,-50.0,-0.76,6450.0,-120.0,-1.83,6570.0,-1.57,8.66,753.0,8.150000e+12
IRO1BAHN0001,b1,فباهنر1,مس‌ شهيدباهنر,1250000.0,6.620000e+10,1384.0,54840.0,51930.0,52800.0,-180.0,-0.34,51930.0,-1050.0,-1.98,52980.0,-5.09,69.02,765.0,5.940000e+13
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
IRO1TRNS0001,b1,بترانس1,ايران‌ ترانسفو,37620000.0,1.077000e+11,1389.0,2940.0,2850.0,2860.0,-40.0,-1.38,2850.0,-50.0,-1.72,2900.0,-36.59,59.58,48.0,1.041119e+14
IRO1TSBE0001,b1,وبشهر1,توسعه‌ صنايع‌ بهشهر(هلدينگ,3190000.0,3.740000e+10,405.0,12000.0,11690.0,11770.0,-150.0,-1.26,11690.0,-230.0,-1.93,11920.0,-31.09,30.41,387.0,9.710250e+13
IRO1TSHE0001,b1,وتوشه1,سرمايه‌ گذاري‌ پارس‌ توشه‌,74962.0,1.700000e+09,32.0,22960.0,22960.0,23400.0,-20.0,-0.09,22960.0,-460.0,-1.96,23420.0,-0.75,29.92,782.0,3.510000e+13
IRO1TSRZ0001,b1,كرازي1,كارخانجات‌توليدي‌شيشه‌رازي‌,3970000.0,9.400000e+10,1864.0,24800.0,23490.0,23690.0,-270.0,-1.13,23490.0,-470.0,-1.96,23960.0,-8.14,48.85,485.0,2.842800e+13


## Historical Data of Companies

In [14]:
from html.parser import HTMLParser
import urllib.request

class TseParser(HTMLParser):
    def __init__(self):
        HTMLParser.__init__(self)
        self.header = False
        self.headers = []
        self.rows = []
        self.cols = []
    
    def handle_starttag(self, tag, attrs):
        if tag == 'thead':
            self.header = True
        elif tag == 'tr':
            self.cols = []

    def handle_endtag(self, tag):
        if tag == 'thead':
            self.header = False
            self.headers.append(self.cols)
        elif tag == 'tr' and not self.header:
            self.rows.append(self.cols)

    def handle_data(self, data):
        self.cols.append(data)

url = "http://tse.ir/json/Instrument/TradeHistory/TradeHistory_{}.html"
co_data = {}
for co_id in index:
    fp = urllib.request.urlopen(url.format(co_id))
    mybytes = fp.read()
    mystr = mybytes.decode("utf8")
    fp.close()        

    parser = TseParser()
    parser.feed(mystr)

    co_data[co_id] = parser.rows

In [15]:
matrix_map = {}
for m in range(2,12):
    for d in range(1,30):
        m_str = str(m) if m > 9 else '0{}'.format(m)
        d_str = str(d) if d > 9 else '0{}'.format(d)
        key = '1399/{}/{}'.format(m_str,d_str)
        matrix_map[key] = [None] * len(index)

In [16]:
for i, co_id in enumerate(index):
    for row in co_data[co_id]:
        time = row[0]
        if time not in matrix_map:
            continue
        value = _process_value(None, row[6])
        matrix_map[time][i] = value

key = '1399/02/01'
for i in range(0, len(index)):
    matrix_map[key][i] = matrix_map[key][i] if matrix_map[key][i] is not None else 0

prev_key = None
for key in matrix_map.keys():
    for i in range(0, len(index)):
        matrix_map[key][i] = matrix_map[key][i] if matrix_map[key][i] is not None else \
                             matrix_map[prev_key][i]
    prev_key = key

In [17]:
counts = []
candidate_indices = []
for i in range(0, len(index)):
    cnt = 0
    for key in matrix_map.keys():
        if matrix_map[key][i] == 0:
            cnt += 1
    counts.append(cnt)
    if cnt == 0:
        candidate_indices.append(i)
len(candidate_indices)

32

In [18]:
matrix = []
for key in matrix_map.keys():
    row = [None] * len(candidate_indices)
    for i, candidate in enumerate(candidate_indices):
        row[i] = matrix_map[key][candidate]
    matrix.append(row)

df = pd.DataFrame(matrix)

In [21]:
df.to_csv('data/financial-b1.csv')

In [20]:
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,22,23,24,25,26,27,28,29,30,31
0,15106.0,7969.0,40202.0,6278.0,5964.0,11226.0,78728.0,23276.0,17112.0,10708.0,...,6707.0,5133.0,29134.0,7507.0,4356.0,40746.0,16643.0,13845.0,8433.0,18159.0
1,15803.0,8366.0,40279.0,6557.0,6157.0,11786.0,81151.0,24050.0,16639.0,11243.0,...,6747.0,5352.0,30450.0,7882.0,4362.0,41774.0,16324.0,14530.0,8575.0,18750.0
2,16593.0,8784.0,43873.0,6875.0,6390.0,12304.0,82962.0,24464.0,17232.0,11454.0,...,6667.0,5619.0,31821.0,8276.0,4515.0,43862.0,16597.0,15172.0,8884.0,18868.0
3,16593.0,8784.0,43873.0,6875.0,6390.0,12304.0,82962.0,24464.0,17232.0,11454.0,...,6667.0,5619.0,31821.0,8276.0,4515.0,43862.0,16597.0,15172.0,8884.0,18868.0
4,16593.0,8784.0,43873.0,6875.0,6390.0,12304.0,82962.0,24464.0,17232.0,11454.0,...,6667.0,5619.0,31821.0,8276.0,4515.0,43862.0,16597.0,15172.0,8884.0,18868.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
285,2500.0,4040.0,15790.0,23050.0,2960.0,4260.0,43940.0,21220.0,19350.0,6150.0,...,12930.0,5940.0,30710.0,21030.0,2850.0,28020.0,23410.0,11380.0,12990.0,27370.0
286,2460.0,3980.0,15680.0,23050.0,2950.0,4200.0,43700.0,21090.0,19350.0,6150.0,...,13400.0,6050.0,30330.0,21000.0,2870.0,27590.0,23140.0,12030.0,13110.0,27050.0
287,2440.0,3910.0,15510.0,22620.0,2930.0,4150.0,43200.0,21010.0,19350.0,6150.0,...,13300.0,6000.0,29870.0,20800.0,2830.0,27290.0,22680.0,12050.0,12860.0,26800.0
288,2430.0,3880.0,15390.0,22360.0,2920.0,4070.0,42690.0,21000.0,19350.0,6400.0,...,13070.0,5950.0,29360.0,20650.0,2790.0,26960.0,23000.0,11950.0,12780.0,26750.0


## Historical Data of Industries

In [26]:
# see https://tse.ir/indices.html#/industry

indices_request = requests.get('http://tse.ir/json/Indices/Indices.json')

In [28]:
industries = {}
for item in indices_request.json()['menu']:
    if item['t'] == 'صنعت':
        for category in item['cat']:
            industries[category['is']] = category['t']
industries

{'IRX6X01T0006': '01-زراعت',
 'IRX6X10T0006': '10-ذغال سنگ',
 'IRX6X13T0006': '13-كانه فلزي',
 'IRX6X14T0006': '14-ساير معادن',
 'IRX6X17T0006': '17-منسوجات',
 'IRX6X19T0006': '19-محصولات چرمي',
 'IRX6X20T0006': '20-محصولات چوبي',
 'IRX6X21T0006': '21-محصولات كاغذ',
 'IRX6X22T0006': '22-انتشار و چاپ',
 'IRX6X23T0006': '23-فراورده نفتي',
 'IRX6X25T0006': '25-لاستيك',
 'IRX6X26T0006': '26-كاني غيرفلزي',
 'IRX6X27T0006': '27-فلزات اساسي',
 'IRX6X28T0006': '28-محصولات فلزي',
 'IRX6X29T0006': '29-ماشين آلات',
 'IRX6X31T0006': '31-دستگاههاي برقي',
 'IRX6X32T0006': '32-وسايل ارتباطي',
 'IRX6X33T0006': '33-ابزار پزشكي',
 'IRX6X34T0006': '34-خودرو',
 'IRX6X35T0006': '35-حمل و نقل',
 'IRX6X36T0006': '36-مبلمان',
 'IRX6X38T0006': '38-قند و شكر',
 'IRX6X39T0006': '39-چند رشته اي ص',
 'IRX6X40T0006': '40-تامين آب، برق، گاز',
 'IRX6X42T0006': '42-غذايي بجز قند',
 'IRX6X43T0006': '43-مواد دارويي',
 'IRX6X44T0006': '44-شيميايي',
 'IRX6X45T0006': '45-پيمانكاري',
 'IRX6X47T0006': '47خرده فروشي به جز وسا

In [94]:
now = int(datetime.now().timestamp()*1000)
industry_data = {}
for industry in industries:
    url = f'http://tse.ir/json/Indices/Plot/plot_{industry}.json?_={now}'
    industry_request = requests.get(url)
    if industry != 'IRX6X26T0006':
        industry_data[industry] = idata = industry_request.json()['plotData'][-1]['yData'][0:-2]

In [95]:
df = pd.DataFrame(industry_data.values()).transpose()

In [96]:
df.to_csv('data/financial-industry.csv')

In [97]:
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,34,35,36,37,38,39,40,41,42,43
0,7415.0,788.0,10246.0,7708.0,1702.0,942.0,18703.0,6654.0,66750.0,208770.0,...,5648.0,769.0,158238.0,4096.0,96.0,865.0,7885.0,115.0,852.0,744.0
1,7247.0,787.0,10189.0,7822.0,1705.0,935.0,18703.0,6570.0,65380.0,214046.0,...,5591.0,772.0,155997.0,4055.0,94.0,881.0,7809.0,121.0,850.0,712.0
2,7220.0,782.0,10117.0,7541.0,1707.0,925.0,18703.0,6488.0,63072.0,209096.0,...,5632.0,776.0,154315.0,4037.0,89.0,888.0,7739.0,127.0,842.0,707.0
3,7541.0,777.0,10060.0,7241.0,1712.0,923.0,18703.0,6413.0,63709.0,209566.0,...,5652.0,770.0,155106.0,4052.0,86.0,875.0,7756.0,133.0,831.0,711.0
4,7918.0,738.0,10043.0,6891.0,1713.0,923.0,24389.0,6324.0,64557.0,208449.0,...,5706.0,774.0,154528.0,4023.0,82.0,877.0,7703.0,140.0,830.0,710.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1192,222511.0,56433.0,332955.0,225676.0,8508.0,25344.0,1751756.0,111257.0,1495953.0,3937569.0,...,30941.0,6207.0,1767595.0,64731.0,,10838.0,61263.0,,6930.0,3988.0
1193,224448.0,56006.0,332460.0,225481.0,8508.0,25722.0,1751756.0,111569.0,1490595.0,3878093.0,...,30815.0,6222.0,1763826.0,65036.0,,10811.0,61676.0,,6885.0,3909.0
1194,222619.0,55423.0,326985.0,224016.0,8508.0,25311.0,1751756.0,111742.0,1490595.0,3829373.0,...,30735.0,6292.0,1759611.0,64664.0,,10743.0,61638.0,,6816.0,
1195,,,,,,,,,,,...,,,,,,,,,,
