In [7]:
import os
import random
import string
import time
import urllib
import urllib.parse
import urllib.request
from datetime import datetime as dt, timedelta as td
from distutils.dir_util import mkpath
from enum import Enum

# Time periods (granularity of the time series) that can be requested
class Period(Enum):
    """Granularity of a requested time series.

    The numeric value of a member is what get_fin_data() sends as the
    'p' (period type) query parameter.
    """

    tick = 1
    min1 = 2
    min5 = 3
    min10 = 4
    min15 = 5
    min30 = 6
    hour = 7
    day = 8
    week = 9
    month = 10

def get_fin_data(market, code, ticker, from_date, to_date, period,
                 dtf=3, tmf=2, msor=1, sep=1, sep2=1, datf=1, at=1, fsp=1):
    """Download one export file from export.finam.ru and return its lines.

    Args:
        market: market code, sent as the 'market' query parameter.
        code: instrument code, sent as 'em'.
        ticker: instrument ticker; sent as 'code' and 'cn' and used as the
            file name in the URL path.
        from_date: period start; must provide day/month/year and strftime().
        to_date: period end; same requirements as from_date.
        period: object whose ``value`` is sent as 'p' (e.g. a Period member).
        dtf, tmf, msor, sep, sep2, datf, at, fsp: passed through unchanged
            as the query parameters of the same (case-insensitive) name.

    Returns:
        The response body as a list of text lines (split on '\\n'). When the
        body ends with a newline the last element is an empty string.

    Raises:
        urllib.error.URLError: (including HTTPError) if the request fails.
    """
    from_str = from_date.strftime("%d.%m.%Y")
    to_str = to_date.strftime("%d.%m.%Y")

    params  = [ ('market', market),                 #market code
                ('em', code),                       #instrument code
                ('code', ticker),                   #instrument ticker
                ('apply', 0),                       #?
                ('df', from_date.day),              #period start, day
                ('mf', from_date.month-1),          #period start, month (zero-based)
                ('yf', from_date.year),             #period start, year
                ('from', from_str),                 #period start
                ('dt', to_date.day),                #period end, day
                ('mt', to_date.month-1),            #period end, month (zero-based)
                ('yt', to_date.year),               #period end, year
                ('to', to_str),                     #period end
                ('p', period.value),                #period type
                ('f', 'data'),                      #file name
                ('e', '.txt'),                      #file type
                ('cn', ticker),                     #contract name
                ('dtf',  dtf),                      #date format
                ('tmf',  tmf),                      #time format
                ('MSOR', msor),                     #candle time(0-open,1-close)
                ('mstime', 'on'),                   #moscow time (optional)
                ('mstimever', 1),                   #timezone correction
                ('sep',  sep),                      #elements separator
                ('sep2', sep2),                     #digits separator
                ('datf', datf),                     #column selection
                ('at',   at),                       #headers presence (optional)
                ('fsp',  fsp),                      #fill periods w/o trades
    ]

    # Python 3: urlencode lives in urllib.parse, urlopen in urllib.request.
    url = ('http://export.finam.ru/{}.txt?'.format(ticker)
        + urllib.parse.urlencode(params))
    print(url)

    # NOTE(review): an explicit User-Agent is sent because the default
    # "Python-urllib" agent is presumably what the server rejects with
    # HTTP 403 -- confirm against the live service.
    request = urllib.request.Request(url,
                                     headers={'User-Agent': 'Mozilla/5.0'})
    with urllib.request.urlopen(request) as response:
        # The body arrives as bytes; prefer the charset announced by the
        # server. NOTE(review): the cp1251 fallback is an assumption.
        charset = response.headers.get_content_charset() or 'cp1251'
        content = response.read().decode(charset, errors='replace')

    # Split on '\n' (keeping the trailing empty element callers slice off)
    # and drop a possible '\r' left over from CRLF line endings.
    return [line.rstrip('\r') for line in content.split('\n')]


def _parse_finam_date(date_str):
    """Parse a "dd.mm.yyyy" or "dd.mm.yy" string into a datetime.date."""
    # Try the four-digit year first: "%Y" does not match a two-digit year,
    # so short dates fall through to the second format.
    for fmt in ("%d.%m.%Y", "%d.%m.%y"):
        try:
            return dt.strptime(date_str, fmt).date()
        except ValueError:
            continue
    raise ValueError(
        "date {!r} does not match 'dd.mm.yyyy' or 'dd.mm.yy'".format(date_str))


def gather_finam_data(emitents, from_str, to_str,
                      period = Period.day, path = './'):
    """Creates file with aggregated data from finam.ru for given parameters.

    Is able to bypass finam data size restriction by splitting query into
    several requests (one per 365-day window, with a one second pause
    after each request).

    Args:
        emitents : list of (ticker, market) tuples
        from_str : start date string in "dd.mm.yyyy" or "dd.mm.yy" format
        to_str   : end date string in "dd.mm.yyyy" or "dd.mm.yy" format
        period   : granularity of time series represented as Period Enum
        path     : directory in which the result file is created; the file
                   is named 'stocks_<tickers>_<from>_<to>.txt' with all
                   dots removed from the name

    Raises:
        ValueError: if from_str or to_str is in neither accepted format.
    """
    result = []

    from_date = _parse_finam_date(from_str)
    to_date = _parse_finam_date(to_str)

    # Only the very first downloaded chunk keeps its first (header) line.
    header_kept = False

    for emitent in emitents:
        ticker = emitent[0]
        market = emitent[1]
        code = define_emitent_code(ticker, market)
        print(ticker, market, code)

        chunk_start = from_date
        while chunk_start <= to_date:
            # Each request covers at most 365 days (start .. start + 364).
            chunk_end = min(chunk_start + td(days=364), to_date)
            print(chunk_start, chunk_end)
            data = get_fin_data(market, code, ticker, chunk_start,
                                chunk_end, period)
            chunk_start += td(days=365)
            # Drop the last element (the remainder after the final newline)
            # and, for every chunk but the first, the leading header line.
            result += data[(1 if header_kept else 0):-1]
            header_kept = True
            time.sleep(1)

    em_str = '_'.join([em[0] for em in emitents])
    filename = 'stocks_{}_{}_{}'.format(em_str,
                                        from_str,
                                        to_str).replace('.', '')
    # os.path.join also works when `path` has no trailing separator.
    with open(os.path.join(path, filename + '.txt'), 'w') as f:
        f.writelines("{}\n".format(item) for item in result)

# Lookup tables filled in by load_finam_vars(); empty until the first load.
# (quoted ticker, market) -> instrument code
tm_to_code = dict()
# quoted ticker -> list of markets
markts = dict()
# market -> list of tickers with the surrounding quotes stripped
emitents = dict()

def load_finam_vars():
    """Download finam's icharts.js and rebuild the module lookup tables.

    Reads the bracketed lists found on lines 0, 2 and 3 of the script
    (instrument codes, tickers and markets) and fills:
        tm_to_code : (quoted ticker, market) -> code
        markts     : quoted ticker -> list of markets
        emitents   : market -> list of tickers without quotes

    The three dicts are mutated in place, so no rebinding is needed.

    Raises:
        urllib.error.URLError: (including HTTPError) if the download fails.
        IndexError: if the downloaded script has fewer than four lines.
    """
    url = 'http://www.finam.ru/cache/icharts/icharts.js'

    # NOTE(review): an explicit User-Agent is sent because the default
    # "Python-urllib" agent is presumably what the server rejects with
    # HTTP 403 -- confirm against the live service.
    request = urllib.request.Request(url,
                                     headers={'User-Agent': 'Mozilla/5.0'})
    with urllib.request.urlopen(request) as response:
        # The body arrives as bytes; prefer the charset announced by the
        # server. NOTE(review): the cp1251 fallback is an assumption.
        charset = response.headers.get_content_charset() or 'cp1251'
        text = response.read().decode(charset, errors='replace')
    finam_var_list = text.split('\n')

    js_vars = {}

    # create js_vars with 'codes', 'tickers', 'markets' lists
    for key, line_no in {'codes': 0, 'tickers': 2, 'markets': 3}.items():
        s = finam_var_list[line_no]
        js_vars[key] = (s[s.find('[') + 1 : s.find(']')].split(','))

    # Start from empty tables so that a repeated load does not append
    # duplicate entries to the per-ticker / per-market lists.
    tm_to_code.clear()
    markts.clear()
    emitents.clear()

    for c,t,m in zip(js_vars['codes'],js_vars['tickers'],js_vars['markets']):
        tm_to_code[(t,m)] = c
        markts.setdefault(t, []).append(m)
        emitents.setdefault(m, []).append(t.strip('\''))


def define_emitent_code(ticker, market):
    """Return the instrument code stored for the (ticker, market) pair.

    Calls load_finam_vars() first when the tm_to_code table is still empty.
    The table is keyed by the ticker wrapped in single quotes.

    Raises:
        KeyError: if the pair is not present in the table.
    """
    if not tm_to_code:
        load_finam_vars()

    lookup_key = ("'{}'".format(ticker), market)
    return tm_to_code[lookup_key]


def get_emitent_markets(ticker):
    """Return ``(ticker, markets)`` with the ticker's markets sorted.

    Calls load_finam_vars() first when the markts table is still empty.
    The table is keyed by the ticker wrapped in single quotes.

    Raises:
        KeyError: if the ticker is not present in the table.
    """
    if not markts:
        load_finam_vars()

    quoted_ticker = "'{}'".format(ticker)
    market_list = sorted(markts[quoted_ticker])
    return (ticker, market_list)


def get_market_emitents(market):
    """Return ``(market, tickers)`` with the market's tickers sorted.

    Calls load_finam_vars() first when the emitents table is still empty.

    Raises:
        KeyError: if the market is not present in the table.
    """
    if not emitents:
        load_finam_vars()

    ticker_list = sorted(emitents[market])
    return (market, ticker_list)

#  Markets:
#   MosBirzha = 1
#   MosBirzha top = 200
#   ? = 8
#   Bonds = 2
#   Indexes = 6
#   Currencies = 45
#   US(BATS) = 25, 517
#   bad markets = 91, 519
#
#  'emitents' = [(ticker, market), ...], e.g. [('LKOH', '1'), ('SIBN', '1')]

experiment_number = '0'

# Market whose tickers are sampled, and the shape of the random sample:
# `sample_number` portfolios of `portfolio_size` tickers each.
market = '200'
sample_number = 2
portfolio_size = 2
data_folder = ('C:/Users/TorchPochmak/Desktop/')
download_path = data_folder + 'input/finam_raw/'
# Make sure the output directories exist before anything is downloaded.
mkpath(download_path)
mkpath(data_folder + 'results/plot_data/')

print('result files will be in ' + download_path)


# get_market_emitents() returns (market, sorted tickers); keep the tickers.
emitent_list = get_market_emitents(market)[1]

# set of portfolios to iterate through looks like:
# portfolios = [
#     {'emitents' : [('LKOH', '1'), ('SIBN', '1')]},
#     {'emitents' : [('AFLT', '1'), ('BANE', '1')]}
# ]
portfolios = []
for i in range(sample_number):
    random_emitent_sample = random.sample(emitent_list, portfolio_size)
    random_emitents = [(ticker, market) for ticker in random_emitent_sample]
    random_portfolio = {'emitents' : random_emitents}
    portfolios.append(random_portfolio)

# One output file per portfolio; **p supplies the 'emitents' argument.
for p in portfolios:
    print(p['emitents'])
    # Period has no member called `min`; min1 is used instead.
    # NOTE(review): min1 is assumed to be the intended granularity.
    gather_finam_data(period = Period.min1,
                                   from_str = '08.01.12',
                                   to_str = '08.07.18',
                                   path = download_path,
                                   **p)


result files will be in C:/Users/TorchPochmak/Desktop/input/finam_raw/


HTTPError: HTTP Error 403: Forbidden