In [53]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import json
from datetime import datetime
import random
import seaborn as sns
import math
from sklearn.preprocessing import StandardScaler

In [54]:
with open('data/yahoo/selected_financials_and_quotes.json', 'r') as json_file:
    financials_and_quotes = json.load(json_file)

In [55]:
tickers = list(financials_and_quotes.keys())

In [56]:
position_types_selected_for_analysis = {}
position_types_selected_for_analysis['quarterly_income'] = [
    'totalRevenue',
    'totalExpenses',
    'grossProfit',
    'netIncome',
    'ebit',
    'eBITDA',
    'operatingRevenue',
    'operatingIncome',
    'dilutedEPS',
]
position_types_selected_for_analysis['quarterly_balance'] = [
    'totalAssets',
    'totalLiabilitiesNetMinorityInterest',
    'totalDebt',
    'ordinarySharesNumber',
    'commonStockEquity',
    'tangibleBookValue',
    'investedCapital',
]
position_types_selected_for_analysis['quarterly_cashflow'] = [
    'operatingCashFlow',
    'investingCashFlow',
    'financingCashFlow',
    'freeCashFlow',
]
position_types_selected_for_analysis['annual_income'] = [
    'totalRevenue',
    'totalExpenses',
    'grossProfit',
    'netIncome',
    'ebit',
    'eBITDA',
    'operatingRevenue',
    'operatingIncome',
    'dilutedEPS',
]
position_types_selected_for_analysis['annual_balance'] = [
    'totalAssets',
    'totalLiabilitiesNetMinorityInterest',
    'totalDebt',
    'ordinarySharesNumber',
    'commonStockEquity',
    'tangibleBookValue',
    'investedCapital',
]
position_types_selected_for_analysis['annual_cashflow'] = [
    'operatingCashFlow',
    'investingCashFlow',
    'financingCashFlow',
    'freeCashFlow',
]

In [57]:
statement_types = [
    'quarterly_income',
    'quarterly_balance',
    'quarterly_cashflow',
    'annual_income',
    'annual_balance',
    'annual_cashflow'
 ]

In [58]:
# Standarize financials
for statement_type in statement_types:
    for position_type in position_types_selected_for_analysis[statement_type]:
        data = []
        for ticker in tickers:
            for index in range(len(financials_and_quotes[ticker][statement_type])):
                timestamp = list(financials_and_quotes[ticker][statement_type][index].keys())[0]
                data.append(financials_and_quotes[ticker][statement_type][index][timestamp][position_type])            

        scaler = StandardScaler().fit(np.array(data).reshape(-1, 1))
        for ticker in tickers:
            for index in range(len(financials_and_quotes[ticker][statement_type])):
                timestamp = list(financials_and_quotes[ticker][statement_type][index].keys())[0]
                standarized_position_name = position_type + 'Standarized'
                financials_and_quotes[ticker][statement_type][index][timestamp][standarized_position_name] = scaler.transform(np.array(financials_and_quotes[ticker][statement_type][index][timestamp][position_type]).reshape(-1, 1)).tolist()[0][0]

In [59]:
financials_and_quotes[tickers[0]][statement_types[0]][0]

{'1690754400': {'totalRevenue': 1672000000.0,
  'totalExpenses': 1539000000.0,
  'grossProfit': 658000000.0,
  'netIncome': 111000000.0,
  'ebit': 1470000000.0,
  'eBITDA': 227000000.0,
  'operatingRevenue': 1672000000.0,
  'operatingIncome': 133000000.0,
  'dilutedEPS': 3.82,
  'totalRevenueStandarized': -0.2884141775546588,
  'totalExpensesStandarized': -0.3209667043167068,
  'grossProfitStandarized': -0.3211107542164752,
  'netIncomeStandarized': -0.22049677075602242,
  'ebitStandarized': 0.037580393290452464,
  'eBITDAStandarized': -0.30269798556599664,
  'operatingRevenueStandarized': -0.36129126806212514,
  'operatingIncomeStandarized': -0.25042297091865245,
  'dilutedEPSStandarized': 0.189390233846048}}

In [60]:
def get_last_quotes_timestamp(ticker_data, max_timestamp):
    last_timestamp_before = '0'
    for timestamp in ticker_data['quotes'].keys():
        if int(timestamp) <= int(max_timestamp) and int(timestamp) > int(last_timestamp_before):
            last_timestamp_before = timestamp
    return last_timestamp_before if last_timestamp_before != '0' else None


In [61]:
def get_statement_index(statements, timestamp):
    for i in range(len(statements)):
        if list(statements[i].keys())[0] == timestamp:
            return i
        
    return None

In [62]:
# Calculate markers for qurterly financials and filter out statements without corrrsponding statemens of the same timestamp or with misssing quotes
for ticker in tickers:
    timestamps = set()
    for statement_type in ['quarterly_income', 'quarterly_balance', 'quarterly_cashflow']:
        for index in range(len(financials_and_quotes[ticker][statement_type])):
            timestamp = list(financials_and_quotes[ticker][statement_type][index].keys())[0]
            timestamps.add(timestamp)

    financials_and_quotes[ticker]['quarterly_markers'] = []
    for timestamp in timestamps:
        income_statement_index = get_statement_index(financials_and_quotes[ticker]['quarterly_income'], timestamp)
        balance_statement_index = get_statement_index(financials_and_quotes[ticker]['quarterly_balance'], timestamp)
        cashflow_statement_index = get_statement_index(financials_and_quotes[ticker]['quarterly_cashflow'], timestamp)
        if income_statement_index is None or balance_statement_index is None or cashflow_statement_index is None:
            if income_statement_index is not None:
                financials_and_quotes[ticker]['quarterly_income'].pop(income_statement_index)
            if balance_statement_index is not None:
                financials_and_quotes[ticker]['quarterly_balance'].pop(balance_statement_index)
            if cashflow_statement_index is not None:
                financials_and_quotes[ticker]['quarterly_cashflow'].pop(cashflow_statement_index)
            continue
        
        quotes_timestamp = get_last_quotes_timestamp(financials_and_quotes[ticker], timestamp)
        if quotes_timestamp is None:
            financials_and_quotes[ticker]['quarterly_income'].pop(income_statement_index)
            financials_and_quotes[ticker]['quarterly_balance'].pop(balance_statement_index)
            financials_and_quotes[ticker]['quarterly_cashflow'].pop(cashflow_statement_index)
            continue

        price = financials_and_quotes[ticker]['quotes'][quotes_timestamp]['close']
        shares_num = financials_and_quotes[ticker]['quarterly_balance'][balance_statement_index][timestamp]['ordinarySharesNumber']
        net_income = financials_and_quotes[ticker]['quarterly_income'][income_statement_index][timestamp]['netIncome']
        equity = financials_and_quotes[ticker]['quarterly_balance'][balance_statement_index][timestamp]['commonStockEquity']
        tangible_book_value = financials_and_quotes[ticker]['quarterly_balance'][balance_statement_index][timestamp]['tangibleBookValue']
        total_revenue = financials_and_quotes[ticker]['quarterly_income'][income_statement_index][timestamp]['totalRevenue']
        gross_profit = financials_and_quotes[ticker]['quarterly_income'][income_statement_index][timestamp]['grossProfit']
        ebit = financials_and_quotes[ticker]['quarterly_income'][income_statement_index][timestamp]['ebit']
        ebitda = financials_and_quotes[ticker]['quarterly_income'][income_statement_index][timestamp]['eBITDA']
        operating_income = financials_and_quotes[ticker]['quarterly_income'][income_statement_index][timestamp]['operatingIncome']
        debt = financials_and_quotes[ticker]['quarterly_balance'][balance_statement_index][timestamp]['totalDebt']
        assets = financials_and_quotes[ticker]['quarterly_balance'][balance_statement_index][timestamp]['totalAssets']
        operating_cashflow = financials_and_quotes[ticker]['quarterly_cashflow'][cashflow_statement_index][timestamp]['operatingCashFlow']
        investing_cashflow = financials_and_quotes[ticker]['quarterly_cashflow'][cashflow_statement_index][timestamp]['investingCashFlow']
        free_cashflow = financials_and_quotes[ticker]['quarterly_cashflow'][cashflow_statement_index][timestamp]['freeCashFlow']

        marketcap = price * shares_num
        markers = {
            'marketcap': marketcap,
            'price/earnings': marketcap / net_income if net_income != 0 else 10e9,
            'price/equity': marketcap / equity if equity != 0 else 10e9,
            'price/tangibleBookValue': marketcap / tangible_book_value if tangible_book_value != 0 else 10e9,
            'netIncomeMargin': net_income / total_revenue if total_revenue != 0 else 10e9,
            'grossProfitMargin': gross_profit / total_revenue if total_revenue != 0 else 10e9,
            'ebitMargin': ebit / total_revenue if total_revenue != 0 else 10e9,
            'ebitdaMargin': ebitda / total_revenue if total_revenue != 0 else 10e9,
            'operatingIncomeMargin': operating_income / total_revenue if total_revenue != 0 else 10e9,
            'operatingCashFlowMargin': operating_cashflow / total_revenue if total_revenue != 0 else 10e9,
            'investingCashFlowMargin': investing_cashflow / total_revenue if total_revenue != 0 else 10e9,
            'freeCashFlowMargin': free_cashflow / total_revenue if total_revenue != 0 else 10e9,
            'debt/assets': debt / assets if assets != 0 else 10e9,
            'netIncome/equity': net_income / equity if equity != 0 else 10e9,
            'netIncome/tangibleBookValue': net_income / tangible_book_value if tangible_book_value != 0 else 10e9,
            'netIncome/totalAssets': net_income / assets if assets != 0 else 10e9,
        }
        financials_and_quotes[ticker]['quarterly_markers'].append({
            timestamp: markers
        })

In [63]:
# Calculate markers for annual financials and filter out statements without corrrsponding statemens of the same timestamp or with misssing quotes
for ticker in tickers:
    timestamps = set()
    for statement_type in ['annual_income', 'annual_balance', 'annual_cashflow']:
        for index in range(len(financials_and_quotes[ticker][statement_type])):
            timestamp = list(financials_and_quotes[ticker][statement_type][index].keys())[0]
            timestamps.add(timestamp)

    financials_and_quotes[ticker]['annual_markers'] = []
    for timestamp in timestamps:
        income_statement_index = get_statement_index(financials_and_quotes[ticker]['annual_income'], timestamp)
        balance_statement_index = get_statement_index(financials_and_quotes[ticker]['annual_balance'], timestamp)
        cashflow_statement_index = get_statement_index(financials_and_quotes[ticker]['annual_cashflow'], timestamp)
        if income_statement_index is None or balance_statement_index is None or cashflow_statement_index is None:
            if income_statement_index is not None:
                financials_and_quotes[ticker]['annual_income'].pop(income_statement_index)
            if balance_statement_index is not None:
                financials_and_quotes[ticker]['annual_balance'].pop(balance_statement_index)
            if cashflow_statement_index is not None:
                financials_and_quotes[ticker]['annual_cashflow'].pop(cashflow_statement_index)
            continue

        quotes_timestamp = get_last_quotes_timestamp(financials_and_quotes[ticker], timestamp)
        if quotes_timestamp is None:
            financials_and_quotes[ticker]['annual_income'].pop(income_statement_index)
            financials_and_quotes[ticker]['annual_balance'].pop(balance_statement_index)
            financials_and_quotes[ticker]['annual_cashflow'].pop(cashflow_statement_index)
            continue

        price = financials_and_quotes[ticker]['quotes'][quotes_timestamp]['close']
        shares_num = financials_and_quotes[ticker]['annual_balance'][balance_statement_index][timestamp]['ordinarySharesNumber']
        net_income = financials_and_quotes[ticker]['annual_income'][income_statement_index][timestamp]['netIncome']
        equity = financials_and_quotes[ticker]['annual_balance'][balance_statement_index][timestamp]['commonStockEquity']
        tangible_book_value = financials_and_quotes[ticker]['annual_balance'][balance_statement_index][timestamp]['tangibleBookValue']
        total_revenue = financials_and_quotes[ticker]['annual_income'][income_statement_index][timestamp]['totalRevenue']
        gross_profit = financials_and_quotes[ticker]['annual_income'][income_statement_index][timestamp]['grossProfit']
        ebit = financials_and_quotes[ticker]['annual_income'][income_statement_index][timestamp]['ebit']
        ebitda = financials_and_quotes[ticker]['annual_income'][income_statement_index][timestamp]['eBITDA']
        operating_income = financials_and_quotes[ticker]['annual_income'][income_statement_index][timestamp]['operatingIncome']
        debt = financials_and_quotes[ticker]['annual_balance'][balance_statement_index][timestamp]['totalDebt']
        assets = financials_and_quotes[ticker]['annual_balance'][balance_statement_index][timestamp]['totalAssets']
        operating_cashflow = financials_and_quotes[ticker]['annual_cashflow'][cashflow_statement_index][timestamp]['operatingCashFlow']
        investing_cashflow = financials_and_quotes[ticker]['annual_cashflow'][cashflow_statement_index][timestamp]['investingCashFlow']
        free_cashflow = financials_and_quotes[ticker]['annual_cashflow'][cashflow_statement_index][timestamp]['freeCashFlow']

        if tangible_book_value < 0:
            print(ticker, timestamp, equity, tangible_book_value)

        marketcap = price * shares_num
        markers = {
            'marketcap': marketcap,
            'price/earnings': marketcap / net_income if net_income != 0 else 10e9,
            'price/equity': marketcap / equity if equity != 0 else 10e9,
            'price/tangibleBookValue': marketcap / tangible_book_value if tangible_book_value != 0 else 10e9,
            'netIncomeMargin': net_income / total_revenue if total_revenue != 0 else 10e9,
            'grossProfitMargin': gross_profit / total_revenue if total_revenue != 0 else 10e9,
            'ebitMargin': ebit / total_revenue if total_revenue != 0 else 10e9,
            'ebitdaMargin': ebitda / total_revenue if total_revenue != 0 else 10e9,
            'operatingIncomeMargin': operating_income / total_revenue if total_revenue != 0 else 10e9,
            'operatingCashFlowMargin': operating_cashflow / total_revenue if total_revenue != 0 else 10e9,
            'investingCashFlowMargin': investing_cashflow / total_revenue if total_revenue != 0 else 10e9,
            'freeCashFlowMargin': free_cashflow / total_revenue if total_revenue != 0 else 10e9,
            'debt/assets': debt / assets if assets != 0 else 10e9,
            'netIncome/equity': net_income / equity if equity != 0 else 10e9,
            'netIncome/tangibleBookValue': net_income / tangible_book_value if tangible_book_value != 0 else 10e9,
            'netIncome/totalAssets': net_income / assets if assets != 0 else 10e9,
        }
        financials_and_quotes[ticker]['annual_markers'].append({
            timestamp: markers
        })


AAL 1577746800 -118000000.0 -6293000000.0
AAL 1672441200 -5799000000.0 -11949000000.0
AAL 1609369200 -6867000000.0 -12987000000.0
AAL 1640905200 -7340000000.0 -13419000000.0
ABBV 1577746800 -8172000000.0 -42425000000.0
ABBV 1672441200 17254000000.0 -82341000000.0
ABBV 1609369200 13076000000.0 -102924000000.0
ABBV 1640905200 15408000000.0 -92922000000.0
ABT 1577746800 31088000000.0 -9132000000.0
ABT 1609369200 32784000000.0 -5744000000.0
ABT 1640905200 35802000000.0 -168000000.0
ADBE 1669762800 14051000000.0 -185000000.0
ADI 1604098800 11997945000.0 -3930760000.0
ADI 1667170800 36465323000.0 -3713217000.0
ADI 1635631200 37992542000.0 -4193098000.0
ADP 1688076000 3509100000.0 -173900000.0
ADP 1656540000 3225300000.0 -408300000.0
ADSK 1675119600 1145000000.0 -2887000000.0
ADSK 1643583600 849100000.0 -3248500000.0
ADSK 1612047600 965500000.0 -1829600000.0
ADSK 1580425200 -139100000.0 -2655000000.0
AES 1672441200 1599000000.0 -604000000.0
AES 1640905200 1973000000.0 -654000000.0
AJG 1577746

In [64]:
financials_and_quotes['ADBE']['annual_markers'][2]

{'1638226800': {'marketcap': 326557745361.3281,
  'price/earnings': 67.72246896750895,
  'price/equity': 22.069186008064346,
  'price/tangibleBookValue': 1275.616192817688,
  'netIncomeMargin': 0.3054798859676908,
  'grossProfitMargin': 0.8818498574596135,
  'ebitMargin': 0.3685777636997149,
  'ebitdaMargin': 0.4184985745961356,
  'operatingIncomeMargin': 0.3675641431738993,
  'operatingCashFlowMargin': 0.4575863161229015,
  'investingCashFlowMargin': -0.22293316439657904,
  'freeCashFlowMargin': 0.43668039277795373,
  'debt/assets': 0.17154289490106825,
  'netIncome/equity': 0.3258768669324863,
  'netIncome/tangibleBookValue': 18.8359375,
  'netIncome/totalAssets': 0.1770125913145626}}

In [65]:
for ticker in tickers:
    if len(financials_and_quotes[ticker]['annual_markers']) == 0:
        continue
    x = financials_and_quotes[ticker]['annual_markers'][0][list(financials_and_quotes[ticker]['annual_markers'][0].keys())[0]]
    if x['netIncome/equity'] * x['netIncome/tangibleBookValue'] < 0:
        print(ticker, x)

ABT {'marketcap': 152985287218.33307, 'price/earnings': 41.493161708254156, 'price/equity': 4.921039861629345, 'price/tangibleBookValue': -16.752659572747817, 'netIncomeMargin': 0.11556544633901705, 'grossProfitMargin': 0.585287111334002, 'ebitMargin': 0.1458437813440321, 'ebitdaMargin': 0.20652582748244735, 'operatingIncomeMargin': 0.14205115346038114, 'operatingCashFlowMargin': 0.1923269809428285, 'investingCashFlowMargin': -0.05688941825476429, 'freeCashFlowMargin': 0.1409854563691073, 'debt/assets': 0.2783154359450263, 'netIncome/equity': 0.11859881626351004, 'netIncome/tangibleBookValue': -0.40374507227332457, 'netIncome/totalAssets': 0.054310840072473374}
ADI {'marketcap': 43795044627.43863, 'price/earnings': 35.87519967253101, 'price/equity': 3.6502121511174312, 'price/tangibleBookValue': -11.14162264484187, 'netIncomeMargin': 0.217874138684318, 'grossProfitMargin': 0.6586544914061184, 'ebitMargin': 0.268589498302355, 'ebitdaMargin': 0.41331819635570305, 'operatingIncomeMargin':

In [66]:
financials_and_quotes[tickers[0]]['annual_markers'][0]

{'1604098800': {'marketcap': 31239538879.39453,
  'price/earnings': 43.44859371264886,
  'price/equity': 6.4107405867831995,
  'price/tangibleBookValue': 70.99895199862394,
  'netIncomeMargin': 0.13466941374789287,
  'grossProfitMargin': 0.5313729162764562,
  'ebitMargin': 0.17231691327964038,
  'ebitdaMargin': 0.23000561902978087,
  'operatingIncomeMargin': 0.15845663982019104,
  'operatingCashFlowMargin': 0.17250421427233564,
  'investingCashFlowMargin': -0.02753324592620341,
  'freeCashFlowMargin': 0.15021539614159954,
  'debt/assets': 0.24503999169003843,
  'netIncome/equity': 0.14754771188179766,
  'netIncome/tangibleBookValue': 1.634090909090909,
  'netIncome/totalAssets': 0.07468577957826945}}

In [67]:
financials_and_quotes[tickers[0]]['quotes'].keys()

dict_keys(['1575297000', '1575383400', '1575469800', '1575556200', '1575642600', '1575901800', '1575988200', '1576074600', '1576161000', '1576247400', '1576506600', '1576593000', '1576679400', '1576765800', '1576852200', '1577111400', '1577197800', '1577370600', '1577457000', '1577716200', '1577802600', '1577975400', '1578061800', '1578321000', '1578407400', '1578493800', '1578580200', '1578666600', '1578925800', '1579012200', '1579098600', '1579185000', '1579271400', '1579617000', '1579703400', '1579789800', '1579876200', '1580135400', '1580221800', '1580308200', '1580394600', '1580481000', '1580740200', '1580826600', '1580913000', '1580999400', '1581085800', '1581345000', '1581431400', '1581517800', '1581604200', '1581690600', '1582036200', '1582122600', '1582209000', '1582295400', '1582554600', '1582641000', '1582727400', '1582813800', '1582900200', '1583159400', '1583245800', '1583332200', '1583418600', '1583505000', '1583760600', '1583847000', '1583933400', '1584019800', '15841062

In [68]:
financials_and_quotes[tickers[0]]['quotes']['1575297000']

{'adjclose': 78.15552520751953,
 'close': 80.3499984741211,
 'high': 80.98999786376953,
 'low': 80.0199966430664,
 'open': 80.77999877929688,
 'volume': 1775600}

In [69]:
for ticker in tickers:
    quotes_stats = {}
    for timestamp in financials_and_quotes[ticker]['quotes'].keys():
        time = datetime.fromtimestamp(int(timestamp))
        year = time.year
        month = time.month
        day = time.day

        if year not in quotes_stats.keys():
            quotes_stats[year] = {}
        if month not in quotes_stats[year].keys():
            quotes_stats[year][month] = {}

        if financials_and_quotes[ticker]['quotes'][timestamp]['volume'] is None:
            quotes_stats[year][month][day] = None
            continue

        quotes_stats[year][month][day] = {
            'volume': financials_and_quotes[ticker]['quotes'][timestamp]['volume'],
            'volumePrice': financials_and_quotes[ticker]['quotes'][timestamp]['volume'] * financials_and_quotes[ticker]['quotes'][timestamp]['close'],
            'spread': (financials_and_quotes[ticker]['quotes'][timestamp]['high'] - financials_and_quotes[ticker]['quotes'][timestamp]['low']) / financials_and_quotes[ticker]['quotes'][timestamp]['close'],
            'average': financials_and_quotes[ticker]['quotes'][timestamp]['close'],
            'low': financials_and_quotes[ticker]['quotes'][timestamp]['low'],
            'high': financials_and_quotes[ticker]['quotes'][timestamp]['high'],
        }

    for year in quotes_stats:
        for month in quotes_stats[year]:
            volume = 0
            volumePrice = 0
            spreadSum = 0
            minLow = 10e9
            maxHigh = -10e9
            missingDays = 0

            for day in quotes_stats[year][month]:
                if quotes_stats[year][month][day] is None:
                    missingDays += 1
                    continue

                volume += quotes_stats[year][month][day]['volume']
                volumePrice += quotes_stats[year][month][day]['volumePrice']
                minLow = min(minLow, quotes_stats[year][month][day]['low'])
                maxHigh = max(maxHigh, quotes_stats[year][month][day]['high'])
                spreadSum += quotes_stats[year][month][day]['spread']

            quotes_stats[year][month]['stats'] = {
                'low': minLow,
                'high': maxHigh,
                'volume': volume,
                'volumePrice': volumePrice,
                'spread': (maxHigh - minLow)  if maxHigh != -10e9 and minLow != 10e9 else None,
                'averagePrice': volumePrice / volume if volume != 0 else None,
                'dailySpreadSum': spreadSum,
                'averageDailySpread': spreadSum / (len(quotes_stats[year][month].keys()) - missingDays) if len(quotes_stats[year][month].keys()) - missingDays != 0 else None,
                'missingDays': missingDays,
            }

        volume = 0
        volumePrice = 0
        dailySpreadSum = 0
        monthlySpreadSum = 0
        minLow = 10e9
        maxHigh = -10e9
        missingDays = 0
        missingMonths = 0
        daysCount = 0
        minLow = 10e9
        maxHigh = -10e9

        for month in quotes_stats[year]:
            if quotes_stats[year][month]['stats']['missingDays'] == len(quotes_stats[year][month].keys()) - 1:
                missingMonths += 1
                continue

            volume += quotes_stats[year][month]['stats']['volume']
            volumePrice += quotes_stats[year][month]['stats']['volumePrice']
            minLow = min(minLow, quotes_stats[year][month]['stats']['low'])
            maxHigh = max(maxHigh, quotes_stats[year][month]['stats']['high'])
            dailySpreadSum += quotes_stats[year][month]['stats']['dailySpreadSum']
            monthlySpreadSum += quotes_stats[year][month]['stats']['spread']
            missingDays += quotes_stats[year][month]['stats']['missingDays']
            daysCount += len(quotes_stats[year][month].keys()) - quotes_stats[year][month]['stats']['missingDays']

        quotes_stats[year]['stats'] = {
            'low': minLow,
            'high': maxHigh,
            'volume': volume,
            'volumePrice': volumePrice,
            'spread': maxHigh - minLow if maxHigh != -10e9 and minLow != 10e9 else None,
            'averagePrice': volumePrice / volume if volume != 0 else None,
            'averageDailySpread': dailySpreadSum / daysCount if daysCount != 0 else None,
            'averageMonthlySpread': monthlySpreadSum / (len(quotes_stats[year]) - missingMonths) if len(quotes_stats[year]) - missingMonths != 0 else None,
            'missingDays': missingDays,
            'missingMonths': missingMonths,
        }

    financials_and_quotes[ticker]['quotes_stats'] = quotes_stats


In [70]:
financials_and_quotes[tickers[0]]['quotes_stats'][2020]['stats']

{'low': 61.130001068115234,
 'high': 120.23999786376953,
 'volume': 468519400,
 'volumePrice': 42014381137.28027,
 'spread': 59.1099967956543,
 'averagePrice': 89.67479497600371,
 'averageDailySpread': 0.02420629329433416,
 'averageMonthlySpread': 11.454166730244955,
 'missingDays': 0,
 'missingMonths': 0}

In [71]:
financials_and_quotes[tickers[0]]['quotes_stats'][2020][1]['stats']

{'low': 82.33999633789062,
 'high': 90.63999938964844,
 'volume': 38918800,
 'volumePrice': 3389516248.4344482,
 'spread': 8.300003051757812,
 'averagePrice': 87.09200305339446,
 'dailySpreadSum': 0.30127987152662977,
 'averageDailySpread': 0.014346660548887133,
 'missingDays': 0}

In [72]:
financials_and_quotes[tickers[0]]['quotes_stats'][2020][1][2]

{'volume': 1410500,
 'volumePrice': 121232470.6954956,
 'spread': 0.01337989024678407,
 'average': 85.94999694824219,
 'low': 85.19999694824219,
 'high': 86.3499984741211}

In [73]:
file_path = f'data/yahoo/merged_financials_quotes_markers.json'
with open(file_path, 'w') as json_file:
    json.dump(financials_and_quotes, json_file, indent=4)