In [26]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import json
from datetime import datetime
import random

In [27]:
# Load the merged Yahoo financial statements into memory.
# JSON text is UTF-8 by specification, so the encoding is stated explicitly
# rather than relying on the platform-dependent default of open().
with open('data/yahoo/merged_financials.json', 'r', encoding='utf-8') as json_file:
    yahoo_financials_json = json.load(json_file)

In [28]:
# The top-level keys of the loaded JSON are used as the ticker list.
tickers = list(yahoo_financials_json)

tickers[:10]

['A', 'AAL', 'AAP', 'AAPL', 'ABBV', 'ABC', 'ABMD', 'ABT', 'ACN', 'ADBE']

In [29]:
# Statement types are read from the first ticker's entry only.
# NOTE(review): assumes every ticker carries the same statement types - confirm.
statement_types = [*yahoo_financials_json[tickers[0]]]

statement_types

['quarterly_income',
 'quarterly_balance',
 'quarterly_cashflow',
 'annual_income',
 'annual_balance',
 'annual_cashflow']

In [30]:
# First key of every statement dict for the first ticker and first statement
# type (presumably epoch-second timestamps - confirm against the data source).
timetamps = [
    list(statement)[0]
    for statement in yahoo_financials_json[tickers[0]][statement_types[0]]
]
timetamps

['1690754400', '1667170800', '1675119600', '1682805600', '1635631200']

In [31]:
# Collect, per statement type, the set of all position types that appear in
# any statement of any ticker.
# The original loop variable was called `type`, which rebinds the builtin
# `type` for the rest of the kernel session; a comprehension avoids leaking it.
position_types_by_statement_type = {
    statement_type: set() for statement_type in statement_types
}

for ticker in tickers:
    for statement_type in statement_types:
        for statement in yahoo_financials_json[ticker][statement_type]:
            # Only the first value of each statement dict is inspected; its
            # keys are the position types reported in that statement.
            reported_positions = list(statement.values())[0]
            position_types_by_statement_type[statement_type].update(reported_positions)

# random.sample() needs a sequence: sets were deprecated as a population in
# Python 3.9 and raise TypeError from 3.11 on. Sorting gives a stable sequence.
random.sample(sorted(position_types_by_statement_type[statement_types[0]]), 10)

['otherunderPreferredStockDividend',
 'depreciationIncomeStatement',
 'operatingExpense',
 'pretaxIncome',
 'earningsFromEquityInterest',
 'totalExpenses',
 'reconciledCostOfRevenue',
 'operatingRevenue',
 'netIncome',
 'sellingAndMarketingExpense']

In [32]:
# Count, per statement type, in how many statements each position type occurs.
# Every known position type starts at zero so the increment below never misses.
position_occurences = {
    statement_type: dict.fromkeys(position_types_by_statement_type[statement_type], 0)
    for statement_type in statement_types
}

for ticker in tickers:
    for statement_type in statement_types:
        occurences_for_type = position_occurences[statement_type]
        for statement in yahoo_financials_json[ticker][statement_type]:
            # Only the first value of each statement dict is inspected; its
            # keys are the position types reported in that statement.
            for position_type in list(statement.values())[0]:
                occurences_for_type[position_type] += 1

# random.sample() needs a sequence; a dict_items view is rejected with a
# TypeError on Python 3.11+, so it is materialised as a list first.
random.sample(list(position_occurences[statement_types[0]].items()), 10)

[('normalizedIncome', 2405),
 ('rentAndLandingFees', 64),
 ('otherNonOperatingIncomeExpenses', 1825),
 ('salariesAndWages', 350),
 ('basicAverageShares', 2411),
 ('insuranceAndClaims', 74),
 ('taxProvision', 2374),
 ('netInterestIncome', 2362),
 ('reconciledDepreciation', 2330),
 ('totalRevenue', 2405)]

In [33]:
# Rank the position types of every statement type from most to least frequent
# as a list of (position_type, occurrence_count) pairs.
position_occurences_sorted = {
    statement_type: sorted(
        position_occurences[statement_type].items(),
        key=lambda name_and_count: name_and_count[1],
        reverse=True,
    )
    for statement_type in statement_types
}

# Print one ranked listing per statement type, separated by a blank line.
for statement_type in statement_types:
    print(statement_type)
    ranked_positions = position_occurences_sorted[statement_type]
    for position_type, occurence in ranked_positions:
        print(f'\t{occurence}:\t {position_type}')
    print()

quarterly_income
	2411:	 dilutedAverageShares
	2411:	 basicAverageShares
	2409:	 basicEPS
	2409:	 dilutedEPS
	2405:	 totalRevenue
	2405:	 taxRateForCalcs
	2405:	 pretaxIncome
	2405:	 netIncomeFromContinuingAndDiscontinuedOperation
	2405:	 netIncome
	2405:	 netIncomeIncludingNoncontrollingInterests
	2405:	 taxEffectOfUnusualItems
	2405:	 netIncomeFromContinuingOperationNetMinorityInterest
	2405:	 operatingRevenue
	2405:	 netIncomeCommonStockholders
	2405:	 dilutedNIAvailtoComStockholders
	2405:	 normalizedIncome
	2405:	 netIncomeContinuousOperations
	2374:	 taxProvision
	2362:	 netInterestIncome
	2330:	 reconciledDepreciation
	2273:	 totalExpenses
	2269:	 ebit
	2230:	 netNonOperatingInterestIncomeExpense
	2200:	 interestExpense
	2174:	 normalizedEBITDA
	2174:	 operatingIncome
	2174:	 eBITDA
	2167:	 operatingExpense
	2149:	 reconciledCostOfRevenue
	2149:	 grossProfit
	2149:	 costOfRevenue
	2137:	 sellingGeneralAndAdministration
	2122:	 otherIncomeExpense
	2068:	 interestExpenseNonOperati

In [34]:
# Position types kept for analysis, keyed by '<period>_<statement kind>'.
# Quarterly and annual statements use the same selection, so both periods are
# generated from one template; each key gets its own independent list.
position_types_selected_for_analysis = {
    f'{period}_{statement_kind}': list(selected_positions)
    for period in ('quarterly', 'annual')
    for statement_kind, selected_positions in {
        'income': (
            'totalRevenue',
            'totalExpenses',
            'grossProfit',
            'netIncome',
            'ebit',
            'eBITDA',
            'operatingRevenue',
            'operatingIncome',
            'dilutedEPS',
        ),
        'balance': (
            'totalAssets',
            'totalLiabilitiesNetMinorityInterest',
            'totalDebt',
            'ordinarySharesNumber',
            'commonStockEquity',
            'tangibleBookValue',
            'investedCapital',
        ),
        'cashflow': (
            'operatingCashflow',
            'investingCashflow',
            'financingCashflow',
            'freeCashflow',
        ),
    }.items()
}


In [39]:
# Keep, per ticker and statement type, only the statements that report every
# position type selected for analysis; incomplete statements are dropped.
yahoo_financials_complete = {}
for ticker in tickers:
    complete_by_type = yahoo_financials_complete[ticker] = {}
    for statement_type in statement_types:
        complete_by_type[statement_type] = [
            statement
            for statement in yahoo_financials_json[ticker][statement_type]
            # A statement qualifies only if none of the selected position
            # types is missing from its (first) positions dict.
            if all(
                position_type in list(statement.values())[0].keys()
                for position_type in position_types_selected_for_analysis[statement_type]
            )
        ]

In [41]:
# Reduce every complete statement to the position types selected for analysis,
# keeping the {timestamp: positions} layout of the statements.
yahoo_financials_selected = {}
for ticker in tickers:
    selected_by_type = yahoo_financials_selected[ticker] = {}
    for statement_type in statement_types:
        trimmed_statements = []
        for statement in yahoo_financials_complete[ticker][statement_type]:
            # Only the first (key, value) pair of the statement dict is used.
            timestamp, reported_positions = list(statement.items())[0]
            positions = {
                position_type: reported_positions[position_type]
                for position_type in position_types_selected_for_analysis[statement_type]
            }
            trimmed_statements.append({timestamp: positions})
        selected_by_type[statement_type] = trimmed_statements

In [43]:
# Write the trimmed statements next to the source data as indented JSON.
file_path = 'data/yahoo/merged_selected_financials.json'
with open(file_path, mode='w') as selected_financials_file:
    json.dump(yahoo_financials_selected, fp=selected_financials_file, indent=4)