In [1]:
import requests
import pandas as pd
import time
import simfin as sf
from simfin.names import *

In [2]:
sf.set_api_key('xxxxxxxxxxxxxxxxxxxxxxx')
# Set the local directory where data-files are stored.
# The dir will be created if it does not already exist.
sf.set_data_dir('~/simfin_data/')

In [7]:
# Load the annual Income Statements for all companies in the US.
# The data is automatically downloaded if you don't have it already.
df = sf.load_income(variant='quarterly', market='us')
df_income = df.copy()

Dataset "us-income-quarterly" on disk (0 days old).
- Loading from disk ... Done!


In [8]:
#indexing
df_income = sf.load(dataset='income', variant='quarterly', market='us',
              index=[TICKER, REPORT_DATE],
              parse_dates=[REPORT_DATE, PUBLISH_DATE, RESTATED_DATE])

Dataset "us-income-quarterly" on disk (0 days old).
- Loading from disk ... Done!


In [11]:
#Droping not needed columns write inplace = true for permanent delete
to_drop = ['Shares (Diluted)', 'Net Income (Common)']
df_income.drop(columns=to_drop, inplace=True)

In [None]:
#define columns, id vars, value vars
id_vars = list(df_income.columns)[ : 13]
id_vars.append((df_income.columns)[[15, 18, 19, 22]])
id_vars

In [16]:
#Melting losses into Losses column
df_melted = df_income.copy()
df_melted = pd.melt(df_melted, id_vars=['SimFinId', 'Currency', 'Fiscal Period', 'Fiscal Year', 'Publish Date', 'Restated Date', \
    'Shares (Basic)', 'Revenue', 'Cost of Revenue', 'Gross Profit', 'Operating Expenses', 'Selling, General & Administrative', \
        'Research & Development', 'Depreciation & Amortization','Interest Expense, Net', 'Income Tax (Expense) Benefit, Net', 'Net Income'], value_vars=['Operating Income (Loss)', \
            'Non-Operating Income (Loss)','Pretax Income (Loss), Adj.','Pretax Income (Loss)', \
                'Abnormal Gains (Losses)','Income (Loss) from Continuing Operations','Net Extraordinary Gains (Losses)'], value_name='Losses', ignore_index=False)

In [17]:
df_melted.drop(columns=['Currency'], inplace=True)

In [79]:
#null count - optional
df_null = df_melted.copy()
id_vars = list(df_null.columns)[ : 23]
df_null['num nulls'] = df_null[id_vars].isnull().sum(axis=1)
df_null.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,SimFinId,Fiscal Year,Publish Date,Restated Date,Shares (Basic),Revenue,Cost of Revenue,Gross Profit,Operating Expenses,"Selling, General & Administrative",Research & Development,Depreciation & Amortization,"Interest Expense, Net","Income Tax (Expense) Benefit, Net",Net Income,Losses,num nulls
Ticker,Report Date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
A,2016-10-31,45846,2016,2016-12-20,2018-12-20,326000000.0,4202000000.0,-2005000000.0,2197000000.0,-1582000000.0,-1253000000.0,-329000000.0,,-61000000.0,-82000000.0,462000000,615000000.0,1
A,2017-10-31,45846,2017,2017-12-21,2018-12-20,322000000.0,4472000000.0,-2063000000.0,2409000000.0,-1568000000.0,-1229000000.0,-339000000.0,,-57000000.0,-119000000.0,684000000,841000000.0,1
A,2018-10-31,45846,2018,2018-12-20,2020-12-18,321000000.0,4914000000.0,-2234000000.0,2680000000.0,-1776000000.0,-1389000000.0,-387000000.0,,-37000000.0,-630000000.0,316000000,904000000.0,1
A,2019-10-31,45846,2019,2019-12-19,2021-12-17,314000000.0,5163000000.0,-2358000000.0,2805000000.0,-1864000000.0,-1460000000.0,-404000000.0,,-38000000.0,152000000.0,1071000000,941000000.0,1
A,2020-10-31,45846,2020,2020-12-18,2021-12-17,309000000.0,5339000000.0,-2502000000.0,2837000000.0,-1991000000.0,-1496000000.0,-495000000.0,,-70000000.0,-123000000.0,719000000,846000000.0,1


In [19]:
#replace null values
df_melted = df_melted.fillna(0)
df_melted

Unnamed: 0_level_0,Unnamed: 1_level_0,SimFinId,Fiscal Period,Fiscal Year,Publish Date,Restated Date,Shares (Basic),Revenue,Cost of Revenue,Gross Profit,Operating Expenses,"Selling, General & Administrative",Research & Development,Depreciation & Amortization,"Interest Expense, Net","Income Tax (Expense) Benefit, Net",Net Income,variable,Losses
Ticker,Report Date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
A,2016-10-31,45846,Q4,2016,2016-12-20,2018-12-20,324000000.0,1.111000e+09,-523000000.0,588000000.0,-405000000.0,-321000000.0,-84000000.0,0.0,-16000000.0,-25000000.0,126000000,Operating Income (Loss),183000000.0
A,2017-01-31,45846,Q1,2017,2017-03-08,2018-03-06,322000000.0,1.067000e+09,-493000000.0,574000000.0,-368000000.0,-289000000.0,-79000000.0,0.0,-16000000.0,-25000000.0,168000000,Operating Income (Loss),206000000.0
A,2017-04-30,45846,Q2,2017,2017-06-06,2018-05-31,321000000.0,1.102000e+09,-510000000.0,592000000.0,-391000000.0,-307000000.0,-84000000.0,0.0,-15000000.0,-27000000.0,164000000,Operating Income (Loss),201000000.0
A,2017-07-31,45846,Q3,2017,2017-09-06,2018-08-30,321000000.0,1.114000e+09,-518000000.0,596000000.0,-395000000.0,-308000000.0,-87000000.0,0.0,-13000000.0,-18000000.0,175000000,Operating Income (Loss),201000000.0
A,2017-10-31,45846,Q4,2017,2017-12-21,2018-12-20,324000000.0,1.189000e+09,-542000000.0,647000000.0,-414000000.0,-325000000.0,-89000000.0,0.0,-13000000.0,-49000000.0,177000000,Operating Income (Loss),233000000.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ZYXI,2020-09-30,171401,Q3,2020,2020-10-27,2021-07-29,37945600.0,2.002600e+07,-4296000.0,15730000.0,-14321000.0,-14321000.0,0.0,0.0,-5000.0,-71000.0,1333000,Net Extraordinary Gains (Losses),0.0
ZYXI,2020-12-31,171401,Q4,2020,2021-02-25,2021-11-02,38262400.0,2.560500e+07,-5659000.0,19946000.0,-17649000.0,-17649000.0,0.0,0.0,-5000.0,-428000.0,1787000,Net Extraordinary Gains (Losses),0.0
ZYXI,2021-03-31,171401,Q1,2021,2021-04-29,2022-04-28,38320700.0,2.412700e+07,-5886000.0,18241000.0,-19322000.0,-19322000.0,0.0,0.0,-9000.0,384000.0,-706000,Net Extraordinary Gains (Losses),0.0
ZYXI,2021-06-30,171401,Q2,2021,2021-07-29,2022-04-28,38320700.0,3.102200e+07,-7267000.0,23755000.0,-19940000.0,-19940000.0,0.0,0.0,-45000.0,-962000.0,2808000,Net Extraordinary Gains (Losses),0.0


In [20]:
df_melted.drop(columns=['variable'], inplace=True)

In [15]:
# add id column
df_melted.insert(0, 'id', range(1, 1 + len(df_melted)))

ValueError: cannot insert id, already exists

In [22]:
# clean columns names
df_melted.columns = [x.lower().replace(" ", "_").replace("?", "").replace("-", "_") \
    .replace(r"/","_").replace("\\","_").replace("%","_per").replace(")","") \
         .replace(r"(","").replace("$","").replace(":", "").replace(",","") for x in df_melted.columns]
df_melted.columns

Index(['simfinid', 'fiscal_period', 'fiscal_year', 'publish_date',
       'restated_date', 'shares_basic', 'revenue', 'cost_of_revenue',
       'gross_profit', 'operating_expenses',
       'selling_general_&_administrative', 'research_&_development',
       'depreciation_&_amortization', 'interest_expense_net',
       'income_tax_expense_benefit_net', 'net_income', 'losses'],
      dtype='object')

In [23]:
#save to csv
df_melted.to_csv('csv/income_statement.csv', index=True, sep=';', encoding='utf-8', float_format='%.0f')
