 Load libraries and API key

In [6]:
# Setup cell: all imports first, then environment and SimFin configuration.
# Based on the SimFin basics tutorial:
# https://github.com/SimFin/simfin-tutorials/blob/master/01_Basics.ipynb

# --- Imports -----------------------------------------------------------------
import os
import warnings

from dotenv import load_dotenv
from tqdm import tqdm

import pandas as pd

# Main functionality of the SimFin Python API.
import simfin as sf

# Names used for easy access to SimFin's data-columns.
# NOTE(review): a star import hides where names come from; consider importing
# only the constants that are actually used once the notebook stabilises.
from simfin.names import *

# --- Configuration -----------------------------------------------------------
# Populate the process environment from a .env file, then read the key from it.
# The key is never hardcoded in the notebook.
load_dotenv()
API_KEY = os.environ.get('API_KEY')

# os.environ.get() returns None when the variable is absent. Surface that here
# instead of letting a missing key show up later as an obscure download error.
# This is a warning rather than an exception so that data already cached on
# disk can still be loaded.
if not API_KEY:
    warnings.warn(
        "API_KEY is not set (checked the environment and the .env file); "
        "SimFin downloads may fail until it is provided.",
        stacklevel=2,
    )

# Directory where the SimFin data is downloaded.
sf.set_data_dir('~/simfin_data/')

# Register the API key loaded from the environment above.
sf.set_api_key(api_key=API_KEY)

Load and check data

In [15]:
# Load the US income-statement dataset in its trailing-twelve-months variant.
# refresh_days=3: per the simfin docs, the on-disk copy is re-downloaded only
# when it is older than this many days.
df1 = sf.load(
    dataset='income',
    variant='ttm',
    market='us',
    refresh_days=3,
)

# Quick sanity check: plain-text dump of the first rows.
print(df1.head())

Dataset "us-income-ttm" on disk (0 days old).
- Loading from disk ... 

  df = pd.read_csv(path, sep=';', header=0,


Done!
  Ticker  SimFinId Currency  Fiscal Year Fiscal Period Report Date  \
0      A     45846      USD         2017            Q4  2017-10-31   
1      A     45846      USD         2018            Q1  2018-01-31   
2      A     45846      USD         2018            Q2  2018-04-30   
3      A     45846      USD         2018            Q3  2018-07-31   
4      A     45846      USD         2018            Q4  2018-10-31   

  Publish Date Restated Date  Shares (Basic)  Shares (Diluted)  ...  \
0   2017-12-21    2018-12-20     322000000.0       326000000.0  ...   
1   2018-03-06    2019-03-05     322250000.0       325250000.0  ...   
2   2018-05-31    2019-05-30     322500000.0       325500000.0  ...   
3   2018-08-30    2018-08-30     322250000.0       325000000.0  ...   
4   2018-12-20    2020-12-18     321000000.0       325000000.0  ...   

   Non-Operating Income (Loss)  Interest Expense, Net  \
0                  -38000000.0            -57000000.0   
1                  -21000000.0  

Overview of the loaded dataset

In [13]:
# Dimensions of the loaded frame: DataFrame.shape is a (rows, columns) tuple.
num_rows, num_columns = df1.shape
print(f"Number of Rows: {num_rows}")
print(f"Number of Columns: {num_columns}")

# Per-column summary (names, non-null counts, dtypes).
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() appended a stray "None" line.
print("DataFrame Info:")
df1.info()

Number of Rows: 57560
Number of Columns: 28
DataFrame Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 57560 entries, 0 to 57559
Data columns (total 28 columns):
 #   Column                                    Non-Null Count  Dtype  
---  ------                                    --------------  -----  
 0   Ticker                                    57560 non-null  object 
 1   SimFinId                                  57560 non-null  int64  
 2   Currency                                  57560 non-null  object 
 3   Fiscal Year                               57560 non-null  int64  
 4   Fiscal Period                             57560 non-null  object 
 5   Report Date                               57560 non-null  object 
 6   Publish Date                              57560 non-null  object 
 7   Restated Date                             57560 non-null  object 
 8   Shares (Basic)                            57283 non-null  float64
 9   Shares (Diluted)                          572

In [19]:
# Print a caption (preceded by a blank line) followed by the first rows of the
# DataFrame. A single print call with a newline separator emits the same text
# as two consecutive prints.
print("\nDataFrame Head:", df1.head(), sep="\n")



DataFrame Head:
  Ticker  SimFinId Currency  Fiscal Year Fiscal Period Report Date  \
0      A     45846      USD         2017            Q4  2017-10-31   
1      A     45846      USD         2018            Q1  2018-01-31   
2      A     45846      USD         2018            Q2  2018-04-30   
3      A     45846      USD         2018            Q3  2018-07-31   
4      A     45846      USD         2018            Q4  2018-10-31   

  Publish Date Restated Date  Shares (Basic)  Shares (Diluted)  ...  \
0   2017-12-21    2018-12-20     322000000.0       326000000.0  ...   
1   2018-03-06    2019-03-05     322250000.0       325250000.0  ...   
2   2018-05-31    2019-05-30     322500000.0       325500000.0  ...   
3   2018-08-30    2018-08-30     322250000.0       325000000.0  ...   
4   2018-12-20    2020-12-18     321000000.0       325000000.0  ...   

   Non-Operating Income (Loss)  Interest Expense, Net  \
0                  -38000000.0            -57000000.0   
1                  -2

Visualize columns

In [21]:
# Columns kept for the narrowed view used in the next cell.
selected_columns = [
    'Ticker',
    'Fiscal Year',
    'Fiscal Period',
    'Operating Income (Loss)',
]

# Show every column label available in the full frame for reference.
print(df1.columns)

Index(['Ticker', 'SimFinId', 'Currency', 'Fiscal Year', 'Fiscal Period',
       'Report Date', 'Publish Date', 'Restated Date', 'Shares (Basic)',
       'Shares (Diluted)', 'Revenue', 'Cost of Revenue', 'Gross Profit',
       'Operating Expenses', 'Selling, General & Administrative',
       'Research & Development', 'Depreciation & Amortization',
       'Operating Income (Loss)', 'Non-Operating Income (Loss)',
       'Interest Expense, Net', 'Pretax Income (Loss), Adj.',
       'Abnormal Gains (Losses)', 'Pretax Income (Loss)',
       'Income Tax (Expense) Benefit, Net',
       'Income (Loss) from Continuing Operations',
       'Net Extraordinary Gains (Losses)', 'Net Income',
       'Net Income (Common)'],
      dtype='object')


In [22]:
# Display only the chosen columns (all rows). As the cell's last expression,
# the resulting frame is rendered by the notebook.
df1.loc[:, selected_columns]

Unnamed: 0,Ticker,Fiscal Year,Fiscal Period,Operating Income (Loss)
0,A,2017,Q4,841000000
1,A,2018,Q1,864000000
2,A,2018,Q2,873000000
3,A,2018,Q3,897000000
4,A,2018,Q4,889000000
...,...,...,...,...
57555,ZYXI,2021,Q2,6440000
57556,ZYXI,2021,Q3,13077000
57557,ZYXI,2021,Q4,22366000
57558,ZYXI,2022,Q1,25353000
