**Imports**

In [None]:
import os
import pickle
import time
from collections import Counter, OrderedDict
from datetime import datetime
from statistics import mean

import matplotlib.pyplot as plt
import missingno as msno
import numpy as np
import pandas as pd
import seaborn as sns
from IPython.display import display

import FundamentalAnalysis as fa

# Keep import_ipynb ahead of UTILS (presumably UTILS is a notebook that is
# loaded through import_ipynb's import hook -- confirm).
import import_ipynb
import UTILS as utils

In [None]:
# Remove pandas' row/column display limits so frames are rendered in full.
pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)

In [None]:
# Read the API key from the environment so the real credential never has to be
# written into (and committed with) the notebook. The original placeholder is
# kept as the fallback, so behaviour is unchanged when FA_API_KEY is not set.
api_key = os.environ.get("FA_API_KEY", "Here would go my personal API key")

--------

**Load data**

*Files with the data for the year at displacement -X*

In [84]:
# Inputs for displacement X: the ratios table (CSV) and the pickled per-company
# financial data.
file1 = (
    'DATA_ttm/S_P500_data_ttm_Xyeardisp/'
    'S_P500_dataframe_with_ratios_ttm_Xyeardisp.csv'
)
file2 = (
    'DATA_ttm/S_P500_data_ttm_Xyeardisp/'
    'S_P500_companies_financials_data_ttm_Xyeardisp.pickle'
)

*Files with the data for the year at displacement -(X+1), needed because this method compares each year against the previous one*

In [None]:
# Inputs for displacement X+1 (the comparison year): the ratios table (CSV) and
# the pickled per-company financial data.
file3 = (
    'DATA_ttm/S_P500_data_ttm_X+1yeardisp/'
    'S_P500_dataframe_with_ratios_ttm_X+1yeardisp.csv'
)
file4 = (
    'DATA_ttm/S_P500_data_ttm_X+1yeardisp/'
    'S_P500_companies_financials_data_ttm_X+1yeardisp.pickle'
)

In [None]:
# Unpack the loaded objects: positions 0/1 become the current-year ratios table
# and company dictionary, positions 2/3 their previous-year counterparts.
# NOTE(review): `data_loaded` is not defined in any visible cell, so this cell
# fails on a fresh kernel. Presumably it held the contents of file1..file4 in
# that order -- confirm and restore the loading cell.
df, dict_companies = data_loaded[0], data_loaded[1]
df_1, dict_companies_1 = data_loaded[2], data_loaded[3]

----------

*Check if we have all the ratios needed for the computations and make sure they don't have NaN values*

In [59]:
# Ratio columns that the scoring functions read from the ratios tables.
necessary_ratios = [
    '_return_on_assets',
    '_operating_cash_flow_ratio',
    '_current_ratio',
    '_gross_margin',
    '_total_asset_turnover',
]

In [60]:
# Validate both ratio tables before scoring: every required ratio column must
# exist (hard error, naming the table and the missing columns), and NaN values
# are reported. NaNs only produce a warning because a comparison involving NaN
# evaluates to False, i.e. the affected criterion silently scores 0 rather
# than failing.
for frame_label, frame in (('df', df), ('df_1', df_1)):
    missing_ratios = [ratio for ratio in necessary_ratios if ratio not in frame.columns]
    if missing_ratios:
        raise Exception(
            "CAN'T CONTINUE, YOU ARE MISSING SOME NECESSARY RATIO IN YOUR DATA"
            f" ({frame_label} is missing: {missing_ratios})"
        )
    nan_counts = frame[necessary_ratios].isna().sum()
    nan_counts = nan_counts[nan_counts > 0]
    if not nan_counts.empty:
        print(f'WARNING: NaN values found in {frame_label}: {nan_counts.to_dict()}')
print('CONTINUE')

CONTINUE


-----------------

**Compute Piotroski scores**

In [9]:
def compute_profitability_scores(df, dict_companies, df_1, dict_companies_1, disp):
    """Count how many of the four profitability criteria each ticker passes.

    Criteria (one point each):
      * ``_return_on_assets`` is positive;
      * ``_operating_cash_flow_ratio`` is positive;
      * ``_return_on_assets`` is higher in ``df`` than in ``df_1``;
      * mean operating cash flow divided by mean total assets, both taken over
        the four statement columns starting at position ``4 * disp``, exceeds
        ``_return_on_assets``.

    Parameters
    ----------
    df : ratios table indexed by ticker for the year being scored.
    dict_companies : mapping ticker -> dict holding the
        ``'_Company__cash_flow_statement'`` and ``'_Company__balance_sheet'``
        tables (rows ``'operatingCashFlow'`` / ``'totalAssets'``).
    df_1 : ratios table for the comparison year, with the same index as ``df``.
    dict_companies_1 : accepted for signature symmetry; not read here.
    disp : integer displacement selecting the four-column statement window.

    Returns
    -------
    dict mapping every ticker in ``df.index`` to its score (0-4).
    """
    window = slice(4 * disp, 4 + (4 * disp))

    def _tickers_where(mask):
        # Index labels of the rows for which the boolean mask is True.
        return list(mask[mask].index)

    passed = {
        'return_on_assets': _tickers_where(df._return_on_assets > 0),
        'operating_cash_flow': _tickers_where(df._operating_cash_flow_ratio > 0),
        'change_return_on_assets': _tickers_where(df._return_on_assets > df_1._return_on_assets),
    }

    accruals_ok = []
    for ticker in df.index:
        company = dict_companies[ticker]
        cash_flows = company['_Company__cash_flow_statement']
        balances = company['_Company__balance_sheet']
        mean_cash_flow = cash_flows[list(cash_flows.columns[window])].loc['operatingCashFlow'].mean()
        mean_assets = balances[list(balances.columns[window])].loc['totalAssets'].mean()
        if mean_cash_flow / mean_assets > df.loc[ticker, '_return_on_assets']:
            accruals_ok.append(ticker)
    passed['accruals'] = accruals_ok

    return {
        ticker: sum(1 for winners in passed.values() if ticker in winners)
        for ticker in df.index
    }

In [10]:
def compute_leverage_liquidity_scores(df, dict_companies, df_1, dict_companies_1, disp):
    """Count how many of the three leverage/liquidity criteria each ticker passes.

    Criteria (one point each):
      * mean ``longTermDebt`` over the four balance-sheet columns starting at
        ``4 * disp`` is strictly lower than the mean over the following four
        columns (the comparison year);
      * ``_current_ratio`` is higher in ``df`` than in ``df_1``;
      * ``numberOfShares`` at column ``4 * disp`` is not greater than at column
        ``4 + 4 * disp`` of the comparison table (falling back to that table's
        last column when the first attempt fails).

    Parameters
    ----------
    df : ratios table indexed by ticker for the year being scored.
    dict_companies : mapping ticker -> dict holding ``'_Company__balance_sheet'``
        and ``'_Company__enterprise_value'`` tables.
    df_1 : ratios table for the comparison year, with the same index as ``df``.
    dict_companies_1 : same structure as ``dict_companies`` for the comparison
        year.
    disp : integer displacement selecting the column windows.

    Returns
    -------
    dict mapping every ticker in ``df.index`` to its score (0-3).

    Raises
    ------
    KeyError
        If a ticker or one of the statement tables is missing from either
        company dictionary.
    """
    current_window = slice(4 * disp, 4 + (4 * disp))
    previous_window = slice(4 + (4 * disp), 8 + (4 * disp))

    scores = dict()

    change_long_term_positive = []
    for ticker in df.index:
        # Both balance sheets are equal because the time displacement was not
        # taken into account when they were stored, so the comparison year is
        # read from the *next* four columns rather than from the same window.
        balance_sheet = dict_companies[ticker]['_Company__balance_sheet']
        balance_sheet_1 = dict_companies_1[ticker]['_Company__balance_sheet']
        debt_now = balance_sheet[list(balance_sheet.columns[current_window])].loc['longTermDebt'].mean()
        debt_before = balance_sheet_1[list(balance_sheet_1.columns[previous_window])].loc['longTermDebt'].mean()
        if debt_now < debt_before:
            change_long_term_positive.append(ticker)
    scores['change_long_term'] = change_long_term_positive

    change_current_ratio_positive = df._current_ratio > df_1._current_ratio
    # Previously stored under the copy-pasted key 'change_return_on_assets'.
    scores['change_current_ratio'] = list(change_current_ratio_positive[change_current_ratio_positive].index)

    change_number_shares_positive = []
    for ticker in df.index:
        enterprise_value = dict_companies[ticker]['_Company__enterprise_value']
        enterprise_value_1 = dict_companies_1[ticker]['_Company__enterprise_value']
        # Best effort: a ticker whose share counts cannot be read simply earns
        # no point. `except Exception` (instead of a bare `except`) keeps that
        # behaviour while letting KeyboardInterrupt / SystemExit propagate.
        try:
            try:
                if enterprise_value.loc['numberOfShares', enterprise_value.columns[4 * disp]] <= \
                        enterprise_value_1.loc['numberOfShares', enterprise_value_1.columns[4 + (4 * disp)]]:
                    change_number_shares_positive.append(ticker)
            except Exception:
                # Fallback: compare against the last column of the comparison
                # table, e.g. when it has no column at position 4 + 4 * disp.
                if enterprise_value.loc['numberOfShares', enterprise_value.columns[4 * disp]] <= \
                        enterprise_value_1.loc['numberOfShares', enterprise_value_1.columns[len(enterprise_value_1.columns) - 1]]:
                    change_number_shares_positive.append(ticker)
        except Exception:
            pass
    scores['change_number_shares'] = change_number_shares_positive

    # Sets give constant-time membership tests when tallying the points.
    passed_sets = [set(tickers) for tickers in scores.values()]
    leverage_liquidity_scores = {
        ticker: sum(1 for passed in passed_sets if ticker in passed)
        for ticker in df.index
    }

    return leverage_liquidity_scores
    

In [11]:
def compute_operating_efficiency_scores(df, dict_companies, df_1, dict_companies_1):
    """Count how many of the two operating-efficiency criteria each ticker passes.

    One point is awarded when ``_gross_margin`` is higher in ``df`` than in
    ``df_1`` and one when ``_total_asset_turnover`` is higher in ``df`` than in
    ``df_1``.

    Parameters
    ----------
    df : ratios table indexed by ticker for the year being scored.
    dict_companies, dict_companies_1 : accepted for signature symmetry with the
        other scoring functions; not read here.
    df_1 : ratios table for the comparison year, with the same index as ``df``.

    Returns
    -------
    dict mapping every ticker in ``df.index`` to its score (0-2).
    """
    criteria = (
        ('change_gross_margin', '_gross_margin'),
        ('change_asset_turnover', '_total_asset_turnover'),
    )

    improved = {}
    for label, ratio in criteria:
        # Attribute access is kept so a missing column fails the same way.
        is_higher = getattr(df, ratio) > getattr(df_1, ratio)
        improved[label] = list(is_higher[is_higher].index)

    tally = {}
    for ticker in df.index:
        tally[ticker] = sum(1 for winners in improved.values() if ticker in winners)
    return tally
    

In [12]:
def compute_piotroski_f_score(df, dict_companies, df_1, dict_companies_1, sort_by_score=False, year_disp=0):
    """Combine the three partial scores into one Piotroski F-score per ticker.

    Parameters
    ----------
    df, dict_companies : ratios table and company dictionary for the year
        being scored.
    df_1, dict_companies_1 : the same objects for the comparison year.
    sort_by_score : when True, the returned dict is ordered from the highest to
        the lowest score (ties keep their original order).
    year_disp : displacement forwarded to the profitability and
        leverage/liquidity scoring functions.

    Returns
    -------
    dict mapping ticker -> total score, or ``None`` (after printing a message)
    if the three partial results do not cover the same tickers. Wrap the dict
    with ``pd.DataFrame.from_dict(..., orient='index')`` if a table is needed.
    """
    profitability_scores = compute_profitability_scores(df, dict_companies, df_1, dict_companies_1, year_disp)
    leverage_liquidity_scores = compute_leverage_liquidity_scores(df, dict_companies, df_1, dict_companies_1, year_disp)
    operating_efficiency_scores = compute_operating_efficiency_scores(df, dict_companies, df_1, dict_companies_1)

    same_tickers = (
        profitability_scores.keys() == leverage_liquidity_scores.keys()
        and leverage_liquidity_scores.keys() == operating_efficiency_scores.keys()
    )
    if not same_tickers:
        print('An error occurred')
        return None

    # Add the partial scores explicitly. `Counter + Counter` discards entries
    # whose total is <= 0, which silently removed every ticker scoring 0.
    piotroski_f_score_dict = {
        ticker: (
            profitability_scores[ticker]
            + leverage_liquidity_scores[ticker]
            + operating_efficiency_scores[ticker]
        )
        for ticker in profitability_scores
    }

    if sort_by_score:
        # sorted() is stable, so equal scores keep their original order.
        piotroski_f_score_dict = dict(
            sorted(piotroski_f_score_dict.items(), key=lambda item: item[1], reverse=True)
        )

    return piotroski_f_score_dict

In [85]:
# Score every sector separately: slice both years' ratio tables and company
# dictionaries down to the sector's tickers, then compute the F-scores.
scores_by_sector = dict()
sectors = utils.get_sectors_and_industries(df, dict_companies)[0]
for sector in sectors:
    sector_df = utils.get_sector_companies(df, dict_companies, sector)
    sector_df_1 = utils.get_sector_companies(df_1, dict_companies_1, sector)
    # Index.equals also copes with indexes of different length, where the
    # element-wise `==` raised an unrelated ValueError instead of this message.
    if not sector_df.index.equals(sector_df_1.index):
        raise Exception('PROBLEM!!! Tickers of both dataframes should be the same')
    sector_tickers = set(sector_df.index)
    sector_tickers_1 = set(sector_df_1.index)
    dict_sector = {k: v for k, v in dict_companies.items() if k in sector_tickers}
    # The comparison year must be filtered from dict_companies_1 (this was
    # previously built from dict_companies, i.e. the current year's data).
    dict_sector_1 = {k: v for k, v in dict_companies_1.items() if k in sector_tickers_1}
    scores_by_sector[sector] = compute_piotroski_f_score(sector_df, dict_sector, sector_df_1, dict_sector_1, True, year_disp=0)

In [None]:
# Show which sectors received scores.
scores_by_sector.keys()

In [89]:
# Persist the per-sector scores. The target directory is created first so the
# cell also works on a fresh checkout, and the placeholder-free f-string is now
# a plain string literal.
output_path = 'models_outputs/piotroskiFscore_output/X_year_disp/scores_by_sector_Xyear.pickle'
os.makedirs(os.path.dirname(output_path), exist_ok=True)
with open(output_path, 'wb') as file:
    pickle.dump(scores_by_sector, file)

**Retrieve the saved dictionary**

In [None]:
# Reload the pickled per-sector scores into a fresh dictionary.
# NOTE: unpickling can execute arbitrary code -- only load files you created.
scores_by_sector_dict = {}
with open('models_outputs/piotroskiFscore_output/X_year_disp/scores_by_sector_Xyear.pickle', 'rb') as pickle_file:
    scores_by_sector_dict.update(pickle.load(pickle_file))

In [None]:
# Turn each sector's {ticker: score} mapping into a one-column DataFrame
# indexed by ticker.
scores_by_sector = {
    sector: pd.DataFrame.from_dict(sector_scores, orient='index', columns=['piotroski_f_score'])
    for sector, sector_scores in scores_by_sector_dict.items()
}

In [None]:
# Display the score table of a single sector.
scores_by_sector['Industrials']