# In [None]:  (Jupyter notebook cell marker left over from export)
import import_ipynb
import datetime as dt
import pandas as pd
import numpy as np
import os
import json
from  save_tickers import save_sp500_tickers
from yahoofinancials import YahooFinancials 
from pandas_datareader import data as web
# from logs import Utils

#financial statements 
class sp500_financial():
    """Download fundamentals for a list of tickers from Yahoo and save them.

    One instance is configured for a single kind of data:

    * ``'balance'``, ``'cash'``, ``'income'`` -- financial statements, fetched
      by :meth:`get_stmts_df` and appended as JSON lines under
      ``sp500_fundamentals_dfs/``.
    * ``'key_statistics'`` -- fetched by :meth:`get_key_stats_df` and appended
      as JSON lines under ``sp500_key_statistics_dfs/``.
    * ``'market_cap'`` -- fetched by :meth:`get_market_cap_df` and written as
      a CSV file under ``sp500_market_cap_dfs/``.

    Args:
        stmt_type: One of the type names listed above.
        frequency: ``'quarterly'`` selects the quarterly statements; any other
            value selects the annual ones. Only used for financial statements.
        tickers: Ticker symbols to process. When empty or ``None`` the list is
            obtained from ``save_sp500_tickers()``.
    """

    # Output folder created by ``mk_dir`` for each supported type.
    _OUTPUT_DIRS = {
        'balance': 'sp500_fundamentals_dfs',
        'cash': 'sp500_fundamentals_dfs',
        'income': 'sp500_fundamentals_dfs',
        'key_statistics': 'sp500_key_statistics_dfs',
        'market_cap': 'sp500_market_cap_dfs',
    }

    # Top-level key of the statement payload, as (annual, quarterly).
    _STMT_KEYS = {
        'balance': ('balanceSheetHistory', 'balanceSheetHistoryQuarterly'),
        'cash': ('cashflowStatementHistory', 'cashflowStatementHistoryQuarterly'),
        'income': ('incomeStatementHistory', 'incomeStatementHistoryQuarterly'),
    }

    def __init__(self, stmt_type=None, frequency=None, tickers=None):
        self.type = stmt_type
        self.frequency = frequency
        self.tickers = tickers

    def mk_dir(self):
        """Create the output folder that belongs to ``self.type``.

        Does nothing for an unrecognised type or when the folder exists.
        """
        folder = self._OUTPUT_DIRS.get(self.type)
        if folder is not None:
            os.makedirs(folder, exist_ok=True)

    def call_yahoo(self, ticker):
        """Fetch the raw Yahoo payload for ``ticker`` according to ``self.type``.

        Returns:
            For statement types, the value stored under the annual or
            quarterly history key of ``get_financial_stmts``; for
            ``'key_statistics'``, the result of ``get_key_statistics_data``;
            ``None`` for any other type.
        """
        if self.type in self._STMT_KEYS:
            annual_key, quarterly_key = self._STMT_KEYS[self.type]
            statements = YahooFinancials(ticker).get_financial_stmts(
                self.frequency, self.type)
            if self.frequency == 'quarterly':
                return statements[quarterly_key]
            return statements[annual_key]

        if self.type == 'key_statistics':
            return YahooFinancials(ticker).get_key_statistics_data()

        return None

    def _resolve_tickers(self):
        """Return the configured tickers, or the full list from save_sp500_tickers()."""
        if self.tickers:
            return self.tickers
        return save_sp500_tickers()

    def _fetch_for_ticker(self, ticker):
        """Return the payload entry for ``ticker``, or None when unavailable.

        Prints ``Failed: <ticker>`` when the call raises or when the payload
        is not a dict (e.g. ``call_yahoo`` returned None for an unsupported
        type). A payload without an entry for the ticker yields None silently.
        """
        try:
            payload = self.call_yahoo(ticker)
        except Exception:
            # Best effort: one bad ticker must not abort the whole run.
            # ``Exception`` (not a bare except) keeps Ctrl-C working.
            print('Failed: {}'.format(ticker))
            return None

        if not isinstance(payload, dict):
            print('Failed: {}'.format(ticker))
            return None

        return payload.get(ticker)

    @staticmethod
    def _append_lines(path, lines):
        """Append ``lines`` to ``path``, creating the parent folder if needed."""
        os.makedirs(os.path.dirname(path), exist_ok=True)
        with open(path, 'a') as outfile:
            outfile.writelines(lines)

    def get_stmts_df(self):
        """Fetch financial statements and append them to a JSON-lines file.

        Each statement becomes one JSON object per line, extended with
        ``ticker`` and ``date`` keys, in
        ``sp500_fundamentals_dfs/<today>_<type>_<frequency>.json``.
        Tickers whose download fails or that have no statements are skipped.
        """
        tickers = self._resolve_tickers()
        self.mk_dir()

        out_path = 'sp500_fundamentals_dfs/{}_{}_{}.json'.format(
            str(dt.date.today()), self.type, self.frequency)

        print('\n# Start Fetching {} {} for {} stocks'.format(self.type, self.frequency, len(tickers)))
        for raw_ticker in tickers:
            # NOTE(review): dots are dropped (e.g. 'BRK.B' -> 'BRKB'); Yahoo
            # may expect 'BRK-B' for share classes -- confirm before changing.
            ticker = raw_ticker.replace('.', '')

            statements = self._fetch_for_ticker(ticker)
            if statements is None:
                continue

            lines = []
            for statement in statements:
                # Each entry is expected to be a one-item dict {date: fields}.
                if not statement:
                    continue
                date_key = next(iter(statement))
                fields = statement[date_key]
                if not isinstance(fields, dict):
                    continue
                fields['ticker'] = ticker
                fields['date'] = date_key
                lines.append(json.dumps(fields) + '\n')

            if lines:
                # One open per ticker instead of one per statement.
                self._append_lines(out_path, lines)

            # track ticker progress
            print('Wrote:', ticker)

        print('Finished!!')

    def get_key_stats_df(self):
        """Fetch key statistics and append them to a JSON-lines file.

        Each ticker becomes one JSON object per line, extended with ``ticker``
        and today's ``date``, in
        ``sp500_key_statistics_dfs/<today>_<type>.json``. Tickers whose
        download fails or returns no data are reported and skipped.
        """
        tickers = self._resolve_tickers()
        self.mk_dir()

        today = str(dt.date.today())
        out_path = 'sp500_key_statistics_dfs/{}_{}.json'.format(today, self.type)

        print('\n# Start Fetching {} for {} stocks'.format(self.type, len(tickers)))
        for raw_ticker in tickers:
            ticker = raw_ticker.replace('.', '')

            # Single, guarded request per ticker.
            stats = self._fetch_for_ticker(ticker)
            if not isinstance(stats, dict):
                if stats is not None:
                    print('Failed: {}'.format(ticker))
                continue

            stats['ticker'] = ticker
            stats['date'] = today
            self._append_lines(out_path, [json.dumps(stats) + '\n'])

            # keep track progress
            print('Wrote:', ticker)

        print('Finished!!')

    def get_market_cap_df(self):
        """Fetch market capitalisations and write them to a dated CSV file.

        Returns:
            A DataFrame indexed by today's date with ``ticker`` and
            ``marketCap`` columns for every ticker that could be fetched. The
            same frame is written to
            ``sp500_market_cap_dfs/<today>_market_cap.csv``.
        """
        tickers = self._resolve_tickers()

        print('\n# Start Fetching {}'.format('market capitalization for sp500'))

        frames = []     # one small frame per successful ticker
        fail_list = []  # tickers without a market capitalisation

        for raw_ticker in tickers:
            ticker = raw_ticker.replace('.', '')

            try:
                market_cap = web.get_quote_yahoo(ticker)['marketCap']
                print(ticker)
            except Exception:
                print('Failed: {}'.format(ticker))
                fail_list.append(ticker)
                continue

            # The quote is indexed by ticker; turn it into two plain columns.
            market_cap = pd.DataFrame(market_cap)
            market_cap.reset_index(inplace=True)
            market_cap.columns = ['ticker', 'marketCap']
            frames.append(market_cap)

        # Concatenate once; pd.concat raises on an empty list.
        if frames:
            main_df = pd.concat(frames, axis=0, sort=False)
        else:
            main_df = pd.DataFrame()

        main_df['date'] = dt.date.today()
        main_df.set_index(['date'], inplace=True)

        print('Finished!')

        # Create the folder regardless of self.type so the CSV write cannot
        # fail after all the fetching is done.
        os.makedirs('sp500_market_cap_dfs', exist_ok=True)
        main_df.to_csv('sp500_market_cap_dfs/{}_market_cap.csv'.format(str(dt.date.today())))
        print('Loaded!\n')

        print('Ticker Failed List:', fail_list)

        return main_df


if __name__ == '__main__':
    # Small demo run on a handful of tickers. Leave ``tickers`` out to
    # process the full list returned by save_sp500_tickers().
    demo_tickers = ['AAPL', 'TSLA', 'F']

    # Annual balance sheets, appended as JSON lines under
    # sp500_fundamentals_dfs/.
    balance_annual = sp500_financial('balance', frequency='annual', tickers=demo_tickers)
    balance_annual.get_stmts_df()

    # Key statistics, appended as JSON lines under sp500_key_statistics_dfs/.
    key = sp500_financial('key_statistics', tickers=demo_tickers)
    key.get_key_stats_df()

    # Other available runs (not executed by default):
    #   sp500_financial('cash' | 'income', frequency='annual' | 'quarterly').get_stmts_df()
    #   sp500_financial('balance', frequency='quarterly').get_stmts_df()
    #   sp500_financial('market_cap').get_market_cap_df()



