# Interial API
This notebook develops the functions and classes for the Interial back-end API.

In [18]:
# Import libraries
import yfinance as yf
import pandas as pd

from sklearn.linear_model import LinearRegression

In [101]:
class Stock:
    '''

    Gather information and market data about a single stock through yfinance.

    The instance keeps the ticker symbol and a yf.Ticker handle; every public
    method returns plain dictionaries.

    methods:
    - get_info
    - get_holders
    - get_calendar
    - get_data

    '''

    def __init__(self, symbol):
        '''

        Bind the instance to one ticker.

        @param symbol: String ticker symbol, e.g. 'AAPL'.

        '''

        self.symbol = symbol
        self.stock = yf.Ticker(symbol)

    def __create_trendline(self, x, y):
        '''

        Fit a linear regression of y on x and evaluate it on the same x.

        @param x: 2-D array of timestamps as floats, like pd.to_datetime(stock_data['Date']).values.astype(float).reshape(-1, 1).
        @param y: Pandas dataframe of price.

        return: Numpy array of predicted y based on x, as returned by LinearRegression.predict.

        '''

        return LinearRegression().fit(x, y).predict(x)

    def __calculate_average(self, df, column):
        '''

        Calculate the average overall value of a stock data or column, excluding Date.

        @param df: Pandas dataframe of the data.
        @param column: String of the column name.

        return: Average value.

        '''

        return df[column].mean()

    def __get_stock_data(self, period='3mo', interval='1d', date_range=None):
        '''

        Download the price history of the stock.

        @param period: The period of the stock data (1d, 5d, 1mo, 3mo, 6mo, 1y, 2y, 5y, 10y, ytd, max). Only used when date_range is empty.
        @param interval: The interval per data (1m, 2m, 5m, 15m, 30m, 60m, 90m, 1h, 1d, 5d, 1wk, 1mo, 3mo).
        @param date_range: Array of range of string date like ['2020-01-01', '2020-12-31']. None or an empty array means "use period".

        return: Dataframe of stock data

        '''

        # None instead of a list literal: a mutable default is shared across calls.
        if date_range:
            # interval is forwarded here as well; previously it was silently
            # dropped whenever an explicit date range was requested.
            return yf.download(
                tickers=self.symbol,
                start=date_range[0],
                end=date_range[1],
                interval=interval
            )

        return yf.download(
            tickers=self.symbol,
            period=period,
            interval=interval
        )

    def __enrich_stock_data(self, stock_data, trendline=None, average=None):
        '''

        Add the derived columns to a downloaded price dataframe.

        @param stock_data: Dataframe with Open, High, Low and Close columns, indexed by date.
        @param trendline: String of the name of the column to fit a trendline on, or None.
        @param average: String of the name of the column to get the average of, or None.

        return: New dataframe with the index moved into the first column and the derived columns appended.

        '''

        # reset_index() returns a new frame whose first column is the former
        # index. Its name is read from the frame instead of being hard-coded to
        # 'Date', so a differently named time index still works.
        # NOTE(review): yfinance appears to name the index 'Datetime' for
        # intraday intervals - confirm.
        stock_data = stock_data.reset_index()
        date_column = stock_data.columns[0]

        # Row-wise mean of the four prices; plain addition keeps NaN rows as NaN.
        stock_data['OHLC Average'] = (
            stock_data['Open']
            + stock_data['High']
            + stock_data['Low']
            + stock_data['Close']
        ) / 4

        if trendline:
            x = pd.to_datetime(stock_data[date_column]).values.astype(float).reshape(-1, 1)
            y = stock_data[[trendline]]
            pred = self.__create_trendline(x, y)

            # y is a one-column dataframe, so the prediction is expected to come
            # back with shape (n, 1); a dataframe column needs 1-D data, hence
            # the flattening.
            stock_data[trendline + ' Prediction'] = pred.reshape(-1)

        if average:
            # A scalar assignment is broadcast to every row.
            stock_data[average + ' Mean'] = self.__calculate_average(stock_data, average)

        return stock_data

    def get_info(self, datapoints=None):
        '''

        Get summary of the stock.

        @param datapoints: Array of datapoints to be returned. None or empty returns everything.

        return: Dictionary containing the summary.

        '''

        info = self.stock.info

        if not datapoints:
            return info

        return {key: value for key, value in info.items() if key in datapoints}

    def get_holders(self, n_result=None):
        '''

        Get the institutional shareholders of the company.

        @param n_result: int to return some of the shareholders, max: 10. None or 0 returns all of them.

        return: Dictionary of shareholders keyed by row index; empty when no holder table is available.

        '''

        shareholders = self.stock.institutional_holders

        # Guard against a missing table instead of failing on None.to_dict().
        if shareholders is None:
            return {}

        if n_result:
            shareholders = shareholders.head(n_result)

        return shareholders.to_dict('index')

    def get_calendar(self):
        '''

        Get the update from the latest schedule.

        A dataframe calendar is reduced to its 'Value' column. A calendar that
        is already a dictionary is returned unchanged, and a missing calendar
        yields None.
        NOTE(review): which of these shapes yfinance returns depends on the
        installed version - confirm.

        return: Dictionary of the update, or None when there is none.

        '''

        calendar = self.stock.calendar

        if calendar is None or isinstance(calendar, dict):
            return calendar

        return calendar.to_dict().get('Value')

    def get_data(self, period='3mo', interval='1d', date_range=None, trendline=None, average=None):
        '''

        Get the share price of the company.

        Each row also carries an 'OHLC Average' value, plus '<trendline> Prediction'
        and '<average> Mean' when those options are given.

        @param period: The period of the stock data (1d, 5d, 1mo, 3mo, 6mo, 1y, 2y, 5y, 10y, ytd, max).
        @param interval: The interval per data (1m, 2m, 5m, 15m, 30m, 60m, 90m, 1h, 1d, 5d, 1wk, 1mo, 3mo).
        @param date_range: Array of range of string date like ['2020-01-01', '2020-12-31'].
        @param trendline: String of the name of the column as y for training except for Date.
        @param average: String of the name of the column to get the average of (excluding Date).

        return: Dictionary of the stock data, keyed by row number.

        '''

        stock_data = self.__get_stock_data(period, interval, date_range)
        stock_data = self.__enrich_stock_data(stock_data, trendline, average)

        return stock_data.to_dict('index')


## Testing
The cells below exercise each public method of the `Stock` class, using the AAPL ticker.

In [102]:
# Shared instance used by all of the test cells below.
stock = Stock(symbol='AAPL')

In [103]:
# Full summary: no datapoint filter is applied.
print(stock.get_info(datapoints=None))

{'zip': '95014', 'sector': 'Technology', 'fullTimeEmployees': 147000, 'longBusinessSummary': 'Apple Inc. designs, manufactures, and markets smartphones, personal computers, tablets, wearables, and accessories worldwide. It also sells various related services. The company offers iPhone, a line of smartphones; Mac, a line of personal computers; iPad, a line of multi-purpose tablets; and wearables, home, and accessories comprising AirPods, Apple TV, Apple Watch, Beats products, HomePod, iPod touch, and other Apple-branded and third-party accessories. It also provides AppleCare support services; cloud services store services; and operates various platforms, including the App Store, that allow customers to discover and download applications and digital content, such as books, music, video, games, and podcasts. In addition, the company offers various services, such as Apple Arcade, a game subscription service; Apple Music, which offers users a curated listening experience with on-demand radi

In [104]:
# Every shareholder row: no limit on the number of results.
print(stock.get_holders(n_result=None))

{0: {'Holder': 'Vanguard Group, Inc. (The)', 'Shares': 1280669129, 'Date Reported': Timestamp('2020-09-29 00:00:00'), '% Out': 0.0761, 'Value': 148314291829}, 1: {'Holder': 'Blackrock Inc.', 'Shares': 1069771045, 'Date Reported': Timestamp('2020-09-29 00:00:00'), '% Out': 0.0636, 'Value': 123890184721}, 2: {'Holder': 'Berkshire Hathaway, Inc', 'Shares': 944295554, 'Date Reported': Timestamp('2020-09-29 00:00:00'), '% Out': 0.056100000000000004, 'Value': 109358868108}, 3: {'Holder': 'State Street Corporation', 'Shares': 672444836, 'Date Reported': Timestamp('2020-09-29 00:00:00'), '% Out': 0.04, 'Value': 77875836457}, 4: {'Holder': 'FMR, LLC', 'Shares': 346699497, 'Date Reported': Timestamp('2020-09-29 00:00:00'), '% Out': 0.0206, 'Value': 40151268747}, 5: {'Holder': 'Geode Capital Management, LLC', 'Shares': 247838449, 'Date Reported': Timestamp('2020-09-29 00:00:00'), '% Out': 0.0147, 'Value': 28702170778}, 6: {'Holder': 'Price (T.Rowe) Associates Inc', 'Shares': 228869394, 'Date Repo

In [105]:
# Print the calendar entry returned for the ticker (earnings and revenue figures in the output below).
print(stock.get_calendar())

{'Earnings Date': Timestamp('2021-01-27 00:00:00'), 'Earnings Average': 1.4, 'Earnings Low': 1.23, 'Earnings High': 1.57, 'Revenue Average': 102758000000, 'Revenue Low': 97739000000, 'Revenue High': 110211000000}


In [106]:
# Default fetch spelled out: three months of daily rows, no trendline or mean column.
print(stock.get_data(period='3mo', interval='1d'))

[*********************100%***********************]  1 of 1 completed
{0: {'Date': Timestamp('2020-10-22 00:00:00'), 'Open': 117.44999694824219, 'High': 118.04000091552734, 'Low': 114.58999633789062, 'Close': 115.75, 'Adj Close': 115.55064392089844, 'Volume': 101988000, 'OHLC Average': 116.45749855041504}, 1: {'Date': Timestamp('2020-10-23 00:00:00'), 'Open': 116.38999938964844, 'High': 116.55000305175781, 'Low': 114.27999877929688, 'Close': 115.04000091552734, 'Adj Close': 114.84187316894531, 'Volume': 82572600, 'OHLC Average': 115.56500053405762}, 2: {'Date': Timestamp('2020-10-26 00:00:00'), 'Open': 114.01000213623047, 'High': 116.55000305175781, 'Low': 112.87999725341797, 'Close': 115.05000305175781, 'Adj Close': 114.85185241699219, 'Volume': 111850700, 'OHLC Average': 114.62250137329102}, 3: {'Date': Timestamp('2020-10-27 00:00:00'), 'Open': 115.48999786376953, 'High': 117.27999877929688, 'Low': 114.54000091552734, 'Close': 116.5999984741211, 'Adj Close': 116.39917755126953, 'Volum