# Interial API
This notebook develops the functions and classes for the Interial back-end API.

In [141]:
# Import libraries
import yfinance as yf
import pandas as pd

from sklearn.linear_model import LinearRegression

In [166]:
class Stock():
    '''

    This class is to gather information and data about a certain stock.

    The symbol is wrapped in a yfinance Ticker on construction; the public
    methods return plain dictionaries / lists built from it.

    methods:
    - get_info
    - get_holders
    - get_calendar
    - get_data

    '''

    # Intervals for which get_data reads the timestamp from the 'Datetime'
    # column instead of 'Date'.
    _INTRADAY_INTERVALS = frozenset(['1m', '2m', '5m', '15m', '30m', '60m', '90m', '1h'])

    # Columns that are always part of the get_data output.
    _BASE_COLUMNS = ('Close', 'Time')

    def __init__(self, symbol):
        '''

        Store the symbol and create the yfinance Ticker used by the getters.

        @param symbol: String of the ticker symbol, e.g. 'AAPL'.

        '''

        self.symbol = symbol
        self.stock = yf.Ticker(symbol)

    def __create_trendline(self, x, y, x_predict):
        '''

        Create trendline from the data by fitting a linear regression.

        @param x: Two-dimensional feature data to train on (get_data passes the 'Index' column as a dataframe).
        @param y: Pandas dataframe of price.
        @param x_predict: Feature data, shaped like x, to predict for.

        return: One-dimensional Numpy array of predicted y based on x_predict.

        '''

        model = LinearRegression().fit(x, y)

        # y is two-dimensional, so the prediction is too; flatten it.
        return model.predict(x_predict).ravel()

    def __calculate_average(self, df, column):
        '''

        Calculate the average overall value of a stock data column, excluding Date.

        @param df: Pandas dataframe of the data.
        @param column: String of the column name.

        return: Average value.

        '''

        return df[column].mean()

    def __get_stock_data(self, period='3mo', interval='1d', date_range=None):
        '''

        Get the stock price based on date.

        @param period: The period of the stock data (1d, 5d, 1mo, 3mo, 6mo, 1y, 2y, 5y, 10y, ytd, max).
        @param interval: The interval per data (1m, 2m, 5m, 15m, 30m, 60m, 90m, 1h, 1d, 5d, 1wk, 1mo, 3mo).
        @param date_range: Array of range of string date like ['2020-01-01', '2020-12-31'].
                           When given it takes precedence over period.

        return: Dataframe of stock data

        raises: ValueError if date_range is given with fewer than two entries.

        '''

        if date_range:
            if len(date_range) < 2:
                raise ValueError('date_range must contain a start and an end date')

            # The interval is forwarded here as well, so that the column
            # get_data looks for ('Date' or 'Datetime') matches the
            # granularity that was actually requested.
            stock_data = yf.download(
                tickers=self.symbol,
                start=date_range[0],
                end=date_range[1],
                interval=interval,
                prepost=True
            )
        else:
            stock_data = yf.download(
                tickers=self.symbol,
                period=period,
                interval=interval,
                prepost=True
            )

        # NOTE(review): some yfinance releases appear to return two-level
        # (field, ticker) columns even for one symbol; keep only the field
        # level so the columns can be addressed as 'Close', 'Open', ...
        if isinstance(stock_data.columns, pd.MultiIndex):
            stock_data.columns = stock_data.columns.get_level_values(0)

        return stock_data

    def get_info(self, datapoints=None):
        '''

        Get summary of the stock.

        @param datapoints: Array of datapoints to be returned. All datapoints are returned when omitted or empty.

        return: Dictionary containing the summary.

        '''

        info = self.stock.info

        if not datapoints:
            return info

        return {key: value for key, value in info.items() if key in datapoints}

    def get_holders(self, n_result=None):
        '''

        Get the shareholders of the company.

        @param n_result: int to return some of the shareholders, max: 10.

        return: Dictionary of shareholders keyed by row index; empty when no holders are available.

        '''

        shareholders = self.stock.institutional_holders

        # Guard against a missing holders table instead of failing on .to_dict.
        if shareholders is None:
            return {}

        if n_result:
            shareholders = shareholders.iloc[:n_result]

        return shareholders.to_dict('index')

    def get_calendar(self):
        '''

        Get the update from the latest schedule.

        return: Dictionary of the update, or None when no calendar is available.

        '''

        calendar = self.stock.calendar

        if calendar is None:
            return None

        # NOTE(review): some yfinance releases appear to expose the calendar
        # as a plain dict rather than a dataframe; return a copy as-is.
        if isinstance(calendar, dict):
            return dict(calendar)

        return calendar.to_dict().get('Value')

    def get_data(self, period='3mo', interval='1d', date_range=None, trendline=None, average=None):
        '''

        Get the share price of the company.

        @param period: The period of the stock data (1d, 5d, 1mo, 3mo, 6mo, 1y, 2y, 5y, 10y, ytd, max).
        @param interval: The interval per data (1m, 2m, 5m, 15m, 30m, 60m, 90m, 1h, 1d, 5d, 1wk, 1mo, 3mo).
        @param date_range: Array of range of string date like ['2020-01-01', '2020-12-31'].
        @param trendline: String of the name of the column as y for training except for Date.
        @param average: String of the name of the column to get the average of (excluding Date).

        return: list of the stock data, one dictionary per row. Every row has
                'Close' and 'Time'; a trendline adds '<column> Prediction' and
                an average adds '<column> Mean' (plus the source column itself
                when it is not 'Close'). Rows containing NaN are left out.
                An empty list is returned when no data was downloaded.

        '''

        if interval in self._INTRADAY_INTERVALS:
            date_column = 'Datetime'
            ts_format = '%a %d at %H:%M'
        else:
            date_column = 'Date'
            ts_format = '%b %d, %Y'

        stock_data = self.__get_stock_data(period, interval, date_range)

        if stock_data.empty:
            return []

        # Enabling access to Date
        stock_data = stock_data.reset_index()

        # Fall back to the former index (first column after reset_index) when
        # it does not carry the expected name.
        if date_column not in stock_data.columns:
            date_column = stock_data.columns[0]

        # Human readable timestamp and a 1-based running number per row; the
        # running number is the x axis of the trendline.
        stock_data['Time'] = pd.to_datetime(stock_data[date_column]).dt.strftime(ts_format)
        stock_data['Index'] = range(1, len(stock_data) + 1)

        if trendline:
            # Train on complete rows only, but predict for every row.
            training_data = stock_data.dropna()

            # Without any complete row there is nothing to fit, so no
            # prediction column is added.
            if not training_data.empty:
                stock_data[trendline + ' Prediction'] = self.__create_trendline(
                    training_data[['Index']],
                    training_data[[trendline]],
                    stock_data[['Index']]
                )

        if average:
            # A scalar assignment repeats the mean on every row.
            stock_data[average + ' Mean'] = self.__calculate_average(stock_data, average)

        # Keep the base columns plus whatever was requested, so a trendline or
        # average on a column other than 'Close' is not thrown away.
        columns_to_keep = set(self._BASE_COLUMNS)
        if trendline:
            columns_to_keep.update((trendline, trendline + ' Prediction'))
        if average:
            columns_to_keep.update((average, average + ' Mean'))

        stock_data = stock_data[[column for column in stock_data.columns if column in columns_to_keep]]

        # Remove NaN value
        return stock_data.dropna().to_dict('records')


## Testing
The cells below test the functionality of the `Stock` class.

In [167]:
# Create the Stock instance for the 'AAPL' symbol that the cells below query.
stock = Stock('AAPL')

In [168]:
# Print the full summary dictionary (no datapoints filter is passed).
print(stock.get_info())

{'zip': '95014', 'sector': 'Technology', 'fullTimeEmployees': 147000, 'longBusinessSummary': 'Apple Inc. designs, manufactures, and markets smartphones, personal computers, tablets, wearables, and accessories worldwide. It also sells various related services. The company offers iPhone, a line of smartphones; Mac, a line of personal computers; iPad, a line of multi-purpose tablets; and wearables, home, and accessories comprising AirPods, Apple TV, Apple Watch, Beats products, HomePod, iPod touch, and other Apple-branded and third-party accessories. It also provides AppleCare support services; cloud services store services; and operates various platforms, including the App Store, that allow customers to discover and download applications and digital content, such as books, music, video, games, and podcasts. In addition, the company offers various services, such as Apple Arcade, a game subscription service; Apple Music, which offers users a curated listening experience with on-demand radi

In [169]:
# Print all institutional holders (no n_result limit is passed).
print(stock.get_holders())

{0: {'Holder': 'Vanguard Group, Inc. (The)', 'Shares': 1280669129, 'Date Reported': Timestamp('2020-09-29 00:00:00'), '% Out': 0.07629999999999999, 'Value': 148314291829}, 1: {'Holder': 'Blackrock Inc.', 'Shares': 1056461286, 'Date Reported': Timestamp('2020-12-30 00:00:00'), '% Out': 0.0629, 'Value': 140181848039}, 2: {'Holder': 'Berkshire Hathaway, Inc', 'Shares': 944295554, 'Date Reported': Timestamp('2020-09-29 00:00:00'), '% Out': 0.0562, 'Value': 109358868108}, 3: {'Holder': 'State Street Corporation', 'Shares': 672444836, 'Date Reported': Timestamp('2020-09-29 00:00:00'), '% Out': 0.0401, 'Value': 77875836457}, 4: {'Holder': 'FMR, LLC', 'Shares': 358551697, 'Date Reported': Timestamp('2020-12-30 00:00:00'), '% Out': 0.021400000000000002, 'Value': 47576224674}, 5: {'Holder': 'Geode Capital Management, LLC', 'Shares': 247838449, 'Date Reported': Timestamp('2020-09-29 00:00:00'), '% Out': 0.0148, 'Value': 28702170778}, 6: {'Holder': 'Price (T.Rowe) Associates Inc', 'Shares': 228869

In [170]:
# Print the 'Value' entries of the stock calendar.
print(stock.get_calendar())

None


In [171]:
# Print the price records using the default arguments (period='3mo', interval='1d').
print(stock.get_data())

[*********************100%***********************]  1 of 1 completed
[{'Open': 118.63999938964844, 'High': 118.7699966430664, 'Low': 117.29000091552734, 'Close': 117.33999633789062, 'Adj Close': 117.1649169921875, 'Volume': 73604300, 'OHLC Average': 118.0099983215332, 'Time': '20201120000000'}, {'Open': 117.18000030517578, 'High': 117.62000274658203, 'Low': 113.75, 'Close': 113.8499984741211, 'Adj Close': 113.68012237548828, 'Volume': 127959300, 'OHLC Average': 115.60000038146973, 'Time': '20201123000000'}, {'Open': 113.91000366210938, 'High': 115.8499984741211, 'Low': 112.58999633789062, 'Close': 115.16999816894531, 'Adj Close': 114.99815368652344, 'Volume': 113874200, 'OHLC Average': 114.3799991607666, 'Time': '20201124000000'}, {'Open': 115.55000305175781, 'High': 116.75, 'Low': 115.16999816894531, 'Close': 116.02999877929688, 'Adj Close': 115.85687255859375, 'Volume': 76499200, 'OHLC Average': 115.875, 'Time': '20201125000000'}, {'Open': 116.56999969482422, 'High': 117.489997863769