In [3]:
def get_full_dataset(path='./Merged_Unconverted_BTC_Data.csv'):
    """
        Load the full dataset scraped from btcinfocharts and return a
        dataframe holding only the 'Date' column and the selected features.

        Parameters
        ----------
        path : str, optional
            Location of the merged CSV file. Defaults to
            './Merged_Unconverted_BTC_Data.csv' (the path this function
            always used before it became a parameter).

        Returns
        -------
        pandas.DataFrame
            'Date' first, followed by the selected feature columns in the
            order they are listed in `features`. Every other column of the
            CSV is dropped.

        Raises
        ------
        FileNotFoundError
            If no file exists at `path`.
        KeyError
            If 'Date' or any selected feature is missing from the CSV.
    """

    import pandas as pd

    features = [
            'median_transaction_fee3momUSD',
            'fee_to_reward7momUSD',
            'top100cap7mom',
            'mining_profitability7rsi',
            'top100cap14mom',
            'price3wmaUSD',
            'transactionvalue90emaUSD',
            'difficulty30sma',
            'fee_to_reward90smaUSD'
            ]

    # combine date and features; this list also fixes the output column order
    columns = ['Date'] + features

    full_df = pd.read_csv(path)

    # extract only the data from the features desired
    return full_df[columns]


def preprocess_the_data():
    """
        Scale the most recent scraped data with the saved scaler.

        Loads the pickled scaler from './scaler/final_scaler.pkl', fetches
        the newest data through btcinfocharts_scraper.grab_the_data(), and
        applies the scaler's transform() to it. The scaler is only loaded
        here; it is not re-fitted.
        (The original note mentions MinMaxScaler and RobustScaler; the actual
        scaler type is whatever the pickle file contains.)

        Returns
        -------
        tuple
            (transformed_new_data, todays_date): the output of
            scale.transform() and the date value handed back by
            grab_the_data().

        Raises
        ------
        FileNotFoundError
            If the scaler pickle file does not exist.
    """

    from btcinfocharts_scraper import grab_the_data
    import pickle

    # get the pickle file scaler
    # NOTE: pickle.load can execute arbitrary code, so this file must only
    # ever come from a trusted source.
    # The with-block closes the file even when unpickling raises.
    with open('./scaler/final_scaler.pkl', 'rb') as infile:
        scale = pickle.load(infile)

    # get the new dataset for the most recent data
    new_df, todays_date = grab_the_data()

    # scale the new data
    transformed_new_data = scale.transform(new_df)

    return transformed_new_data, todays_date


def prediction():
    """
        Produce a price prediction for tomorrow from the latest scaled data.

        Loads the saved Keras model, gets the scaled data and the date
        from preprocess_the_data(), and runs the model on that data.

        Returns a tuple: (model output, date)
        Example (array([[52731.05]], dtype=float32), '2022/04/04')
    """

    from keras.models import load_model

    model_file = './trained_models/ANN4_reg_nextday300Adam0-01relu64Int4_341.hdf5'

    # the model is loaded before any data is fetched
    trained_network = load_model(model_file)

    # scaled inputs for the model plus the date they belong to
    scaled_features, todays_date = preprocess_the_data()

    return trained_network.predict(scaled_features), todays_date



# Run the whole pipeline once (load model, fetch and scale the latest data,
# predict); as the last expression of the cell, the returned tuple is displayed.
prediction()

All of the data for today 2022-04-04 is available.
Today's 2022-04-04 data will be used


(array([[52731.05]], dtype=float32), '2022/04/04')

In [4]:
# Call the preprocessing step on its own to inspect its two return values:
# x is the scaled data, y is the date. This fetches the data again.
x,  y = preprocess_the_data()

All of the data for today 2022-04-04 is available.
Today's 2022-04-04 data will be used


In [5]:
# Display the scaled data returned by preprocess_the_data()
x

array([[ 7.46666667e+01,  3.60919540e+00,  7.06590650e+01,
        -4.01275569e+00,  4.46105112e+01,  6.29118702e+00,
         2.09676047e+01,  4.56000570e+00,  2.27389517e-02]])

In [6]:
# Display the date returned alongside the scaled data
y

'2022/04/04'