In [None]:
# pyright: reportMissingModuleSource=false
import sys
import os
import time
import logging
import datetime
import json
from datetime import datetime, timedelta
import yaml
import importlib
from dotenv import load_dotenv
import pandas as pd
import numpy as np
import requests
import pandas_gbq
from dreams_core.googlecloud import GoogleCloud as dgc
from dreams_core import core as dc
import matplotlib.pyplot as plt
from scipy.signal import argrelextrema
import progressbar


# Load environment variables via python-dotenv
load_dotenv()


# Import the project's local modules.
# The path appended below is relative, so it resolves against the kernel's working
# directory; it is appended again each time this cell is re-run.
# pyright: reportMissingImports=false
sys.path.append('..//src')
# NOTE(review): the names pulled in by this `from utils import` are bound before utils is
# reloaded at the bottom of the cell, so they keep referring to the pre-reload objects —
# confirm that is acceptable.
from utils import load_config, cw_filter_df, create_progress_bar
# Each module is imported under a short alias and then reloaded so that edits made to
# its source file are picked up without restarting the kernel.
import training_data as td
importlib.reload(td)
import feature_engineering as fe
importlib.reload(fe)
import coin_wallet_metrics as cwm
importlib.reload(cwm)
import modeling as m
importlib.reload(m)
import insights as i
importlib.reload(i)
import utils as u
importlib.reload(u)

# Read the four YAML config files used by the rest of the notebook
config, metrics_config, modeling_config, experiments_config = map(
    load_config,
    (
        '../config/config.yaml',
        '../config/metrics_config.yaml',
        '../config/modeling_config.yaml',
        '../config/experiments_config.yaml',
    ),
)

# Create the logger through dreams_core and set its level to INFO
logger = dc.setup_logger()
logger.setLevel(logging.INFO)

# Show floats in pandas output with up to 12 significant digits
pd.set_option('display.float_format', lambda x: f'{x:.12g}')
# To go back to the pandas default: pd.reset_option('display.float_format')

In [None]:
# Reload the local modules so edits to their source files take effect in this kernel
for _module in (td, cwm, fe, m, i):
    importlib.reload(_module)

# Re-read the YAML config files so changes made to them are picked up as well
config, metrics_config, modeling_config, experiments_config = map(
    load_config,
    (
        '../config/config.yaml',
        '../config/metrics_config.yaml',
        '../config/modeling_config.yaml',
        '../config/experiments_config.yaml',
    ),
)


In [None]:
def retrieve_google_trends_data():
    """
    Load the Google Trends table and expand it to one row per calendar day.

    Every column of `macro_trends.google_trends` is selected, ordered by date. The date
    column is parsed into datetimes and used to resample the data to daily frequency;
    days that are absent from the source are filled by linear interpolation.

    Returns:
    - google_trends_df: DataFrame with a `date` column followed by the table's other
        columns, containing one row per day
    """
    trends_query = '''
        select *
        from `macro_trends.google_trends`
        order by date
    '''

    # Fetch the table through dgc's run_sql method
    raw_trends_df = dgc().run_sql(trends_query)
    logger.info('retrieved Google Trends data with shape %s',raw_trends_df.shape)

    # Parse the dates so they can act as a datetime index for resampling
    raw_trends_df['date'] = pd.to_datetime(raw_trends_df['date'])

    # Index by date, resample to daily rows, linearly interpolate the gaps, then move
    # the date back out of the index into an ordinary column
    daily_trends_df = (
        raw_trends_df
        .set_index('date')
        .resample('D')
        .interpolate(method='linear')
        .reset_index()
    )

    return daily_trends_df

# NOTE(review): this calls the implementation in the training_data module (td), not the
# retrieve_google_trends_data function defined earlier in this cell — confirm which one
# is intended.
google_trends_df = td.retrieve_google_trends_data()
# Show the (rows, columns) shape as the cell output
google_trends_df.shape

In [None]:
# Display the Google Trends DataFrame itself as the cell output
google_trends_df

In [None]:
# Preview the first rows of the Google Trends DataFrame
google_trends_df.head()

## Prices Metrics

In [None]:
dataset_category = 'time_series'
dataset_name = 'prices'

# Look up this dataset's entries in the general config and in the metrics config
dataset_config = config['datasets'][dataset_category][dataset_name]
dataset_metrics_config = metrics_config[dataset_category][dataset_name]


# Generate the prices metrics.
# NOTE(review): prices_df is not created by any cell visible here; it has to be loaded
# by an earlier cell before this one can run — confirm.
prices_metrics_df,partial_prices_metrics_df = cwm.generate_time_series_metrics(
    prices_df,
    config,
    metrics_config,
    dataset_key=dataset_name,
    colname=dataset_config['colname']
)

# Flatten, save, and preprocess the flattened df.
# The steps below use the dataset_config / dataset_metrics_config defined at the top of
# this cell, so the cell does not depend on config variables defined nowhere in it.
flattened_output_directory = os.path.join(modeling_config['modeling']['modeling_folder'],'outputs/flattened_outputs')

flattened_prices_metrics_df = fe.flatten_coin_date_df(
    prices_metrics_df,
    dataset_metrics_config,
    config['training_data']['training_period_end']
)
flattened_prices_metrics_df, flattened_prices_metrics_filepath = fe.save_flattened_outputs(
    flattened_prices_metrics_df,
    flattened_output_directory,
    dataset_config['description'],
    config['training_data']['modeling_period_start']
)
prices_preprocessed_df, prices_preprocessed_filepath = fe.preprocess_coin_df(
    flattened_prices_metrics_filepath,
    modeling_config,
    dataset_config,
    dataset_metrics_config
)


# Pair the part of the preprocessed filepath that follows 'preprocessed_outputs/' with
# the dataset's configured fill method
prices_tuple = (prices_preprocessed_filepath.split('preprocessed_outputs/')[1], dataset_config['fill_method'])