In [None]:
# pyright: reportMissingModuleSource=false
import sys
import os
import time
import logging
import datetime
import json
from datetime import datetime, timedelta
import yaml
import importlib
from dotenv import load_dotenv
import pandas as pd
import numpy as np
import requests
import pandas_gbq
from dreams_core.googlecloud import GoogleCloud as dgc
from dreams_core import core as dc
import matplotlib.pyplot as plt
from scipy.signal import argrelextrema
import progressbar


# load dotenv
load_dotenv()


# import local files if necessary
# pyright: reportMissingImports=false
sys.path.append('..//src')
from utils import load_config, cw_filter_df, create_progress_bar
import training_data as td
importlib.reload(td)
import feature_engineering as fe
importlib.reload(fe)
import coin_wallet_metrics as cwm
importlib.reload(cwm)
import modeling as m
importlib.reload(m)
import insights as i
importlib.reload(i)
import utils as u
importlib.reload(u)

# Read the four YAML config files used throughout the notebook
config, metrics_config, modeling_config, experiments_config = [
    load_config(config_path)
    for config_path in (
        '../config/config.yaml',
        '../config/metrics_config.yaml',
        '../config/modeling_config.yaml',
        '../config/experiments_config.yaml',
    )
]

# Create the logger via dreams_core and set its threshold to INFO
logger = dc.setup_logger()
logger.setLevel(logging.INFO)


def _format_float(x):
    """Render a float with up to 12 significant digits for pandas display."""
    return f'{x:.12g}'


# Apply the formatter to every float pandas displays
pd.set_option('display.float_format', _format_float)
# To restore the pandas default: pd.reset_option('display.float_format')

In [None]:
# Re-execute the local modules so the kernel picks up their current source
for local_module in (td, cwm, fe, m, i):
    importlib.reload(local_module)

# Re-read the YAML config files
config, metrics_config, modeling_config, experiments_config = [
    load_config(config_path)
    for config_path in (
        '../config/config.yaml',
        '../config/metrics_config.yaml',
        '../config/modeling_config.yaml',
        '../config/experiments_config.yaml',
    )
]


## Google Trends sequence

In [None]:
# google_trends_df is otherwise first assigned in a later cell, so load it here
# to keep this preview working when the notebook is run top to bottom on a
# fresh kernel (Restart & Run All).
google_trends_df = td.retrieve_google_trends_data()
google_trends_df

In [None]:
# Re-execute the local modules so the kernel picks up their current source
for local_module in (td, cwm, fe, m, i):
    importlib.reload(local_module)

# Re-read the YAML config files
config, metrics_config, modeling_config, experiments_config = [
    load_config(config_path)
    for config_path in (
        '../config/config.yaml',
        '../config/metrics_config.yaml',
        '../config/modeling_config.yaml',
        '../config/experiments_config.yaml',
    )
]


# Select the Google Trends entries from the dataset config and the metrics config
dataset_category, dataset_name = 'macro_series', 'google_trends'
dataset_config = config['datasets'][dataset_category][dataset_name]
dataset_metrics_config = metrics_config[dataset_category][dataset_name]

# Retrieve the raw Google Trends data
google_trends_df = td.retrieve_google_trends_data()

# Generate one metrics frame per value column listed in the metrics config.
# id_column=None because this series has no per-entity identifier column.
all_metrics = []
for key in list(dataset_metrics_config):
    value_column_metrics_config = dataset_metrics_config[key]['metrics']
    metric_df, _ = cwm.generate_time_series_metrics(
        google_trends_df[['date', key]],
        config,
        value_column_metrics_config,
        key,
        id_column=None,
    )
    all_metrics.append(metric_df)

# Outer-join every per-column frame onto the first one by date
all_metrics_df = all_metrics[0]
for metrics_df in all_metrics[1:]:
    all_metrics_df = pd.merge(
        all_metrics_df,
        metrics_df,
        on='date',
        how='outer',
    )


# Flatten the date-level metrics and wrap the result in a DataFrame
flattened_features = fe.flatten_date_features(all_metrics_df, dataset_metrics_config)
flattened_google_trends_df = pd.DataFrame([flattened_features])

# Save the flattened features; the call returns the frame and a filepath
flattened_output_directory = os.path.join(
    modeling_config['modeling']['modeling_folder'],
    'outputs/flattened_outputs',
)
(
    flattened_google_trends_df,
    flattened_google_trends_filepath,
) = fe.save_flattened_outputs(
    flattened_google_trends_df,
    flattened_output_directory,
    dataset_config['description'],
    config['training_data']['modeling_period_start'],
)

# Preprocess the saved file; the call returns the frame and a filepath
(
    google_trends_preprocessed_df,
    google_trends_preprocessed_filepath,
) = fe.preprocess_coin_df(
    flattened_google_trends_filepath,
    modeling_config,
    dataset_config,
    dataset_metrics_config,
)

# Pair the part of the path after 'preprocessed_outputs/' with the dataset's fill method
preprocessed_relative_path = google_trends_preprocessed_filepath.split('preprocessed_outputs/')[1]
google_trends_tuple = (preprocessed_relative_path, dataset_config['fill_method'])

## Prices Metrics

In [None]:
# Re-execute the local modules so the kernel picks up their current source
for local_module in (td, cwm, fe, m, i):
    importlib.reload(local_module)

# Re-read the YAML config files
config, metrics_config, modeling_config, experiments_config = [
    load_config(config_path)
    for config_path in (
        '../config/config.yaml',
        '../config/metrics_config.yaml',
        '../config/modeling_config.yaml',
        '../config/experiments_config.yaml',
    )
]

# Select the prices entries from the dataset config and the metrics config
dataset_category = 'time_series'
dataset_name = 'prices'
dataset_config = config['datasets'][dataset_category][dataset_name]
dataset_metrics_config = metrics_config[dataset_category][dataset_name]

# Retrieve prices, then fill gaps using the max_gap_days limit from the config.
# prices_log is the second value returned by fill_prices_gaps; it is kept for
# inspection and not used further in this cell.
prices_df = td.retrieve_prices_data()
prices_df, prices_log = td.fill_prices_gaps(prices_df, config['data_cleaning']['max_gap_days'])

# Generate one metrics frame per value column listed in the metrics config
all_metrics = []
for value_column in list(dataset_metrics_config.keys()):
    value_column_metrics_config = dataset_metrics_config[value_column]['metrics']
    metric_df = prices_df[['date', 'coin_id', value_column]]

    # generate metrics, keyed per coin via id_column
    metric_df, _ = cwm.generate_time_series_metrics(
        metric_df,
        config,
        value_column_metrics_config,
        value_column,
        id_column='coin_id'
    )

    all_metrics.append(metric_df)

# Each frame holds one row per (date, coin_id), so both keys must be used for
# the join. Joining on 'date' alone would pair every coin with every other coin
# on the same date and split coin_id into coin_id_x / coin_id_y columns.
# NOTE(review): assumes generate_time_series_metrics keeps 'coin_id' in its
# output when id_column='coin_id' — confirm against the function.
merge_keys = ['date', 'coin_id']
all_metrics_df = all_metrics[0]
for metrics_df in all_metrics[1:]:
    all_metrics_df = pd.merge(all_metrics_df, metrics_df, on=merge_keys, how='outer')


# Directory passed to fe.save_flattened_outputs for the flattened prices metrics
flattened_output_directory = os.path.join(
    modeling_config['modeling']['modeling_folder'],
    'outputs/flattened_outputs',
)

# Flatten the coin-date metrics, passing the configured training period end
training_period_end = config['training_data']['training_period_end']
flattened_prices_metrics_df = fe.flatten_coin_date_df(
    all_metrics_df,
    dataset_metrics_config,
    training_period_end,
)

# Save the flattened frame; the call returns the frame and a filepath
(
    flattened_prices_metrics_df,
    flattened_prices_metrics_filepath,
) = fe.save_flattened_outputs(
    flattened_prices_metrics_df,
    flattened_output_directory,
    dataset_config['description'],
    config['training_data']['modeling_period_start'],
)

# Preprocess the saved file; the call returns the frame and a filepath
(
    prices_preprocessed_df,
    prices_preprocessed_filepath,
) = fe.preprocess_coin_df(
    flattened_prices_metrics_filepath,
    modeling_config,
    dataset_config,
    dataset_metrics_config,
)

# Pair the part of the path after 'preprocessed_outputs/' with the dataset's fill method
preprocessed_relative_path = prices_preprocessed_filepath.split('preprocessed_outputs/')[1]
prices_tuple = (preprocessed_relative_path, dataset_config['fill_method'])

# Show the tuple as the cell output
prices_tuple

In [None]:
# Re-execute the local modules so the kernel picks up their current source
for local_module in (td, cwm, fe, m, i):
    importlib.reload(local_module)

# Re-read the YAML config files
config, metrics_config, modeling_config, experiments_config = [
    load_config(config_path)
    for config_path in (
        '../config/config.yaml',
        '../config/metrics_config.yaml',
        '../config/modeling_config.yaml',
        '../config/experiments_config.yaml',
    )
]

# Pass the (relative path, fill method) tuple of each dataset prepared in the
# earlier cells to fe.create_training_data_df, which returns the training data
# and a merge log.
modeling_folder = modeling_config['modeling']['modeling_folder']
dataset_tuples = [
    google_trends_tuple,
    prices_tuple,
]
training_data_df, merge_logs_df = fe.create_training_data_df(
    modeling_folder,
    dataset_tuples,
)

In [None]:
# Display the training data returned by fe.create_training_data_df
training_data_df

In [None]:
# Display the merge log returned alongside the training data by fe.create_training_data_df
merge_logs_df