# PASSO 00 IMPORTS

In [5]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_error
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score


import requests

# PASSO 01 DATA DESCRIPTION

In [6]:
def get_historical_data(coin, days=90):
    url = f'https://api.coingecko.com/api/v3/coins/{coin}/market_chart?vs_currency=usd&days={days}'
    try:
        response = requests.get(url)
        response.raise_for_status()
        data = response.json()
        prices = data['prices']
        df = pd.DataFrame(prices, columns=["Timestamp", "Price (USD)"])
        df['Timestamp'] = pd.to_datetime(df['Timestamp'], unit='ms')
        return df
    except Exception as e:
        st.error(f"Erro ao buscar dados históricos: {e}")
        return pd.DataFrame()

In [7]:
historical_data = get_historical_data('bitcoin',days=30)
historical_data.to_parquet('../data/raw/historical_data.parquet')

In [8]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Carregar os dados (supondo que já tenha sido coletado)
df_principal = pd.read_parquet('../data/raw/historical_data.parquet')

# FEATURE ENGINEERING

In [10]:
df = df_principal.copy()
df.tail()

Unnamed: 0,Timestamp,Price (USD)
714,2025-02-06 19:02:56.212,96340.129989
715,2025-02-06 20:04:01.843,95967.15004
716,2025-02-06 21:03:56.060,96900.668886
717,2025-02-06 22:01:12.092,96784.585045
718,2025-02-06 23:13:01.000,96773.964378


In [11]:
df = df.rename(columns={"Timestamp":"timestamp","Price (USD)":"price (usd)"})
df

Unnamed: 0,timestamp,price (usd)
0,2025-01-07 23:01:54.139,96966.665012
1,2025-01-08 00:03:56.480,96952.098868
2,2025-01-08 01:00:01.762,97121.198531
3,2025-01-08 02:00:06.054,96652.024304
4,2025-01-08 03:03:41.727,96866.906198
...,...,...
714,2025-02-06 19:02:56.212,96340.129989
715,2025-02-06 20:04:01.843,95967.150040
716,2025-02-06 21:03:56.060,96900.668886
717,2025-02-06 22:01:12.092,96784.585045


In [12]:
df['day_of_week'] = df['timestamp'].dt.dayofweek


# Extrair o número do dia no mês
df['day_of_month'] = df['timestamp'].dt.day

In [13]:
# Média móvel de 7 dias (curto prazo)
df['moving_avg_7'] = df['price (usd)'].rolling(window=7).mean()

# Média móvel de 30 dias (médio prazo)
df['moving_avg_30'] = df['price (usd)'].rolling(window=30).mean()

# Média móvel de 90 dias (longo prazo)
df['moving_avg_90'] = df['price (usd)'].rolling(window=90).mean()

In [14]:
# Calcular a variação percentual diária (retorno)
df['daily_return'] = df['price (usd)'].pct_change()

# Calcular a variação absoluta diária
df['daily_return_abs'] = df['price (usd)'].diff()

In [15]:
df['volatility'] = df['daily_return'].rolling(window=30).std()

In [16]:
# Função para calcular o RSI
def calculate_rsi(data, window):
    delta = data.diff()
    gain = (delta.where(delta > 0, 0)).rolling(window=window).mean()
    loss = (-delta.where(delta < 0, 0)).rolling(window=window).mean()
    rs = gain / loss
    return 100 - (100 / (1 + rs))

# Calcular o RSI de 14 dias
df['RSI_14'] = calculate_rsi(df['price (usd)'], window=14)

In [17]:
# Média exponencial de 7 dias
df['ema_7'] = df['price (usd)'].ewm(span=7, adjust=False).mean()

# Média exponencial de 30 dias
df['ema_30'] = df['price (usd)'].ewm(span=30, adjust=False).mean()

# Variable Filtering

In [18]:
df = df.dropna()

In [19]:
# Lista das variáveis derivadas
derived_columns = [
    'day_of_week',  'day_of_month', 
    'moving_avg_7', 'moving_avg_30', 'moving_avg_90'
]

# Filtrando apenas as variáveis derivadas
df_derived = df[derived_columns]

# Exibindo as primeiras linhas das variáveis derivadas
print(df_derived.head())

    day_of_week  day_of_month  moving_avg_7  moving_avg_30  moving_avg_90
89            5            11  94472.382736   94491.308360   94438.327812
90            5            11  94484.952677   94470.379685   94409.941707
91            5            11  94429.234757   94439.137268   94378.144222
92            5            11  94379.145377   94412.591325   94346.097421
93            5            11  94328.880567   94434.542758   94318.880688


In [20]:
scaler = StandardScaler()
# Separar variáveis independentes (apenas as derivadas)
X_derived = df_derived

# Variável dependente (preço)
y = df['price (usd)']

# Dividir em treino e teste
X_train, X_test, y_train, y_test = train_test_split(X_derived, y, test_size=0.3, shuffle=False)

# Normalizar as variáveis independentes
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [21]:
# Inicializar e treinar o modelo de regressão linear
linear_model = LinearRegression()
linear_model.fit(X_train, y_train)

# Prever no conjunto de teste
y_pred = linear_model.predict(X_test)

# Avaliar o modelo
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f'Mean Squared Error (Random Forest): {mse}')
print(f'Root Mean Squared Error (RMSE) (Random Forest): {mse}')
print(f'Mean Absolute Error (MAE) (Random Forest): {mae}')
print(f'R² (Random Forest): {r2}')

Mean Squared Error (Random Forest): 773880.0374516668
Root Mean Squared Error (RMSE) (Random Forest): 773880.0374516668
Mean Absolute Error (MAE) (Random Forest): 603.3504716929986
R² (Random Forest): 0.9243899727857942


In [22]:
from xgboost import XGBRegressor
from sklearn.ensemble import GradientBoostingRegressor

# ------------------ Treinamento com XGBoost ------------------
xgb_model = XGBRegressor(n_estimators=100, learning_rate=0.4, random_state=42)
xgb_model.fit(X_train, y_train)

# Previsão com XGBoost
xgb_y_pred = xgb_model.predict(X_test)

# Avaliação do XGBoost
xgb_mse = mean_squared_error(y_test, xgb_y_pred)
xgb_rmse = np.sqrt(xgb_mse)
xgb_mae = mean_absolute_error(y_test, xgb_y_pred)
xgb_r2 = r2_score(y_test, xgb_y_pred)

print("------ XGBoost Results ------")
print(f'Mean Squared Error (XGBoost): {xgb_mse}')
print(f'Root Mean Squared Error (RMSE) (XGBoost): {xgb_rmse}')
print(f'Mean Absolute Error (MAE) (XGBoost): {xgb_mae}')
print(f'R² (XGBoost): {xgb_r2}')


# ------------------ Treinamento com Gradient Boosting ------------------
gb_model = GradientBoostingRegressor(n_estimators=10, learning_rate=0.01, random_state=42)
gb_model.fit(X_train, y_train)

# Previsão com Gradient Boosting
gb_y_pred = gb_model.predict(X_test)

# Avaliação do Gradient Boosting
gb_mse = mean_squared_error(y_test, gb_y_pred)
gb_rmse = np.sqrt(gb_mse)
gb_mae = mean_absolute_error(y_test, gb_y_pred)
gb_r2 = r2_score(y_test, gb_y_pred)

print("\n------ Gradient Boosting Results ------")
print(f'Mean Squared Error (Gradient Boosting): {gb_mse}')
print(f'Root Mean Squared Error (RMSE) (Gradient Boosting): {gb_rmse}')
print(f'Mean Absolute Error (MAE) (Gradient Boosting): {gb_mae}')
print(f'R² (Gradient Boosting): {gb_r2}')

------ XGBoost Results ------
Mean Squared Error (XGBoost): 1634665.327436328
Root Mean Squared Error (RMSE) (XGBoost): 1278.5403112285228
Mean Absolute Error (MAE) (XGBoost): 1024.7607356479846
R² (XGBoost): 0.8402890836923821

------ Gradient Boosting Results ------
Mean Squared Error (Gradient Boosting): 10246346.15466787
Root Mean Squared Error (RMSE) (Gradient Boosting): 3200.9914330825486
Mean Absolute Error (MAE) (Gradient Boosting): 2742.1362745790752
R² (Gradient Boosting): -0.0010938053806519576


In [23]:
import joblib

# Caminho local para salvar o modelo
local_model_path = '../models/rl_model.joblib'


# Salvar o modelo
joblib.dump(linear_model, local_model_path)

print(f"Modelo salvo localmente em: {local_model_path}")


Modelo salvo localmente em: ../models/rl_model.joblib


In [25]:
reg_model = joblib.load('../models/rl_model.joblib')


In [26]:
X_train.columns

Index(['day_of_week', 'day_of_month', 'moving_avg_7', 'moving_avg_30',
       'moving_avg_90'],
      dtype='object')

In [28]:
X_train

Unnamed: 0,day_of_week,day_of_month,moving_avg_7,moving_avg_30,moving_avg_90
89,5,11,94472.382736,94491.308360,94438.327812
90,5,11,94484.952677,94470.379685,94409.941707
91,5,11,94429.234757,94439.137268,94378.144222
92,5,11,94379.145377,94412.591325,94346.097421
93,5,11,94328.880567,94434.542758,94318.880688
...,...,...,...,...,...
525,2,29,103032.792313,102261.758741,102373.828520
526,2,29,103238.760314,102301.972637,102355.837520
527,3,30,103474.717136,102337.176678,102340.743941
528,3,30,103698.038563,102401.368761,102331.063322


In [37]:
novos_dados = [[5,6,94472.382736,94472.382736,94378.144222]]
predicao = reg_model.predict(novos_dados)



In [42]:

print(f'O valor predito é: {predicao[0]:.2f}')

O valor predito é: 94680.86
