In [None]:
#pip install ta
#!pip install prophet
#!pip install tweepy textblob
#!pip install scikit-learn

In [24]:
import pandas as pd
import numpy as np
import yfinance as yf
from ta.momentum import RSIIndicator
from ta.volatility import BollingerBands
from ta.trend import MACD
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import r2_score
from keras.models import Sequential
from keras.layers import LSTM, Dense
import matplotlib.pyplot as plt
import datetime
import warnings
#from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import RandomizedSearchCV
from scikeras.wrappers import KerasRegressor

warnings.filterwarnings("ignore")

In [None]:
# Step 1: Fetch Historical Data
btc_data = yf.download('BTC-USD', start='2015-01-01', end='2024-10-21')
dxy_data = yf.download('DX-Y.NYB', start='2015-01-01', end='2024-10-21')

# Merge BTC and DXY data on the date index
data = btc_data[['Close']].rename(columns={'Close': 'BTC_Close'})
data['DXY_Close'] = dxy_data['Close']

# Forward-fill any missing DXY data
data['DXY_Close'].fillna(method='ffill', inplace=True)

# Forecast DXY using Prophet
# Prepare DXY data for Prophet
dxy_prophet = dxy_data[['Close']].reset_index()
dxy_prophet.rename(columns={'Date': 'ds', 'Close': 'y'}, inplace=True)

# Initialize and fit the Prophet model
dxy_model = Prophet()
dxy_model.fit(dxy_prophet)

# Determine the number of days to predict
last_date = data.index[-1]
end_date = pd.to_datetime('2025-05-31')
days_to_predict = (end_date - last_date).days

# Create future dataframe for prediction
future_dates_dxy = dxy_model.make_future_dataframe(periods=days_to_predict)
dxy_forecast = dxy_model.predict(future_dates_dxy)

# Extract the forecasted DXY values
dxy_forecasted = dxy_forecast[['ds', 'yhat']]
dxy_forecasted.set_index('ds', inplace=True)

# Combine historical and forecasted DXY data
dxy_combined = pd.concat([dxy_prophet.set_index('ds')['y'], dxy_forecasted['yhat']], axis=0)
dxy_combined.name = 'DXY_Close_Forecasted'

# # Update data with forecasted DXY values
# data = data.reindex(pd.date_range(start=data.index[0], end=end_date, freq='D'))
# data['DXY_Close'] = dxy_combined

# # Forward-fill any remaining missing values
# data['DXY_Close'].fillna(method='ffill', inplace=True)

# # Inverse of DXY
# data['DXY_Inverse'] = 1 / data['DXY_Close']
# Ensure `dxy_combined` index is unique by removing duplicates (if any)
dxy_combined = dxy_combined[~dxy_combined.index.duplicated(keep='last')]

# Create a date range to reindex the `data` dataframe
date_range = pd.date_range(start=data.index[0], end=end_date, freq='D')
data = data.reindex(date_range)

# Align and merge the forecasted DXY data with the main dataframe using an outer join
data = data.join(dxy_combined, how='left')

# Fill any remaining missing values with forward fill
data['DXY_Close'].fillna(method='ffill', inplace=True)

# Calculate the inverse of DXY
data['DXY_Inverse'] = 1 / data['DXY_Close']

In [None]:
# Prepare DXY data for Prophet
dxy_prophet = dxy_data[['Close']].reset_index()
dxy_prophet.rename(columns={'Date': 'ds', 'Close': 'y'}, inplace=True)

# Initialize and fit the Prophet model
dxy_model = Prophet()
dxy_model.fit(dxy_prophet)

# Create future dataframe for prediction
future_dates = dxy_model.make_future_dataframe(periods=days_to_predict)
dxy_forecast = dxy_model.predict(future_dates)

# Extract the forecasted DXY values
dxy_forecasted = dxy_forecast[['ds', 'yhat']]
dxy_forecasted.set_index('ds', inplace=True)

In [28]:
# Replace these with your own API keys and tokens


# Authenticate to Twitter
auth = tweepy.OAuth1UserHandler(consumer_key, consumer_secret, access_token, access_token_secret)
api = tweepy.API(auth)

In [29]:
# Step 2: Calculate Technical Indicators
# RSI
rsi_indicator = RSIIndicator(close=data['BTC_Close'], window=14)
data['RSI'] = rsi_indicator.rsi()

# Bollinger Bands
bb_indicator = BollingerBands(close=data['BTC_Close'], window=20, window_dev=2)
data['BB_High'] = bb_indicator.bollinger_hband()
data['BB_Low'] = bb_indicator.bollinger_lband()

# MACD
macd_indicator = MACD(close=data['BTC_Close'], window_slow=26, window_fast=12, window_sign=9)
data['MACD'] = macd_indicator.macd_diff()

In [30]:
def get_sentiment(date):
    try:
        # Search for tweets containing 'Bitcoin' on a specific date
        tweets = tweepy.Cursor(api.search_tweets,
                               q='Bitcoin',
                               lang='en',
                               since=date.strftime('%Y-%m-%d'),
                               until=(date + pd.Timedelta(days=1)).strftime('%Y-%m-%d'),
                               tweet_mode='extended').items(100)
        
        polarity = []
        for tweet in tweets:
            analysis = TextBlob(tweet.full_text)
            polarity.append(analysis.sentiment.polarity)
        
        # Return the average polarity
        if polarity:
            return np.mean(polarity)
        else:
            return 0
    except Exception as e:
        print(f"Error on date {date}: {e}")
        return 0

In [38]:
# # Apply sentiment analysis to historical data
# data['Sentiment'] = data.index.to_series().apply(get_sentiment)

# # For missing sentiment values in future dates, assume neutral sentiment
# data['Sentiment'].fillna(0, inplace=True)

# # Drop NaN values
# data.dropna(inplace=True)
data['Sentiment'] = 1

In [39]:
# Step 3: Prepare Features and Labels
features = data[['DXY_Inverse', 'RSI', 'BB_High', 'BB_Low', 'MACD', 'Sentiment']]
labels = data['BTC_Close']

# Normalize Features and Labels
scaler_features = MinMaxScaler()
scaler_labels = MinMaxScaler()

features_scaled = scaler_features.fit_transform(features)
labels_scaled = scaler_labels.fit_transform(labels.values.reshape(-1, 1))

In [40]:

# Step 4: Create Sequences for LSTM
def create_sequences(features, labels, time_steps=60):
    X, y = [], []
    for i in range(len(features) - time_steps):
        X.append(features[i:i+time_steps])
        y.append(labels[i+time_steps])
    return np.array(X), np.array(y)

time_steps = 60
X, y = create_sequences(features_scaled, labels_scaled, time_steps)

# Split Data into Training and Testing Sets
split = int(0.8 * len(X))
X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]

In [None]:
import numpy as np
from keras.models import Sequential
from keras.layers import LSTM, Dense

def create_model(units=50, optimizer='adam'):
    model = Sequential()
    model.add(LSTM(units=units, return_sequences=True, input_shape=(time_steps, X.shape[2])))
    model.add(LSTM(units=units))
    model.add(Dense(1))
    model.compile(optimizer=optimizer, loss='mean_squared_error', metrics=['mean_squared_error'])
    return model

# Handle missing values in X_train and y_train
X_train = np.nan_to_num(X_train, nan=0)  # Replace NaNs with 0 in X_train
y_train = np.nan_to_num(y_train, nan=0)  # Replace NaNs with 0 in y_train

# Set predefined hyperparameters
units = 50
optimizer = 'adam'
epochs = 10
batch_size = 32

# Create and train the model
model = create_model(units=units, optimizer=optimizer)

# Fit the model on the training data
model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, verbose=1)

# The model is now trained and can be used for evaluation or predictions.

In [None]:
# Step 6: Train the Final Model
model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, validation_data=(X_test, y_test))

In [None]:
print('Number of NaNs in y_test_inv:', np.isnan(y_test_inv).sum())
print('Number of NaNs in y_pred_inv:', np.isnan(y_pred_inv).sum())


In [None]:
# Step 7: Make Predictions on Test Set
y_pred_scaled = model.predict(X_test)

# Inverse Transform the Predictions and Labels
y_test_inv = scaler_labels.inverse_transform(y_test)
y_pred_inv = scaler_labels.inverse_transform(y_pred_scaled)

# Evaluate the Model
r2 = r2_score(y_test_inv, y_pred_inv)
print(f"R-squared Score on Test Set: {r2}")

In [None]:
# Step 8: Forecast Future Prices until May 2025
# Initialize variables for iterative prediction
future_predictions = []
last_sequence = X[-1]  # Start with the last sequence from the dataset

for day in range(days_to_predict):
    # Predict the next price
    next_pred_scaled = model.predict(last_sequence.reshape(1, time_steps, X.shape[2]))
    next_pred = scaler_labels.inverse_transform(next_pred_scaled)[0][0]
    future_predictions.append(next_pred)
    
    # Update the data with the new predicted price
    next_date = data.index[-1] + pd.Timedelta(days=1)
    data = data.append(pd.DataFrame({'BTC_Close': next_pred}, index=[next_date]))
    
    # Update DXY_Close with forecasted value
    dxy_value = dxy_combined[next_date]
    data.at[next_date, 'DXY_Close'] = dxy_value
    data.at[next_date, 'DXY_Inverse'] = 1 / dxy_value
    
    # For sentiment, assume neutral sentiment or use a forecasted value
    data.at[next_date, 'Sentiment'] = 0  # Assuming neutral sentiment

    # Recalculate technical indicators
    rsi_indicator = RSIIndicator(close=data['BTC_Close'], window=14)
    data['RSI'] = rsi_indicator.rsi()

    bb_indicator = BollingerBands(close=data['BTC_Close'], window=20, window_dev=2)
    data['BB_High'] = bb_indicator.bollinger_hband()
    data['BB_Low'] = bb_indicator.bollinger_lband()

    macd_indicator = MACD(close=data['BTC_Close'], window_slow=26, window_fast=12, window_sign=9)
    data['MACD'] = macd_indicator.macd_diff()
    
    # Get the latest features
    latest_features = data[['DXY_Inverse', 'RSI', 'BB_High', 'BB_Low', 'MACD', 'Sentiment']].iloc[-time_steps:]
    latest_features_scaled = scaler_features.transform(latest_features)
    
    # Update the last_sequence
    last_sequence = np.array(latest_features_scaled)

# Create a date range for future predictions
future_dates = pd.date_range(start=data.index[-days_to_predict], periods=days_to_predict, freq='D')

# Plot future predictions
plt.figure(figsize=(14, 5))
plt.plot(future_dates, future_predictions, label='Predicted BTC Price')
plt.title('Bitcoin Price Prediction until May 2025')
plt.xlabel('Date')
plt.ylabel('Price')
plt.legend()
plt.show()

In [4]:
import pandas as pd
import numpy as np
import yfinance as yf
from ta.momentum import RSIIndicator
from ta.volatility import BollingerBands
from ta.trend import MACD
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sklearn.metrics import r2_score
import matplotlib.pyplot as plt
from prophet import Prophet
import warnings

warnings.filterwarnings("ignore")

# Step 1: Fetch Historical Data
btc_data = yf.download('BTC-USD', start='2015-01-01', end='2023-10-21')
dxy_data = yf.download('DX-Y.NYB', start='2015-01-01', end='2023-10-21')

# Merge BTC and DXY data on the date index
data = btc_data[['Close']].rename(columns={'Close': 'BTC_Close'})
data['DXY_Close'] = dxy_data['Close']

# Forward-fill any missing DXY data
data['DXY_Close'].fillna(method='ffill', inplace=True)

# Drop any remaining NaN values
data.dropna(inplace=True)

# Step 2: Forecast DXY Values using Prophet
dxy_prophet = dxy_data[['Close']].reset_index()
dxy_prophet.rename(columns={'Date': 'ds', 'Close': 'y'}, inplace=True)

dxy_model = Prophet()
dxy_model.fit(dxy_prophet)

future_dates_dxy = dxy_model.make_future_dataframe(periods=800)
dxy_forecast = dxy_model.predict(future_dates_dxy)

# Extract the forecasted DXY values
dxy_forecasted = dxy_forecast[['ds', 'yhat']]
dxy_forecasted.set_index('ds', inplace=True)

# Combine historical and forecasted DXY data
dxy_combined = pd.concat([dxy_prophet.set_index('ds')['y'], dxy_forecasted['yhat']], axis=0)
dxy_combined = dxy_combined[~dxy_combined.index.duplicated(keep='last')]

end_date = pd.to_datetime('2025-05-31')

# Create a date range to reindex the `data` dataframe
date_range = pd.date_range(start=data.index[0], end=end_date, freq='D')
data = data.reindex(date_range)

# Align and merge the forecasted DXY data with the main dataframe using an outer join
data = data.join(dxy_combined, how='left', rsuffix='_forecast')

# Fill any remaining missing values with forward fill
data['DXY_Close'].fillna(method='ffill', inplace=True)

# Calculate the inverse of DXY
data['DXY_Inverse'] = 1 / data['DXY_Close']

# Step 3: Calculate Technical Indicators
rsi_indicator = RSIIndicator(close=data['BTC_Close'], window=14)
data['RSI'] = rsi_indicator.rsi()

bb_indicator = BollingerBands(close=data['BTC_Close'], window=20, window_dev=2)
data['BB_High'] = bb_indicator.bollinger_hband()
data['BB_Low'] = bb_indicator.bollinger_lband()

macd_indicator = MACD(close=data['BTC_Close'], window_slow=26, window_fast=12, window_sign=9)
data['MACD'] = macd_indicator.macd_diff()

# Drop NaN values
data.dropna(inplace=True)

# Step 4: Prepare Features and Labels
features = data[['DXY_Inverse', 'RSI', 'BB_High', 'BB_Low', 'MACD']]
labels = data['BTC_Close']

# Normalize Features and Labels
scaler_features = MinMaxScaler()
scaler_labels = MinMaxScaler()

features_scaled = scaler_features.fit_transform(features)
labels_scaled = scaler_labels.fit_transform(labels.values.reshape(-1, 1))

features_scaled_df = pd.DataFrame(features_scaled, index=features.index, columns=features.columns)
labels_scaled_df = pd.DataFrame(labels_scaled, index=labels.index, columns=['BTC_Close'])

# Step 5: Create Sequences for LSTM
def create_sequences(features, labels, time_steps=60):
    X, y = [], []
    for i in range(len(features) - time_steps):
        X.append(features.iloc[i:i+time_steps].values)
        y.append(labels.iloc[i+time_steps])
    return np.array(X), np.array(y)

time_steps = 60
X, y = create_sequences(features_scaled_df, labels_scaled_df['BTC_Close'], time_steps)

# Split Data into Training and Testing Sets
split = int(0.8 * len(X))
X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]

# Step 6: Build the LSTM Model
model = Sequential()
model.add(LSTM(50, return_sequences=True, input_shape=(X.shape[1], X.shape[2])))
model.add(LSTM(50))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mean_squared_error')

# Step 7: Train the Model
model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test))

# Step 8: Make Predictions on Test Set
y_pred_scaled = model.predict(X_test)

# Inverse Transform the Predictions and Labels
y_test_inv = scaler_labels.inverse_transform(y_test.reshape(-1, 1))
y_pred_inv = scaler_labels.inverse_transform(y_pred_scaled)

# Evaluate the Model
r2 = r2_score(y_test_inv, y_pred_inv)
print(f"R-squared Score on Test Set: {r2}")

# Plot Actual vs Predicted Prices on Test Set
plt.figure(figsize=(14, 5))
plt.plot(y_test_inv, label='Actual BTC Price')
plt.plot(y_pred_inv, label='Predicted BTC Price')
plt.title('Bitcoin Price Prediction on Test Set')
plt.xlabel('Time')
plt.ylabel('Price')
plt.legend()
plt.show()

# Step 9: Predict Future BTC Prices for 2025
# Get the last 'time_steps' data points from the scaled features
last_features = features_scaled_df[-time_steps:].values

# Forecasting into the future
future_predictions = []
future_dates = pd.date_range(start=features_scaled_df.index[-1] + pd.Timedelta(days=1), end='2025-12-31', freq='D')

for date in future_dates:
    X_input = last_features.reshape(1, time_steps, features_scaled_df.shape[1])
    y_pred_scaled = model.predict(X_input)
    y_pred_inv = scaler_labels.inverse_transform(y_pred_scaled)
    future_predictions.append(y_pred_inv[0][0])
    
    # Update technical indicators using the predicted value
    dxy_value = data.loc[date, 'DXY_Close']
    dxy_inverse = 1 / dxy_value
    
    # Create a properly named Series for the predicted BTC price
    btc_close_series = pd.Series([y_pred_inv[0][0]], index=[date], name='BTC_Close')

    btc_close_prices = labels_scaled_df['BTC_Close'].append(btc_close_series)

    if len(btc_close_prices) >= 14:
        rsi_value = RSIIndicator(close=btc_close_prices).rsi().iloc[-1]
    else:
        rsi_value = 50
    
    if len(btc_close_prices) >= 20:
        bb_high = BollingerBands(close=btc_close_prices).bollinger_hband().iloc[-1]
        bb_low = BollingerBands(close=btc_close_prices).bollinger_lband().iloc[-1]
    else:
        bb_high = bb_low = y_pred_inv[0][0]

    if len(btc_close_prices) >= 26:
        macd_value = MACD(close=btc_close_prices).macd_diff().iloc[-1]
    else:
        macd_value = 0
    
    new_feature = np.array([dxy_inverse, rsi_value, bb_high, bb_low, macd_value])
    new_feature_scaled = scaler_features.transform(new_feature.reshape(1, -1))
    
    last_features = np.vstack((last_features[1:], new_feature_scaled))

# Plot Future Predictions
plt.figure(figsize=(14, 5))
plt.plot(future_dates, future_predictions, label='Predicted BTC Price')
plt.title('Bitcoin Price Prediction for 2025')
plt.xlabel('Date')
plt.ylabel('Price')
plt.legend()
plt.show()


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
23:21:52 - cmdstanpy - INFO - Chain [1] start processing
23:21:53 - cmdstanpy - INFO - Chain [1] done processing


ValueError: Other Series must have a name