<a href="https://colab.research.google.com/github/cauefeder/Finances/blob/master/Indicators%26Predictions.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from datetime import datetime
import random
import yfinance as yf
import pandas as pd
import numpy as np

import plotly.graph_objects as go
from plotly.subplots import make_subplots

import warnings
warnings.filterwarnings("ignore")

import sys
import time

In [None]:
# Instrument and date range used throughout the notebook.
stock_symbol = "BTC-USD"

start = "2018-01-01"
end = datetime.today().strftime("%Y-%m-%d")

# auto_adjust=False keeps the separate "Adj Close" column that the rest of this
# notebook reads; newer yfinance releases adjust prices by default and drop it.
df1 = yf.download(stock_symbol, start=start, end=end, auto_adjust=False)

# Newer yfinance releases return (field, ticker) MultiIndex columns even for a
# single ticker; flatten them so df1["Adj Close"] is a plain Series.
if isinstance(df1.columns, pd.MultiIndex):
    df1.columns = df1.columns.get_level_values(0)

# Shift the raw OHLC prices by the same offset that separates Close from Adj Close.
adjustment = df1["Close"] - df1["Adj Close"]
df1["Returns"] = df1["Adj Close"].pct_change(1)
df1["Adj Low"] = df1["Low"] - adjustment
df1["Adj High"] = df1["High"] - adjustment
df1["Adj Open"] = df1["Open"] - adjustment

# 20-day rolling standard deviation of returns, scaled by sqrt(252).
# NOTE(review): 252 is the equity trading-day convention; BTC-USD has a bar for
# every calendar day, so 365 may be the intended factor -- confirm.
df1["Vol"] = df1["Returns"].rolling(20).std() * np.sqrt(252)

# Forward-looking targets over a horizon of p rows.
p = 5
df1["Alvo1"] = df1["Adj Close"].shift(-p)            # price p rows ahead
# 1 when the price p rows ahead is higher. The last p rows have no future price
# (Alvo1 is NaN) and therefore receive 0 here.
df1["Alvo2"] = np.where(df1["Alvo1"] > df1["Adj Close"], 1, 0)
df1["Alvo3"] = df1["Adj Close"].pct_change(p).shift(-p)  # return over the next p rows

[*********************100%%**********************]  1 of 1 completed


In [None]:
# Simple moving averages of the adjusted close, one column per window size.
for window in (5, 10, 20, 52):
    df1[f"MA{window}"] = df1["Adj Close"].rolling(window).mean()

# RSL: percentage distance of the adjusted close from each moving average,
# rounded to four decimals before scaling to percent.
for window in (5, 10, 20, 52):
    price_to_average = df1["Adj Close"] / df1[f"MA{window}"]
    df1[f"RSL{window}"] = np.round(price_to_average - 1, 4) * 100

# Drop every row that still contains a NaN in any column.
df1.dropna(inplace = True)

In [None]:
# Preview the RSL5 buckets with integer edges from -3 to 3; readings that fall
# outside (-3, 3] match no bin and are shown as NaN.
pd.cut(df1["RSL5"], bins = list(range(-3, 4)))

Date
2018-02-21    (-3.0, -2.0]
2018-02-22             NaN
2018-02-23             NaN
2018-02-24             NaN
2018-02-25             NaN
                  ...     
2024-03-17    (-2.0, -1.0]
2024-03-18    (-2.0, -1.0]
2024-03-19             NaN
2024-03-20      (2.0, 3.0]
2024-03-21    (-2.0, -1.0]
Name: RSL5, Length: 2221, dtype: category
Categories (6, interval[int64, right]): [(-3, -2] < (-2, -1] < (-1, 0] < (0, 1] < (1, 2] < (2, 3]]

In [None]:
# Grade each RSL reading from "F" (lowest) to "A" (highest).
# The outer bins are open-ended so readings beyond +/-2 still land in "F" or "A"
# rather than becoming NaN and silently dropping out of later aggregations.
rsl_bins = [-np.inf, -2, -1, 0, 1, 2, np.inf]
rsl_labels = ["F", "E", "D", "C", "B", "A"]

for window in (5, 10, 20, 52):
    df1[f"RSL{window}_q"] = pd.cut(df1[f"RSL{window}"]
                                   , bins = rsl_bins
                                   , labels = rsl_labels)

In [None]:
# Mean and sum of the forward return column (Alvo3) for every RSL20 x RSL5
# bucket pair, rounded to four decimals and scaled by 100.
pivot = pd.pivot_table(
    df1,
    values = "Alvo3",
    index = ["RSL20_q", "RSL5_q"],
    aggfunc = [np.mean, np.sum],
    fill_value = 0,
).round(4) * 100
pivot

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,sum
Unnamed: 0_level_1,Unnamed: 1_level_1,Alvo3,Alvo3
RSL20_q,RSL5_q,Unnamed: 2_level_2,Unnamed: 3_level_2
F,F,2.22,17.79
F,E,0.25,3.53
F,D,1.86,55.82
F,C,-1.86,-16.74
F,B,2.23,22.32
F,A,2.96,14.81
E,F,2.72,29.9
E,E,0.04,0.89
E,D,1.04,39.49
E,C,-1.6,-41.66


In [None]:
# Two stacked panels sharing the x axis: candlesticks on top, RSL lines below.
fig = make_subplots(rows = 2, cols = 1
                    , shared_xaxes = True
                    , vertical_spacing = 0.05)

fig.add_trace(go.Candlestick(x = df1.index
                             , open = df1["Adj Open"], high = df1["Adj High"]
                             , low = df1["Adj Low"], close = df1["Adj Close"]
                             , name = "Candle"
                             , increasing_line_color = "black", decreasing_line_color = "red")
              , row = 1, col = 1
             )

# One line per RSL window in the lower panel.
for rsl_column, line_color in (("RSL5", "red"), ("RSL10", "blue"), ("RSL20", "purple")):
    fig.add_trace(go.Scatter(x = df1.index, y = df1[rsl_column]
                             , name = rsl_column
                             , line = dict(color = line_color))
                  , row = 2, col = 1)

# NOTE(review): font_color and font = dict(color = ...) both target the global
# font colour, and the upper panel plots prices although its axis is titled
# "Close (%)" -- confirm which values are intended.
fig.update_layout(height = 600, width = 800
                  , title_text = "RSL"
                  , font_color = "blue"
                  , title_font_color = "black"
                  , xaxis2_title = "Time"
                  , yaxis_title = "Close (%)"
                  , yaxis2_title = "RSL (%)"
                  , font = dict(size = 15, color = "Black")
                 )

# Range-selector buttons on the upper x axis; the range slider is hidden.
fig.update_layout(
    xaxis = dict(
        rangeselector = dict(
            buttons = [
                dict(count = 1, label = "1m", step = "month", stepmode = "backward"),
                dict(count = 6, label = "6m", step = "month", stepmode = "backward"),
                dict(count = 1, label = "YTD", step = "year", stepmode = "todate"),
                dict(count = 1, label = "1y", step = "year", stepmode = "backward"),
                dict(step = "all"),
            ]
        ),
        rangeslider = dict(visible = False),
        type = "date",
    )
)

fig.update_layout(hovermode = "x unified")

# Exclude calendar days that have no row in df1 from the chart.
dt_all = pd.date_range(start = df1.index[0]
                       , end = df1.index[-1]
                       , freq = "D")
dt_all_py = [d.to_pydatetime() for d in dt_all]
dt_obs_py = [d.to_pydatetime() for d in df1.index]

# Set membership keeps this linear in the number of days; testing against the
# list would rescan every observed date for each calendar day.
observed_dates = set(dt_obs_py)
dt_breaks = [d for d in dt_all_py if d not in observed_dates]

fig.update_xaxes(
    rangebreaks = [dict(values = dt_breaks)]
)


fig.show()

In [None]:
# Fresh download for the moving-average crossover chart.
# auto_adjust=False keeps the separate "Adj Close" column used below; newer
# yfinance releases adjust prices by default and drop it.
df1 = yf.download(stock_symbol, start=start, end=end, auto_adjust=False)

# Newer yfinance releases return (field, ticker) MultiIndex columns even for a
# single ticker; flatten them so df1['Adj Close'] is a plain Series.
if isinstance(df1.columns, pd.MultiIndex):
    df1.columns = df1.columns.get_level_values(0)

# Shift the raw OHLC prices by the same offset that separates Close from Adj Close.
adjustment = df1["Close"] - df1["Adj Close"]
df1["Returns"] = df1["Adj Close"].pct_change(1)
df1["Adj Low"] = df1["Low"] - adjustment
df1["Adj High"] = df1["High"] - adjustment
df1["Adj Open"] = df1["Open"] - adjustment
df1["Vol"] = df1["Returns"].rolling(20).std() * np.sqrt(252)

# Forward-looking targets over a horizon of p rows.
p = 5
df1["Alvo1"] = df1["Adj Close"].shift(-p)
df1["Alvo2"] = np.where(df1["Alvo1"] > df1["Adj Close"], 1, 0)
df1["Alvo3"] = df1["Adj Close"].pct_change(p).shift(-p)

# Calculating moving averages
for window in (200, 100, 50):
    df1[f'MA{window}'] = df1['Adj Close'].rolling(window=window).mean()

# Initialize the figure with subplots
fig = make_subplots(rows=2, cols=1, shared_xaxes=True, vertical_spacing=0.02)

# Add traces for candlestick chart
fig.add_trace(go.Candlestick(x=df1.index,
                open=df1['Adj Open'],
                high=df1['Adj High'],
                low=df1['Adj Low'],
                close=df1['Adj Close'], name="Candlesticks"), row=1, col=1)

# Add traces for moving averages
for ma_column in ('MA200', 'MA100', 'MA50'):
    fig.add_trace(go.Scatter(x=df1.index, y=df1[ma_column], mode='lines', name=ma_column), row=1, col=1)

# Buy/sell signals where MA50 crosses MA100.
# The crossings are computed on whole columns instead of indexing the
# date-indexed Series by integer position (deprecated in pandas). Comparisons
# against NaN are False, so rows in the warm-up period never produce a signal.
ma_fast = df1['MA50']
ma_slow = df1['MA100']
crossed_up = (ma_fast > ma_slow) & (ma_fast.shift(1) <= ma_slow.shift(1))
crossed_down = (ma_fast < ma_slow) & (ma_fast.shift(1) >= ma_slow.shift(1))

# One marker trace per side keeps a single legend entry for each signal type.
for signal_mask, marker_color, signal_name in (
    (crossed_up, 'green', 'Buy Signal'),
    (crossed_down, 'red', 'Sell Signal'),
):
    if signal_mask.any():
        fig.add_trace(go.Scatter(x=df1.index[signal_mask],
                                 y=df1.loc[signal_mask, 'Adj Close'],
                                 mode='markers',
                                 marker=dict(color=marker_color, size=10),
                                 name=signal_name), row=1, col=1)

# Set titles and axis labels
fig.update_layout(
    title='Historic Chart with Moving Averages and Buy/Sell Signals',
    yaxis_title='Price',
    xaxis_rangeslider_visible=False
)

# Show the plot
fig.show()

[*********************100%%**********************]  1 of 1 completed


In [None]:
pip install yfinance pandas numpy scikit-learn xgboost



In [None]:
import yfinance as yf
import pandas as pd
import numpy as np
from datetime import datetime
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, classification_report
import warnings
warnings.filterwarnings('ignore')

def calculate_features(df):
    """Add technical-indicator columns to ``df`` and drop incomplete rows.

    Reads the 'Adj Close' and 'Volume' columns and adds, in this order:
    Returns, MA20, MA50, MA200, Volatility, RSI, MACD, Signal and Volume_MA.
    The frame is modified in place (including the final ``dropna``) and the
    same object is returned.
    """
    price = df['Adj Close']

    # One-period percentage change of the adjusted close.
    df['Returns'] = price.pct_change()

    # Simple moving averages of the adjusted close.
    for span in (20, 50, 200):
        df[f'MA{span}'] = price.rolling(window=span).mean()

    # 20-period rolling standard deviation of returns.
    df['Volatility'] = df['Returns'].rolling(window=20).std()

    # RSI from 14-period simple averages of upward and downward price changes.
    change = price.diff()
    avg_gain = change.where(change > 0, 0).rolling(window=14).mean()
    avg_loss = (-change.where(change < 0, 0)).rolling(window=14).mean()
    relative_strength = avg_gain / avg_loss
    df['RSI'] = 100 - (100 / (1 + relative_strength))

    # MACD: 12-span EMA minus 26-span EMA, plus its 9-span EMA signal line.
    fast_ema = price.ewm(span=12, adjust=False).mean()
    slow_ema = price.ewm(span=26, adjust=False).mean()
    df['MACD'] = fast_ema - slow_ema
    df['Signal'] = df['MACD'].ewm(span=9, adjust=False).mean()

    # 20-period average volume.
    df['Volume_MA'] = df['Volume'].rolling(window=20).mean()

    # Remove every row that still has a NaN (the indicator warm-up period).
    df.dropna(inplace=True)

    return df

def prepare_data(df):
    """Build the feature matrix and the next-row direction labels.

    Each row's label is 1 when the *following* row's 'Returns' value is
    positive and 0 otherwise, so the features of a row are never paired with a
    return that is already reflected in them. Rows with no following return
    (the final row) carry no label and are left out of ``X`` and ``y``.

    Args:
        df: frame holding the feature columns listed below plus 'Returns'.

    Returns:
        Tuple ``(X, y, features)``: the 2-D feature array, the integer label
        array of the same length, and the list of feature column names.
    """
    features = [
        'MA20', 'MA50', 'MA200', 'Volatility', 'RSI',
        'MACD', 'Signal', 'Volume_MA'
    ]

    # Return realised on the row after each observation.
    next_return = df['Returns'].shift(-1)
    has_label = next_return.notna()

    X = df.loc[has_label, features].values
    y = (next_return[has_label] > 0).astype(int).values

    return X, y, features

def main(symbol="BTC-USD", start="2018-01-01", end=None):
    """Download prices, train an XGBoost direction classifier and print a report.

    Args:
        symbol: ticker passed to yfinance.
        start: first date to download, as 'YYYY-MM-DD'.
        end: last date to download, as 'YYYY-MM-DD'; defaults to today.

    Raises:
        ValueError: if the download returns no rows.
    """
    # Download data
    print("Downloading cryptocurrency data...")
    if end is None:
        end = datetime.today().strftime("%Y-%m-%d")

    # auto_adjust=False keeps the separate 'Adj Close' column the features are
    # built from; newer yfinance releases adjust prices by default and drop it.
    df = yf.download(symbol, start=start, end=end, auto_adjust=False)

    # Newer yfinance releases return (field, ticker) MultiIndex columns even
    # for a single ticker; flatten them to plain field names.
    if isinstance(df.columns, pd.MultiIndex):
        df.columns = df.columns.get_level_values(0)

    if df.empty:
        raise ValueError(f"No price data returned for {symbol}")

    # Calculate features
    print("\nCreating features...")
    df = calculate_features(df)

    # Prepare data for modeling
    X, y, features = prepare_data(df)

    # Chronological split: the first 80% of rows train, the last 20% test.
    split_idx = int(len(X) * 0.8)
    X_train, X_test = X[:split_idx], X[split_idx:]
    y_train, y_test = y[:split_idx], y[split_idx:]

    # Train model
    print("\nTraining model...")
    model = XGBClassifier(
        n_estimators=100,
        learning_rate=0.1,
        max_depth=5,
        random_state=42
    )

    model.fit(X_train, y_train)

    # Evaluate model
    y_pred = model.predict(X_test)

    print("\nModel Performance:")
    print(f"Accuracy: {accuracy_score(y_test, y_pred):.4f}")
    print("\nClassification Report:")
    print(classification_report(y_test, y_pred))

    # Make prediction for next movement. The newest row has no label yet (its
    # following return is unknown), so it is excluded from X and read from df.
    latest_features = df[features].values[-1].reshape(1, -1)
    probability = model.predict_proba(latest_features)[0]
    prediction = "Up" if probability[1] > 0.5 else "Down"

    print("\nNext Movement Prediction:")
    print(f"Direction: {prediction}")
    print(f"Confidence: {max(probability):.2f}")

    # Feature importance
    importance = pd.DataFrame({
        'Feature': features,
        'Importance': model.feature_importances_
    }).sort_values('Importance', ascending=False)

    print("\nFeature Importance:")
    print(importance)

# Run the pipeline only when this code executes as the top-level module,
# not when it is imported from elsewhere.
if __name__ == "__main__":
    main()

[*********************100%***********************]  1 of 1 completed

Downloading cryptocurrency data...

Creating features...

Training model...






Model Performance:
Accuracy: 0.5370

Classification Report:
              precision    recall  f1-score   support

           0       0.52      0.64      0.58       226
           1       0.56      0.44      0.49       234

    accuracy                           0.54       460
   macro avg       0.54      0.54      0.53       460
weighted avg       0.54      0.54      0.53       460


Next Movement Prediction:
Direction: Down
Confidence: 0.65

Feature Importance:
      Feature  Importance
4         RSI    0.176422
5        MACD    0.136552
6      Signal    0.130308
7   Volume_MA    0.116362
3  Volatility    0.115129
1        MA50    0.112573
2       MA200    0.107355
0        MA20    0.105299


In [None]:
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Data: values are hard-coded here (they match the printed output of an earlier
# model run) and are not recomputed by this cell.
feature_importance = pd.DataFrame({
    'name': ['RSI', 'MACD', 'Signal', 'Volume_MA',
             'Volatility', 'MA50', 'MA200', 'MA20'],
    'value': [0.176422, 0.136552, 0.130308, 0.116362,
              0.115129, 0.112573, 0.107355, 0.105299],
})

model_metrics = pd.DataFrame({
    'name': ['Precision (Down)', 'Precision (Up)', 'Recall (Down)',
             'Recall (Up)', 'Overall Accuracy'],
    'value': [0.52, 0.56, 0.64, 0.44, 0.54],
})

# Two stacked panels: importances as bars, metrics as a line with markers.
dashboard_titles = ('Feature Importance Analysis', 'Model Performance Metrics')
fig = make_subplots(rows=2, cols=1, subplot_titles=dashboard_titles)

importance_bars = go.Bar(
    x=feature_importance['name'],
    y=feature_importance['value'],
    name='Importance Score',
)
fig.add_trace(importance_bars, row=1, col=1)

metric_line = go.Scatter(
    x=model_metrics['name'],
    y=model_metrics['value'],
    mode='lines+markers',
    name='Score',
)
fig.add_trace(metric_line, row=2, col=1)

# Layout and axis titles; the metrics axis is pinned to the 0-1 range.
fig.update_layout(height=800, showlegend=True, title_text="Model Analysis Dashboard")
fig.update_yaxes(title_text="Importance Score", row=1, col=1)
fig.update_yaxes(title_text="Score", range=[0, 1], row=2, col=1)

fig.show()

# Written summary, printed one line at a time.
report_lines = [
    "\nKey Findings:",
    "\n1. Feature Importance:",
    "- RSI is the most influential indicator (17.6%)",
    "- MACD and Signal Line combined account for ~27% of predictive power",
    "- Moving averages have similar importance (~11% each)",
    "\n2. Model Performance:",
    "- Overall accuracy: 54% (slightly better than random)",
    "- Better at predicting downward movements (64% recall)",
    "- More balanced precision for both directions (52-56%)",
    "\n3. Latest Prediction:",
    "- Predicted Direction: Down",
    "- Confidence Level: 65%",
    "- Based on current technical indicators",
    "\nImportant Disclaimers:",
    "- Model accuracy is modest (54%) - use with caution",
    "- Past performance doesn't guarantee future results",
    "- Consider multiple data sources for trading decisions",
]
for report_line in report_lines:
    print(report_line)


Key Findings:

1. Feature Importance:
- RSI is the most influential indicator (17.6%)
- MACD and Signal Line combined account for ~27% of predictive power
- Moving averages have similar importance (~11% each)

2. Model Performance:
- Overall accuracy: 54% (slightly better than random)
- Better at predicting downward movements (64% recall)
- More balanced precision for both directions (52-56%)

3. Latest Prediction:
- Predicted Direction: Down
- Confidence Level: 65%
- Based on current technical indicators

Important Disclaimers:
- Model accuracy is modest (54%) - use with caution
- Past performance doesn't guarantee future results
- Consider multiple data sources for trading decisions
