In [1]:
# Importing neccesary Libraries 
import pandas as pd
import numpy as np

In [2]:
# loading the data
df = pd.read_csv("bitcoin_2017_to_2023.csv")
df.head()

Unnamed: 0,timestamp,open,high,low,close,volume,quote_asset_volume,number_of_trades,taker_buy_base_asset_volume,taker_buy_quote_asset_volume
0,2023-08-01 13:19:00,28902.48,28902.49,28902.48,28902.49,4.68658,135453.8,258,0.89391,25836.224836
1,2023-08-01 13:18:00,28902.48,28902.49,28902.48,28902.49,4.77589,138035.1,317,2.24546,64899.385195
2,2023-08-01 13:17:00,28908.52,28908.53,28902.48,28902.49,11.52263,333053.2,451,2.70873,78290.170121
3,2023-08-01 13:16:00,28907.41,28912.74,28907.41,28908.53,15.8961,459555.6,483,10.22981,295738.166916
4,2023-08-01 13:15:00,28896.0,28907.42,28893.03,28907.41,37.74657,1090761.0,686,16.50452,476955.246611


In [3]:
#checking if there is any missing values
missing_values = df.isnull().sum()
print(missing_values)

timestamp                       0
open                            0
high                            0
low                             0
close                           0
volume                          0
quote_asset_volume              0
number_of_trades                0
taker_buy_base_asset_volume     0
taker_buy_quote_asset_volume    0
dtype: int64


In [14]:
# Converting the 'timestamp' column to datetime
df['timestamp'] = pd.to_datetime(df['timestamp'], format='%Y-%m-%d %H:%M:%S')

# Resample data into 4-hour intervals using OHLC (Open, High, Low, Close) aggregation
df_resampled = df.resample('4H', on='timestamp').agg({
    'open': 'first',
    'high': 'max',
    'low': 'min',
    'close': 'last',
    'volume': 'sum',
    'quote_asset_volume': 'sum',
    'number_of_trades': 'sum',
    'taker_buy_base_asset_volume': 'sum',
    'taker_buy_quote_asset_volume': 'sum'
}).dropna()  # Removing any missing values after resampling

# Reset the index to make 'timestamp' a column again
df_resampled = df_resampled.reset_index()

# Display the cleaned, resampled data
print(df_resampled.head())


            timestamp     open     high      low    close      volume  \
0 2017-08-17 04:00:00  4261.48  4349.99  4261.32  4349.99   82.088865   
1 2017-08-17 08:00:00  4333.32  4485.39  4333.32  4427.30   63.619882   
2 2017-08-17 12:00:00  4427.30  4485.39  4333.42  4352.34  174.562001   
3 2017-08-17 16:00:00  4352.34  4354.84  4200.74  4325.23  225.109716   
4 2017-08-17 20:00:00  4307.56  4369.69  4258.56  4285.08  249.769913   

   quote_asset_volume  number_of_trades  taker_buy_base_asset_volume  \
0        3.531943e+05               334                    64.013727   
1        2.825012e+05               248                    58.787633   
2        7.742388e+05               858                   125.184133   
3        9.652911e+05               986                   165.036363   
4        1.079545e+06              1001                   203.226685   

   taker_buy_quote_asset_volume  
0                 275647.421911  
1                 261054.051154  
2                 555419.7

In [15]:
# EDA and plotting of graphs to understand the data more accurately 
import plotly.graph_objects as go
import plotly.io as pio

# Set the default renderer to 'browser'
pio.renderers.default = 'browser'


# Sorting data chronologically by the 'timestamp'
df_resampled = df_resampled.sort_values('timestamp')

# Plotting the candlestick chart
fig = go.Figure(data=[go.Candlestick(x=df_resampled['timestamp'],
                                     open=df_resampled['open'],
                                     high=df_resampled['high'],
                                     low=df_resampled['low'],
                                     close=df_resampled['close'])])

# Updating chart layout
fig.update_layout(title='Bitcoin Candlestick Chart (4-hour)',
                  xaxis_title='Date',
                  yaxis_title='Price (USD)',
                  xaxis_rangeslider_visible=False)

# Displaying the candlestick chart
fig.show()


In [16]:
# creating summary of dataset 
# Display column descriptions and the first few rows
print(df_resampled.info())  # Column types and non-null counts
print(df_resampled.head())  # First few rows of the dataset



# Display the date range
date_range = df_resampled['timestamp'].min(), df_resampled['timestamp'].max()
print(f"Date Range: {date_range[0]} to {date_range[1]}")



<class 'pandas.core.frame.DataFrame'>
RangeIndex: 13037 entries, 0 to 13036
Data columns (total 10 columns):
 #   Column                        Non-Null Count  Dtype         
---  ------                        --------------  -----         
 0   timestamp                     13037 non-null  datetime64[ns]
 1   open                          13037 non-null  float64       
 2   high                          13037 non-null  float64       
 3   low                           13037 non-null  float64       
 4   close                         13037 non-null  float64       
 5   volume                        13037 non-null  float64       
 6   quote_asset_volume            13037 non-null  float64       
 7   number_of_trades              13037 non-null  int64         
 8   taker_buy_base_asset_volume   13037 non-null  float64       
 9   taker_buy_quote_asset_volume  13037 non-null  float64       
dtypes: datetime64[ns](1), float64(8), int64(1)
memory usage: 1018.6 KB
None
            timestamp 

In [17]:
# Display statistics for the dataset
print(df_resampled.describe())  # Summary statistics for numerical columns

                           timestamp          open          high  \
count                          13037  13037.000000  13037.000000   
mean   2020-08-09 16:46:25.671549952  20083.483678  20302.566915   
min              2017-08-17 04:00:00   2870.900000   3148.000000   
25%              2019-02-13 04:00:00   7621.510000   7699.190000   
50%              2020-08-10 04:00:00  11699.790000  11795.000000   
75%              2022-02-04 08:00:00  29890.800000  30114.250000   
max              2023-08-01 12:00:00  68490.000000  69000.000000   
std                              NaN  16057.236768  16236.526160   

                low         close         volume  quote_asset_volume  \
count  13037.000000  13037.000000   13037.000000        1.303700e+04   
mean   19845.974489  20085.429416   12686.231657        2.771564e+08   
min     2817.000000   2919.000000       0.000000        0.000000e+00   
25%     7525.080000   7623.000000    4468.480706        4.048235e+07   
50%    11561.000000  11699.

In [18]:
# Pattern recognition - 
# 1. The upper trendline is flat, while the lower line is rising. This pattern indicates that buyers are more aggressive than sellers.
# 2. Simply measure the height of the pattern at its widest point and project that vertical distance from the breakout point.


In [12]:
# In real-time, the pattern would need to be updated continuously as new data comes in. 
# The same logic can be applied with a sliding window approach, where the new data is appended, and the pattern is checked incrementally.

In [None]:
# Check data types of all columns
print(df.dtypes)



In [None]:
# Convert the necessary columns to numeric and handle any conversion issues
columns_to_convert = ['open', 'high', 'low', 'close', 'volume', 'quote_asset_volume', 'number_of_trades', 'taker_buy_base_asset_volume', 'taker_buy_quote_asset_volume']

for col in columns_to_convert:
    df[col] = pd.to_numeric(df[col], errors='coerce')

# Drop rows with any NaN values in these columns after conversion
df.dropna(subset=columns_to_convert, inplace=True)

# Ensure the conversion worked
print(df.dtypes)



In [None]:
# Pattern identification algorithm is broken down into 3 block of code so it takes less time to process.

In [20]:
#breaking down the code
import pandas as pd
import numpy as np
import plotly.graph_objs as go
from scipy.signal import argrelextrema

# Ensure numeric columns
columns_to_convert = ['open', 'high', 'low', 'close']
df[columns_to_convert] = df[columns_to_convert].apply(pd.to_numeric, errors='coerce')

# Drop rows with NaN values in the relevant columns
df.dropna(subset=columns_to_convert, inplace=True)

# Detect local maxima and minima using scipy's argrelextrema
def detect_local_extrema(df, order=5):
    df['local_max'] = df['high'][argrelextrema(df['high'].values, np.greater, order=order)[0]]
    df['local_min'] = df['low'][argrelextrema(df['low'].values, np.less, order=order)[0]]
    return df

df = detect_local_extrema(df)


In [21]:
# Function to detect ascending triangle pattern
def detect_ascending_triangle(df, resistance_tolerance=0.01, min_points=3):
    # Step 1: Find resistance level (mean of local maxima)
    local_maxima = df['local_max'].dropna()
    if len(local_maxima) < min_points:
        return False  # Not enough maxima to form a pattern
    
    resistance_level = local_maxima.mean()
    
    # Check if all maxima are within the tolerance of the resistance level
    resistance_hits = local_maxima.apply(lambda x: abs(x - resistance_level) / resistance_level < resistance_tolerance).sum()

    if resistance_hits < min_points:
        return False  # Not enough resistance touches
    
    # Step 2: Identify upward-sloping support
    local_minima = df['local_min'].dropna()
    if len(local_minima) < min_points:
        return False  # Not enough minima to form a pattern
    
    # Calculate slope of support line using linear regression (fit a line to the local minima)
    slope, intercept = np.polyfit(local_minima.index, local_minima.values, 1)
    
    # Check if the slope is positive (indicating an upward trend in lows)
    if slope > 0:
        return True  # Ascending triangle pattern detected
    else:
        return False  # No pattern detected

# Initialize list for detected patterns
detected_patterns = []
window_size = 30

# Loop over larger chunks of data to detect patterns
for i in range(0, len(df), window_size):
    window = df.iloc[i:i+window_size]
    if detect_ascending_triangle(window):
        detected_patterns.append(window)


In [22]:
# Initialize Plotly figure for the candlestick chart
fig = go.Figure()

# Plot only detected patterns
for idx, window in enumerate(detected_patterns):
    fig.add_trace(go.Candlestick(x=window.index,
                                 open=window['open'],
                                 high=window['high'],
                                 low=window['low'],
                                 close=window['close'],
                                 name=f"Ascending Triangle #{idx+1}"))

# Update the layout of the plot
fig.update_layout(title=f"Total number of ascending triangles detected: {len(detected_patterns)}")
fig.show()


In [26]:
# here I am plotting timestamps on the detected patterns
# Initialize Plotly figure for the candlestick chart
fig = go.Figure()

# Plot only detected patterns
for idx, window in enumerate(detected_patterns):
    # Get the start and end timestamps
    start_timestamp = window.index[0]
    end_timestamp = window.index[-1]
    
    # Add the candlestick trace for the detected pattern
    fig.add_trace(go.Candlestick(x=window.index,
                                 open=window['open'],
                                 high=window['high'],
                                 low=window['low'],
                                 close=window['close'],
                                 name=f"Ascending Triangle #{idx+1}"))
    
    # Add annotations for the start and end timestamps
    fig.add_annotation(x=start_timestamp, y=window['low'].min(),
                       text=f"Start: {start_timestamp}",
                       showarrow=True, arrowhead=2, ax=0, ay=-40, font=dict(color="green"))
    
    fig.add_annotation(x=end_timestamp, y=window['high'].max(),
                       text=f"End: {end_timestamp}",
                       showarrow=True, arrowhead=2, ax=0, ay=-40, font=dict(color="red"))

# Update the layout of the plot
fig.update_layout(title=f"Total number of ascending triangles detected: {len(detected_patterns)}",
                  xaxis_title="Timestamp", yaxis_title="Price")

fig.show()


In [27]:
# Initialize Plotly figure for the candlestick chart
fig = go.Figure()

# Plot only the first five detected ascending triangles
for idx, window in enumerate(detected_patterns[:5]):
    # Get the start and end timestamps
    start_timestamp = window.index[0]
    end_timestamp = window.index[-1]
    
    # Add the candlestick trace for the detected pattern
    fig.add_trace(go.Candlestick(x=window.index,
                                 open=window['open'],
                                 high=window['high'],
                                 low=window['low'],
                                 close=window['close'],
                                 name=f"Ascending Triangle #{idx+1}"))
    
    # Calculate and plot the lower trendline (support) - connecting the lowest points
    low_points = window[['low']].reset_index()
    lower_trendline = low_points.copy()  # Just to store the same low points
    fig.add_trace(go.Scatter(x=lower_trendline['index'],
                             y=lower_trendline['low'],
                             mode='lines',
                             line=dict(color="blue", dash='dash'),
                             name=f"Lower Trendline #{idx+1}"))

    # Calculate and plot the upper trendline (resistance) - connecting the highest points
    high_points = window[['high']].reset_index()
    upper_trendline = high_points.copy()  # Just to store the same high points
    fig.add_trace(go.Scatter(x=upper_trendline['index'],
                             y=upper_trendline['high'],
                             mode='lines',
                             line=dict(color="red", dash='dash'),
                             name=f"Upper Trendline #{idx+1}"))

    # Add annotations for the start and end timestamps
    fig.add_annotation(x=start_timestamp, y=window['low'].min(),
                       text=f"Start: {start_timestamp}",
                       showarrow=True, arrowhead=2, ax=0, ay=-40, font=dict(color="green"))
    
    fig.add_annotation(x=end_timestamp, y=window['high'].max(),
                       text=f"End: {end_timestamp}",
                       showarrow=True, arrowhead=2, ax=0, ay=-40, font=dict(color="red"))

# Update the layout of the plot
fig.update_layout(title=f"First 5 Ascending Triangles with Trendlines",
                  xaxis_title="Timestamp", yaxis_title="Price",
                  legend_title="Patterns & Trendlines")

fig.show()


In [28]:
#plotting trendlines for first 5 triangles 
# Initialize Plotly figure for the candlestick chart
fig = go.Figure()

# Plot only the first five detected ascending triangles
for idx, window in enumerate(detected_patterns[:5]):
    # Get the start and end timestamps
    start_timestamp = window.index[0]
    end_timestamp = window.index[-1]
    
    # Add the candlestick trace for the detected pattern
    fig.add_trace(go.Candlestick(x=window.index,
                                 open=window['open'],
                                 high=window['high'],
                                 low=window['low'],
                                 close=window['close'],
                                 name=f"Ascending Triangle #{idx+1}"))

    # Upper Trendline (Horizontal) - resistance, same value across all timestamps
    resistance_level = window['high'].max()  # The highest price (resistance)
    fig.add_trace(go.Scatter(x=[start_timestamp, end_timestamp],
                             y=[resistance_level, resistance_level],  # Flat line at resistance level
                             mode='lines',
                             line=dict(color="red", dash='solid', width=2),
                             name=f"Upper Trendline #{idx+1}"))

    # Lower Trendline (Diagonal) - support, connecting the low points
    support_start = window['low'].min()  # Starting support (lowest low in the window)
    support_end = window['low'].iloc[-1]  # Ending support (low of the last candle)
    fig.add_trace(go.Scatter(x=[start_timestamp, end_timestamp],
                             y=[support_start, support_end],  # Diagonal line connecting lows
                             mode='lines',
                             line=dict(color="blue", dash='solid', width=2),
                             name=f"Lower Trendline #{idx+1}"))

    # Add annotations for the start and end timestamps
    fig.add_annotation(x=start_timestamp, y=support_start,
                       text=f"Start: {start_timestamp}",
                       showarrow=True, arrowhead=2, ax=0, ay=-40, font=dict(color="green"))
    
    fig.add_annotation(x=end_timestamp, y=resistance_level,
                       text=f"End: {end_timestamp}",
                       showarrow=True, arrowhead=2, ax=0, ay=-40, font=dict(color="red"))

# Update the layout of the plot
fig.update_layout(title=f"First 5 Ascending Triangles with Horizontal Upper and Rising Lower Trendlines",
                  xaxis_title="Timestamp", yaxis_title="Price",
                  legend_title="Patterns & Trendlines")

fig.show()


In [29]:
# bactesting stratergy
import pandas as pd

# Assuming 'data' is a DataFrame that contains your price data with 'open', 'high', 'low', 'close', and 'timestamp'
# Assuming 'detected_patterns' contains detected ascending triangle windows

initial_balance = 10000  # Starting balance for backtesting
balance = initial_balance
trade_size = 1000  # Amount to trade per trade
open_positions = []  # To store ongoing trades
closed_trades = []  # To store completed trades

# Backtesting loop - iterate through detected patterns
for idx, window in enumerate(detected_patterns[:5]):  # Backtest only first 5 patterns for this example
    start_timestamp = window.index[0]
    end_timestamp = window.index[-1]

    # Upper trendline (resistance) - we will break this for a buy signal
    resistance_level = window['high'].max()
    
    # Go through the data after the end of the pattern to see when a breakout happens
    for i in range(end_timestamp, len(data)):  # Start checking after the pattern ends
        current_price = data.loc[data.index[i], 'close']
        timestamp = data.index[i]
        
        # Entry Condition: Breakout above the resistance level
        if current_price > resistance_level:
            entry_price = current_price
            entry_timestamp = timestamp
            open_positions.append({'entry_price': entry_price, 'entry_time': entry_timestamp})
            print(f"Buy signal at {entry_price} on {entry_timestamp}")
            break  # Assume one trade per pattern

# Simulating exits and calculating profit/loss
for position in open_positions:
    entry_price = position['entry_price']
    entry_time = position['entry_time']
    
    # Loop through data after entry to find exit point (this could be a fixed stop loss or a take profit level)
    for i in range(data.index.get_loc(entry_time), len(data)):
        current_price = data.loc[data.index[i], 'close']
        timestamp = data.index[i]
        
        # Exit condition 1: Fixed target profit (e.g., 5%)
        if current_price >= entry_price * 1.05:
            profit = trade_size * (current_price - entry_price) / entry_price
            closed_trades.append({'entry_price': entry_price, 'exit_price': current_price, 'profit': profit, 'exit_time': timestamp})
            balance += profit
            print(f"Sell signal at {current_price} on {timestamp}, Profit: {profit}")
            break
        
        # Exit condition 2: Fixed stop loss (e.g., 2% below entry price)
        elif current_price <= entry_price * 0.98:
            loss = trade_size * (entry_price - current_price) / entry_price
            closed_trades.append({'entry_price': entry_price, 'exit_price': current_price, 'profit': -loss, 'exit_time': timestamp})
            balance -= loss
            print(f"Sell signal at {current_price} on {timestamp}, Loss: {-loss}")
            break

# Performance Summary
total_trades = len(closed_trades)
total_profit = sum([trade['profit'] for trade in closed_trades])
win_trades = sum([1 for trade in closed_trades if trade['profit'] > 0])
lose_trades = total_trades - win_trades
win_rate = win_trades / total_trades * 100 if total_trades > 0 else 0

print(f"\nBacktest Summary:")
print(f"Total Trades: {total_trades}")
print(f"Winning Trades: {win_trades}")
print(f"Losing Trades: {lose_trades}")
print(f"Total Profit: {total_profit}")
print(f"Final Balance: {balance}")
print(f"Win Rate: {win_rate:.2f}%")


NameError: name 'data' is not defined