In [38]:
import pandas as pd

In [39]:
# Load the XAUUSD 30-minute price bars from a CSV file; the path is relative
# to the notebook's working directory.
main = pd.read_csv('XAUUSD_30m.csv')

In [40]:
# Drop the unused tick_volume column. Rebinding `main` (instead of inplace=True)
# together with errors="ignore" keeps this cell safe to re-run: a second run
# finds the column already gone and is a no-op rather than raising KeyError.
main = main.drop(columns="tick_volume", errors="ignore")


In [41]:
# Preview the first rows to confirm which columns remain after the drop.
main.head()

Unnamed: 0,time,open,high,low,close
0,1/2/2018 1:00,1302.96,1306.3,1302.26,1306.23
1,1/2/2018 1:30,1306.22,1307.68,1306.14,1306.79
2,1/2/2018 2:00,1306.79,1306.88,1305.27,1305.9
3,1/2/2018 2:30,1305.91,1307.3,1305.89,1306.34
4,1/2/2018 3:00,1306.33,1306.46,1304.23,1306.26


In [42]:
import pandas as pd
from sklearn.cluster import KMeans
import numpy as np

# Build KMeans-based price levels: every closing price is assigned to one of
# 50 clusters, and each cluster becomes a "min-max" price range weighted by
# the number of bars that fall into it.

# Work on a copy so the cluster labels added below do not modify `main`.
df = main.copy()

# KMeans expects a 2-D (n_samples, n_features) array; close is the only feature.
closing_prices = df['close'].values.reshape(-1, 1)

# Partition the closing prices into 50 clusters (random_state pins the
# centroid initialisation so repeated runs give the same labels).
kmeans = KMeans(n_clusters=50, random_state=42)
df['cluster'] = kmeans.fit_predict(closing_prices)

# Weight of a cluster = number of bars assigned to it.
cluster_sizes = df['cluster'].value_counts().to_dict()

# Min/max close per cluster in a single pass instead of re-filtering the whole
# frame once per cluster. sort=False keeps clusters in order of first
# appearance, i.e. the same row order the per-cluster loop over
# df['cluster'].unique() produced.
cluster_ranges = df.groupby('cluster', sort=False)['close'].agg(['min', 'max'])

# One [range label, weight] record per cluster.
price_level_weight = [
    [f"{low:.2f}-{high:.2f}", cluster_sizes[cluster_id]]
    for cluster_id, low, high in cluster_ranges.itertuples(name=None)
]

# Final table of price levels and weights.
price_level_df_kmeans = pd.DataFrame(price_level_weight, columns=['Price Level', 'Weight'])


In [43]:
# Sanity check: the KMeans table should expose exactly 'Price Level' and 'Weight'.
price_level_df_kmeans.columns

Index(['Price Level', 'Weight'], dtype='object')

In [44]:
# Sanity check: one row per KMeans cluster and two columns are expected.
price_level_df_kmeans.shape

(50, 2)

In [45]:
# Heaviest (most populated) KMeans price levels first.
price_level_df_sorted_by_weight = price_level_df_kmeans.sort_values(by='Weight', ascending=False)

# Sort by price. 'Price Level' holds "min-max" strings, and sorting those
# directly is lexicographic ("999.50-..." would land after "1000.00-..."), so
# order by the numeric lower bound of each range instead.
price_level_df_sorted_by_price = price_level_df_kmeans.sort_values(
    by='Price Level',
    key=lambda levels: levels.str.split('-', n=1).str[0].astype(float),
)

In [46]:
# Show the last rows of the unsorted KMeans price-level table.
price_level_df_kmeans.tail()

Unnamed: 0,Price Level,Weight
45,2969.76-3065.67,619
46,3066.31-3171.47,310
47,3173.23-3274.55,270
48,3274.83-3359.40,469
49,3360.35-3494.28,191


In [53]:
from sklearn.cluster import DBSCAN
import pandas as pd

# Build DBSCAN-based price levels from turning points of the closing price.
# `main` is the price DataFrame loaded earlier; only its 'close' column is used.

# Step 1: Identify the turning prices (local maxima and minima).
# A bar is a turning point when its close is strictly above both neighbours
# (local maximum) or strictly below both (local minimum). shift() yields NaN
# for the missing neighbour of the first and last bar, and any comparison with
# NaN is False, so the two endpoints are never selected.
close = main['close']
prev_close = close.shift(1)
next_close = close.shift(-1)
is_local_max = (close > prev_close) & (close > next_close)
is_local_min = (close < prev_close) & (close < next_close)

# Turning prices in chronological order.
turning_prices = close[is_local_max | is_local_min].tolist()

# Convert turning prices to a DataFrame
turning_prices_df = pd.DataFrame(turning_prices, columns=['turning_price'])

# Step 2: Apply DBSCAN to turning prices
# DBSCAN expects a 2-D (n_samples, n_features) array; price is the only feature.
turning_prices_values = turning_prices_df['turning_price'].values.reshape(-1, 1)

# eps is expressed in price units; min_samples is the density threshold.
# NOTE(review): DBSCAN links neighbouring points transitively, so a cluster's
# price range can be far wider than eps (the first level reported below spans
# 1305.38-1339.24). Tune eps / min_samples if tighter levels are needed.
dbscan = DBSCAN(eps=0.1, min_samples=5)  # Adjust parameters as needed
turning_prices_df['dbscan_cluster'] = dbscan.fit_predict(turning_prices_values)

# Step 3: Create a new dataframe with price levels and their corresponding weights
# Label -1 marks noise points, which do not belong to any level.
clustered = turning_prices_df[turning_prices_df['dbscan_cluster'] != -1]

# Min, max and member count per cluster in a single pass. sort=False keeps
# clusters in order of first appearance, matching the row order that a loop
# over turning_prices_df['dbscan_cluster'].unique() would produce.
cluster_stats = (
    clustered
    .groupby('dbscan_cluster', sort=False)['turning_price']
    .agg(['min', 'max', 'size'])
)

# One [range label, weight] record per cluster; weight = number of turning points.
price_level_weight = [
    [f"{low:.2f}-{high:.2f}", weight]
    for low, high, weight in cluster_stats.itertuples(index=False, name=None)
]

# Create the final dataframe with price levels and weights
price_level_dbscan = pd.DataFrame(price_level_weight, columns=['Price Level', 'Weight'])

# Display the resulting DataFrame
price_level_dbscan.head()


Unnamed: 0,Price Level,Weight
0,1305.38-1339.24,2569
1,1339.33-1345.06,311
2,1349.50-1349.95,14
3,1352.74-1353.01,14
4,1352.11-1352.69,19


In [54]:
# NOTE: these two names were previously bound to the KMeans-based tables; from
# here on they refer to the DBSCAN-based tables.

# Heaviest DBSCAN price levels (most turning points) first.
price_level_df_sorted_by_weight = price_level_dbscan.sort_values(by='Weight', ascending=False)

# Sort by price. 'Price Level' holds "min-max" strings, and sorting those
# directly is lexicographic ("999.50-..." would land after "1000.00-..."), so
# order by the numeric lower bound of each range instead.
price_level_df_sorted_by_price = price_level_dbscan.sort_values(
    by='Price Level',
    key=lambda levels: levels.str.split('-', n=1).str[0].astype(float),
)

In [55]:
# Last rows of the price-sorted DBSCAN table.
price_level_df_sorted_by_price.tail()

Unnamed: 0,Price Level,Weight
839,2928.34-2928.63,7
842,2933.30-2933.60,7
843,2941.64-2941.82,5
846,3032.77-3032.90,5
847,3329.92-3330.05,5


In [56]:
# Top rows of the weight-sorted DBSCAN table, i.e. the levels whose clusters
# contain the most turning points.
price_level_df_sorted_by_weight.head()

Unnamed: 0,Price Level,Weight
467,1767.74-1820.28,5058
480,1898.22-1940.90,3261
0,1305.38-1339.24,2569
15,1277.54-1299.96,1863
470,1845.78-1872.34,1808
