# Knee Point Calculation for Optimal Clusters
## Find the knee of the sorted nearest-neighbor distance curve to guide clustering (knee method)


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.neighbors import NearestNeighbors
from kneed import KneeLocator
import warnings
# Install a global "ignore" filter: every warning category is suppressed from
# here on, including deprecation notices emitted by the imported libraries.
warnings.filterwarnings('ignore')

# Select the 'seaborn-v0_8' matplotlib style sheet for subsequent figures.
plt.style.use('seaborn-v0_8')
print("Libraries imported successfully!")


In [None]:
# Load the independent features dataset.
# The path is relative to the current working directory. The knee-point cell
# further down passes `df.values` to a nearest-neighbors search, so the columns
# are presumably all numeric -- TODO confirm against the CSV.
df = pd.read_csv('stock_data_independent_features.csv')
# Quick sanity check of what was loaded: (rows, columns) and the column names.
print(f"Dataset shape: {df.shape}")
print(f"Features: {list(df.columns)}")


In [None]:
# Knee point calculation function (from Taller_4.ipynb)
def get_knee_point(data, neighbors=2, *, plot=True):
    """Locate the knee of the sorted nearest-neighbor distance curve.

    Each row of ``data`` is queried against a nearest-neighbors index fitted
    on ``data`` itself, so the closest hit for a row is normally the row
    itself at distance 0. The distance to the last of the ``neighbors`` hits
    is collected for every row, sorted in ascending order, and handed to
    ``KneeLocator`` to find the point of maximum curvature.

    Args:
        data: Array-like of shape (n_samples, n_features) accepted by
            ``NearestNeighbors.fit``.
        neighbors: Number of neighbors requested per row, counting the row
            itself. The default of 2 uses the distance to the nearest other
            row, matching the original hard-coded behavior.
        plot: When true (default) and a knee was found, draw the curve and
            the knee with ``KneeLocator.plot_knee``.

    Returns:
        A ``(knee, knee_y)`` tuple. ``knee`` is the 1-based position of the
        knee along the sorted distance curve (a rank among the samples, not a
        cluster count) and ``knee_y`` is the distance at that position. Both
        are ``None`` when no knee is detected.

    Raises:
        ValueError: If ``neighbors`` is smaller than 2; the curve would then
            consist only of each row's zero distance to itself.
    """
    if neighbors < 2:
        raise ValueError(
            f"neighbors must be at least 2 (got {neighbors}): the first "
            "neighbor of each row is the row itself"
        )

    # Calculate k-nearest neighbors distances
    nbrs = NearestNeighbors(n_neighbors=neighbors).fit(data)
    distances, _ = nbrs.kneighbors(data)

    # Keep only the farthest requested neighbor per row and sort ascending,
    # which is the shape KneeLocator expects for a convex/increasing curve.
    k_distances = np.sort(distances[:, -1])

    # Find knee point using KneeLocator
    kneedle = KneeLocator(range(1, len(k_distances) + 1),  # x values
                          k_distances,  # y values
                          S=1.0,  # parameter suggested from paper
                          curve="convex",  # parameter from figure
                          direction="increasing")  # parameter from figure

    knee, knee_y = kneedle.knee, kneedle.knee_y

    # Without a knee there is no marker to draw, so skip the plot rather than
    # asking plot_knee to draw a vertical line at None.
    if plot and knee is not None:
        kneedle.plot_knee()

    print("-" * 50)
    print(f"Knee point: {knee}")
    if knee_y is None:
        # KneeLocator leaves knee_y as None when it finds no knee; formatting
        # None with ':.3f' would raise TypeError.
        print("Knee value: not found")
    else:
        print(f"Knee value: {knee_y:.3f}")
    print("-" * 50)

    return knee, knee_y

print("Knee point calculation function defined!")


In [None]:
# Locate the knee of the sorted nearest-neighbor distance curve.
# NOTE: `optimal_k` is the knee's position along that sorted curve (a rank
# between 1 and the number of rows), NOT a number of clusters. The quantity
# that is useful for clustering is `knee_value`, the distance at the knee,
# which is the usual candidate for the `eps` radius of density-based
# clustering such as DBSCAN. The variable names are kept unchanged in case
# other cells refer to them.
optimal_k, knee_value = get_knee_point(df.values)

print("\n=== RESULTS ===")
if optimal_k is None or knee_value is None:
    # No knee detected: there is no distance to format or recommend.
    print("No knee point was detected in the nearest-neighbor distance curve")
else:
    print(f"Knee position (rank in sorted distance curve): {optimal_k}")
    print(f"Knee value: {knee_value:.3f}")
    print(f"\nRecommendation: Use eps = {knee_value:.3f} as a starting "
          "neighborhood radius for density-based clustering (e.g. DBSCAN)")
