# In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.neighbors import LocalOutlierFactor
from sklearn.preprocessing import StandardScaler

from sklearn.decomposition import PCA

# --- Local Outlier Factor (LOF) outlier detection ---------------------------
# Columns that this cell writes back onto `data` further down. They are
# removed before feature selection so that re-running the cell does not feed
# the previous run's numeric outputs ('prediction', 'lof_scores') back into
# LOF as if they were input features.
derived_columns = ['prediction', 'lof_scores', 'outlier']

# Numeric feature matrix handed to LOF.
# NOTE(review): despite the name, no scaling happens here and the imported
# StandardScaler is unused. LOF is distance based, so confirm that `data` was
# already standardised upstream (or scale it before fitting).
X_scaled = (
    data.drop(columns=derived_columns, errors='ignore')
        .select_dtypes(include=[np.number])
)

n_neighbors = 20

# Candidate cut-offs on the negative outlier factor; a sample counts as an
# outlier when its score is strictly below the cut-off.
thresholds = [-1.5, -1.4, -1.3, -1.2]

results = []

lof = LocalOutlierFactor(n_neighbors=n_neighbors)

# Labels produced by scikit-learn itself (-1 = outlier, 1 = inlier), using the
# estimator's own default cut-off rather than any threshold defined here.
y_pred = lof.fit_predict(X_scaled)

# Negated LOF per training sample: values near -1 are inliers, more negative
# values are more anomalous.
lof_scores = lof.negative_outlier_factor_

# Report how many samples each candidate threshold would flag.
for threshold in thresholds:
    outlier_mask = lof_scores < threshold
    outlier_count = np.sum(outlier_mask)
    results.append({
        'threshold': threshold,
        'outlier_count': outlier_count
    })

results_df = pd.DataFrame(results)
print(results_df)

# Hard-coded cut-off used for the final flag; note that it is not one of the
# candidate values in `thresholds` above.
selected_threshold = -1.23

# Write the results back onto the original frame.
data['prediction'] = y_pred
data['lof_scores'] = lof_scores

# 'outlier' follows `selected_threshold`, so it can disagree with the
# scikit-learn labels stored in 'prediction'.
data['outlier'] = lof_scores < selected_threshold

# Rows that were not flagged by the selected threshold.
normal_data = data[~data['outlier']]

# --- Histogram of the LOF scores --------------------------------------------
# Configure the font before the figure and its text elements are created.
plt.rcParams['font.family'] = 'Times New Roman'

plt.figure(figsize=(10, 6))

# `lof_scores` is assigned earlier in the script from
# `lof.negative_outlier_factor_`.
plt.hist(lof_scores, bins=50, edgecolor='k', color='skyblue', alpha=0.7)

# NOTE(review): the plotted values are the *negated* outlier factor, while the
# title and x-axis label below speak of "decision function" / "LOF" scores.
# Confirm the intended wording.
plt.title('Distribution of LOF Decision Function Scores', fontsize=16)
plt.xlabel('Local Outlier Factor (LOF)', fontsize=14)
plt.ylabel('Number of Samples', fontsize=14)

plt.grid(axis='y', linestyle='--', alpha=0.7)

# Finalise the layout *before* saving so that the PNG on disk reflects the
# same layout as the figure that is displayed.
plt.tight_layout()
plt.savefig('lof_distribution.png', dpi=300, bbox_inches='tight')

plt.show()