# HexDetector Data Exploration

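This notebook performs an initial exploration and visualization of the HexDetector project's raw dataset. It loads the data, reviews summary statistics and missing values, and plots a feature distribution and a correlation heatmap, with the goal of understanding the dataset and identifying any patterns or anomalies.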

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Apply seaborn's white-grid theme to the figures drawn by this notebook.
# `sns.set` is only a backward-compatibility alias; `set_theme` is the
# interface recommended by the seaborn documentation.
sns.set_theme(style='whitegrid')

In [2]:
# Raw CSV location, given as a path relative to the working directory
raw_data_path = '../data/raw/data.csv'

# Parse the CSV into the DataFrame that the following cells operate on
data = pd.read_csv(filepath_or_buffer=raw_data_path)

# Preview the leading rows (five is the pandas default)
data.head(n=5)

In [3]:
# Descriptive statistics for the loaded frame, as computed by pandas
summary_stats = data.describe()
summary_stats

In [4]:
# Count missing entries per column, then show only the columns that have any
missing_values = data.isna().sum()
missing_values.loc[missing_values.gt(0)]

In [5]:
# Histogram (30 bins) with a KDE overlay for a single column.
# NOTE(review): 'feature_name' looks like a template placeholder — confirm
# that a column with this name actually exists in the loaded data.
fig, ax = plt.subplots(figsize=(10, 6))
sns.histplot(data['feature_name'], bins=30, kde=True, ax=ax)
ax.set_title('Distribution of Feature Name')
ax.set_xlabel('Feature Name')
ax.set_ylabel('Frequency')
plt.show()

In [6]:
# Correlation heatmap
# Since pandas 2.0, DataFrame.corr() no longer drops non-numeric columns
# silently; it raises if the frame contains any (e.g. string columns read
# from the CSV). Restrict the computation to numeric columns explicitly.
correlation_matrix = data.corr(numeric_only=True)

# The matrix is computed before the figure is opened so that a failure above
# does not leave an empty figure behind.
plt.figure(figsize=(12, 8))
sns.heatmap(correlation_matrix, annot=True, fmt='.2f', cmap='coolwarm')
plt.title('Correlation Heatmap')
plt.show()