In [None]:
import pandas as pd
from pathlib import Path
from lib.config import AppConfig
import numpy as np

# Instantiate the project's AppConfig with no arguments; later cells read
# settings (e.g. `composition_data_path`) from this object.
config = AppConfig()

In [None]:
# Wrap the configured composition-data location in a pathlib.Path.
ds_path = Path(config.composition_data_path)

In [None]:
# Load the composition dataset from CSV into a DataFrame using pandas'
# default parsing options.
data = pd.read_csv(ds_path)

In [None]:
import matplotlib.pyplot as plt
from lib.reproduction import major_oxides

# Coerce every major-oxide column to a numeric dtype; entries that cannot be
# parsed as numbers become NaN rather than raising.
for column_name in major_oxides:
    numeric_column = pd.to_numeric(data[column_name], errors="coerce")
    data[column_name] = numeric_column

# Function to calculate nice round x-ticks
def calculate_xticks(max_value, num_ticks=10):
    """Return evenly spaced tick positions starting at 0.

    The spacing is ``ceil(max_value / num_ticks)``, i.e. a whole number, and
    the ticks extend far enough to reach ``max_value``.

    Args:
        max_value: Largest data value the axis has to cover.
        num_ticks: Approximate number of intervals to split the axis into.

    Returns:
        A 1-D float ``numpy.ndarray`` of tick positions. For degenerate
        input (``max_value`` that is NaN/infinite, or one that yields a zero
        spacing such as ``0``) a single tick at 0 is returned.
    """
    # NaN shows up when the column had no numeric values (max of an empty
    # Series); np.arange cannot work with a non-finite stop/step.
    if not np.isfinite(max_value):
        return np.array([0.0])

    step = np.ceil(max_value / num_ticks)
    # A zero step (max_value == 0, or a small negative value rounding up to
    # -0.0) would make np.arange raise, so fall back to the origin only.
    if step == 0:
        return np.array([0.0])

    return np.arange(0, max_value + step, step)

# Set up the figure and subplots: a two-column grid with one panel per oxide.
# The row count is derived from `major_oxides` so the layout still works if
# the list does not contain exactly eight entries.
n_cols = 2
n_oxides = len(major_oxides)
n_rows = max(1, (n_oxides + n_cols - 1) // n_cols)  # ceiling division
# 4.5 in of height per row reproduces the 14x18 in figure for a 4x2 grid.
fig, axs = plt.subplots(n_rows, n_cols, figsize=(14, 4.5 * n_rows))
axs = axs.ravel()

# Generate the histogram for each major oxide
for i, oxide in enumerate(major_oxides):
    ax = axs[i]
    values = data[oxide].dropna()
    ax.hist(values, bins=30, edgecolor='black')
    ax.set_title(f'Distribution of {oxide}', fontsize=14)
    ax.set_xlabel(f'{oxide} (%)', fontsize=12)
    ax.set_ylabel('Frequency', fontsize=12)
    ax.grid(axis='y', linestyle='--', linewidth=0.7)

    # Adjust x-ticks for nice and round numbers, starting from 0.
    # Skipped when the column has no numeric values: max() would be NaN and
    # there is nothing to scale the ticks against.
    if not values.empty:
        max_value = values.max()
        xticks = calculate_xticks(max_value)
        ax.set_xticks(xticks)
        ax.set_xticklabels([f'{tick:.0f}' for tick in xticks], rotation=45, ha='right')

# Hide grid cells that have no oxide assigned (e.g. an odd number of oxides).
for unused_ax in axs[n_oxides:]:
    unused_ax.set_visible(False)

# Adjust layout for better spacing
plt.tight_layout()

# Show the figure
plt.show()