In [1]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.impute import KNNImputer
from xgboost import XGBRegressor
from scipy.ndimage import gaussian_filter1d

In [5]:
# Load data: read the exchange-bias table and discard the identifier columns
# that are not used as model features.
# NOTE(review): this is an absolute, machine-specific path; the notebook only
# runs where this exact location exists.
df = pd.read_csv(r'C:\Users\Ксения\Project\Magnet_Exchange_bias\Data\final_data_exchange_bias.csv')
df = df.drop(columns=['core', 'shell', 'formula'])

# Preprocessing: keep only rows with a strictly positive exchange bias so that
# the log10 transform below is defined. The row filter is built once and
# reused, which guarantees that the target, the features and the temperature
# column all refer to exactly the same rows.
positive_mask = df['exc_bias_oe'] > 0
y = df.loc[positive_mask, 'exc_bias_oe']

# Apply smoothing.
# gaussian_filter1d returns an array with the same dtype as its input, so an
# integer-typed column would have its smoothed values cast back to integers
# (possibly to 0, which log10 maps to -inf). Casting to float first avoids
# that and changes nothing when the column is already floating point.
# NOTE(review): the filter runs along the row order of the file, i.e. it
# averages each target with those of neighbouring rows before the train/test
# split — confirm that this is intended.
y_smooth = gaussian_filter1d(y.to_numpy(dtype=float), sigma=2)
y_log = np.log10(y_smooth)

# Feature preparation: every remaining column except the target is a feature;
# the temperature column is additionally kept on its own for the plots.
X = df.loc[positive_mask].drop(columns=['exc_bias_oe'])
temperature_k = df.loc[positive_mask, 'temperature_k']

In [6]:
# Split the data first so that nothing fitted below ever sees the test rows.
# The partition produced by train_test_split depends only on the number of
# rows, test_size and random_state, so the same rows end up in the test set
# as when the split is done after imputation.
X_train_raw, X_test_raw, y_train, y_test, temp_train, temp_test = train_test_split(
    X, y_log, temperature_k, test_size=0.2, random_state=1984
)

# Imputation: the neighbours used to fill missing values are learned from the
# training rows only and then applied unchanged to the test rows. Fitting the
# imputer on the full dataset would let test-set feature values influence the
# training features (data leakage).
knn_imputer = KNNImputer(n_neighbors=5)
X_train = knn_imputer.fit_transform(X_train_raw)
X_test = knn_imputer.transform(X_test_raw)

# Imputed version of the complete feature table, kept under its original name
# for any later cell that still refers to it.
X_imputed = knn_imputer.transform(X)

# Scaling: min/max are likewise learned from the training rows only.
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train the model on the log10-transformed, smoothed target.
model = XGBRegressor(random_state=1984)
model.fit(X_train_scaled, y_train)

# Predictions for the held-out rows (same log10 scale as y_test).
y_test_pred = model.predict(X_test_scaled)

# Temperature 0-80 K

In [7]:
# Restrict the test set to temperatures from 0 K to 80 K, both ends included.
# NOTE(review): 80 K itself is also accepted by the later 80–400 K selection,
# so a test point at exactly 80 K is used in both sections.
temp_range_mask = temp_test.between(0, 80)
temp_filtered = temp_test.loc[temp_range_mask]

# The targets and predictions are plain NumPy arrays in the same row order as
# temp_test, so they are cut with the positional form of the same mask.
rows_in_range = temp_range_mask.to_numpy()
y_test_filtered = y_test[rows_in_range]
y_test_pred_filtered = y_test_pred[rows_in_range]

In [8]:
# Half-width of the tolerance band, on the same log10 scale as the targets.
# This is a hand-set constant; it is not computed from the predictions here.
fixed_rmse = 0.27

# Absolute error of every prediction in the selected temperature window.
deviations = np.absolute(np.subtract(y_test_filtered, y_test_pred_filtered))

# True where the prediction lies no further than fixed_rmse from the target.
in_rmse_range_mask = np.less_equal(deviations, fixed_rmse)

In [9]:
# Keep only the points whose prediction error is within the tolerance band.
temp_in_rmse_range = temp_filtered[in_rmse_range_mask]
y_test_in_rmse_range = y_test_filtered[in_rmse_range_mask]
y_test_pred_in_rmse_range = y_test_pred_filtered[in_rmse_range_mask]

# Order everything by ascending temperature. The permutation is taken from the
# temperatures and then applied positionally to all three sequences, so each
# temperature stays paired with its own target and prediction.
sorted_indices = np.argsort(temp_in_rmse_range)
order = np.asarray(sorted_indices)
temp_sorted = temp_in_rmse_range.values[order]
y_test_sorted = y_test_in_rmse_range[order]
y_test_pred_sorted = y_test_pred_in_rmse_range[order]

# Start with every point selected; the next step switches individual entries
# off according to the per-temperature value windows.
value_mask = np.full(y_test_sorted.shape[0], True, dtype=bool)

In [10]:
# Per-temperature acceptance windows for the (log10) experimental value.
# Each rule is (temp_low, temp_high_exclusive, value_low, value_high): a point
# with temp_low <= temperature < temp_high is kept only if
# value_low <= value <= value_high.
#
# Rules are tried top to bottom and the FIRST matching one decides, so the
# narrow ranges listed first take precedence over the broad fallback ranges
# below them. The former fallback rule for 40–50 K is not listed because the
# 40–55 K rule above it always matched first, so it could never take effect.
#
# NOTE(review): these windows are hand-entered constants that decide which
# test points are shown, based on their experimental values.
value_rules_0_80 = [
    # narrow, temperature-specific windows
    (0, 2, 2.2, 2.3),
    (3, 5, 2.16, 2.173),
    (8, 9, 2.1, 2.2),
    (10, 11, 2, 2.14),
    (26, 30, 1.43, 1.5),
    (40, 55, 1.8, 1.86),
    # broad fallback windows for temperatures not matched above
    (0, 10, 2.1, 2.22),
    (10, 20, 2, 2.2),
    (20, 40, 1.4, 1.44),
    (60, 80, 2.05, 2.44),
]

# Apply conditions for filtering. `pending` marks the points that no rule has
# claimed yet; once a rule matches a point, later rules no longer touch it.
# Points matched by no rule at all (here: 55 K up to but excluding 60 K, and
# exactly 80 K) keep the value already stored in value_mask.
pending = np.ones(len(y_test_sorted), dtype=bool)
for t_low, t_high, v_low, v_high in value_rules_0_80:
    hit = pending & (temp_sorted >= t_low) & (temp_sorted < t_high)
    value_mask[hit] = (y_test_sorted[hit] >= v_low) & (y_test_sorted[hit] <= v_high)
    pending &= ~hit

In [11]:
# Reduce temperatures, targets and predictions to the points that passed the
# value filter; all three are cut with the same mask so they stay aligned.
temp_final, y_test_final, y_test_pred_final = (
    values[value_mask]
    for values in (temp_sorted, y_test_sorted, y_test_pred_sorted)
)

# Tolerance band of +/- fixed_rmse, centred on the experimental values.
y_lower_bound = y_test_final - fixed_rmse
y_upper_bound = y_test_final + fixed_rmse

In [12]:
# Interactive Plotly figure: experimental vs predicted values over temperature,
# with a shaded band of +/- fixed_rmse around the experimental values.

# Look shared by both axes: grid lines are switched off (gridcolor would only
# matter if showgrid were turned on), and a thin light-gray axis line is
# mirrored onto the opposite side so the plot area is framed.
axis_style = dict(
    showgrid=False,
    gridcolor='lightgray',
    linecolor='lightgray',
    linewidth=0.5,
    mirror=True,
)

# The title is drawn as an annotation in paper coordinates, centred
# horizontally and placed just above the plot area.
title_annotation = dict(
    x=0.5,
    y=1.1,
    xref='paper',
    yref='paper',
    text='Predicted vs Experimental (temperature: 0-80 K)',
    showarrow=False,
    font=dict(size=18),
)

# Trace order matters: the lower bound uses fill='tonexty', which fills up to
# the trace added immediately before it, i.e. the upper bound.
fig = go.Figure(data=[
    # Experimental values, markers only
    go.Scatter(
        x=temp_final,
        y=y_test_final,
        mode='markers',
        name='Experimental',
        marker=dict(color='blue'),
        hovertemplate='Temperature: %{x}<br>Actual value: %{y}<extra></extra>',
    ),
    # Model predictions, markers only
    go.Scatter(
        x=temp_final,
        y=y_test_pred_final,
        mode='markers',
        name='Predicted',
        marker=dict(color='red'),
        hovertemplate='Temperature: %{x}<br>Predicted: %{y}<extra></extra>',
    ),
    # Upper edge of the band: zero-width line, present only as a fill target
    go.Scatter(
        x=temp_final,
        y=y_upper_bound,
        mode='lines',
        line=dict(width=0),
        showlegend=False,
    ),
    # Lower edge of the band: zero-width line plus the semi-transparent
    # light-blue fill between the two edges
    go.Scatter(
        x=temp_final,
        y=y_lower_bound,
        mode='lines',
        line=dict(width=0),
        fill='tonexty',
        fillcolor='rgba(0, 191, 255, 0.2)',
        showlegend=True,
        name='RMSE range',
    ),
])

# White background, fixed 900x500 canvas and 50 px margins on every side.
fig.update_layout(
    xaxis_title='Temperature (K)',
    yaxis_title='Log10 exchange bias',
    showlegend=True,
    xaxis=dict(axis_style),
    yaxis=dict(axis_style),
    plot_bgcolor='white',
    paper_bgcolor='white',
    width=900,
    height=500,
    margin=dict(l=50, r=50, t=50, b=50),
    annotations=[title_annotation],
)

fig.show()

# Temperature 80-370 K

In [13]:
# Restrict the test set to temperatures from 80 K to 400 K, both ends included.
# NOTE(review): the upper limit used here is 400 K, while the section heading
# and the figure title say 370 K — confirm which one is intended.
temp_range_mask = temp_test.between(80, 400)
temp_filtered = temp_test.loc[temp_range_mask]

# The targets and predictions are plain NumPy arrays in the same row order as
# temp_test, so they are cut with the positional form of the same mask.
rows_in_range = temp_range_mask.to_numpy()
y_test_filtered = y_test[rows_in_range]
y_test_pred_filtered = y_test_pred[rows_in_range]

In [14]:
# Half-width of the tolerance band, on the same log10 scale as the targets.
# This is a hand-set constant; it is not computed from the predictions here.
fixed_rmse = 0.27

# Absolute error of every prediction in the selected temperature window.
deviations = np.absolute(np.subtract(y_test_filtered, y_test_pred_filtered))

# True where the prediction lies no further than fixed_rmse from the target.
in_rmse_range_mask = np.less_equal(deviations, fixed_rmse)

In [15]:
# Keep only the points whose prediction error is within the tolerance band.
temp_in_rmse_range = temp_filtered[in_rmse_range_mask]
y_test_in_rmse_range = y_test_filtered[in_rmse_range_mask]
y_test_pred_in_rmse_range = y_test_pred_filtered[in_rmse_range_mask]

# Order everything by ascending temperature. The permutation is taken from the
# temperatures and then applied positionally to all three sequences, so each
# temperature stays paired with its own target and prediction.
sorted_indices = np.argsort(temp_in_rmse_range)
order = np.asarray(sorted_indices)
temp_sorted = temp_in_rmse_range.values[order]
y_test_sorted = y_test_in_rmse_range[order]
y_test_pred_sorted = y_test_pred_in_rmse_range[order]

# Start with every point selected; the next step switches individual entries
# off according to the per-temperature value windows.
value_mask = np.full(y_test_sorted.shape[0], True, dtype=bool)

In [16]:
# Per-temperature acceptance windows for the (log10) experimental value.
# Each rule is (temp_low, temp_high, high_inclusive, value_low, value_high):
# a point whose temperature lies between temp_low (inclusive) and temp_high
# (inclusive only when high_inclusive is True) is kept only if
# value_low <= value <= value_high.
#
# Rules are tried top to bottom and the FIRST matching one decides, so the
# specific ranges listed first take precedence over the broader fallback
# ranges below them wherever the two overlap.
value_rules_80_400 = [
    # specific temperature ranges
    (80, 100, True, 2.4, 2.43),
    (110, 125, True, 2.19, 2.36),
    (130, 150, True, 2.12, 2.2),
    (160, 200, True, 2.0, 2.1),
    (210, 250, True, 2.0, 2.16),
    (260, 300, True, 2.2, 2.3),
    # fallback ranges for temperatures not matched above
    (80, 200, False, 1.7, 3),
    (150, 250, False, 1.9, 2.4),
    (250, 280, True, 1.1, 2.3),
    (290, 325, True, 2.2, 2.3),
    (325, 365, True, 1, 1.37),
]

# Apply conditions for filtering. `pending` marks the points that no rule has
# claimed yet; once a rule matches a point, later rules no longer touch it.
# Points matched by no rule at all keep the value already stored in value_mask.
pending = np.ones(len(y_test_sorted), dtype=bool)
for t_low, t_high, high_inclusive, v_low, v_high in value_rules_80_400:
    if high_inclusive:
        below_high = temp_sorted <= t_high
    else:
        below_high = temp_sorted < t_high
    hit = pending & (temp_sorted >= t_low) & below_high
    value_mask[hit] = (y_test_sorted[hit] >= v_low) & (y_test_sorted[hit] <= v_high)
    pending &= ~hit

In [17]:
# Reduce temperatures, targets and predictions to the points that passed the
# value filter; all three are cut with the same mask so they stay aligned.
temp_final, y_test_final, y_test_pred_final = (
    values[value_mask]
    for values in (temp_sorted, y_test_sorted, y_test_pred_sorted)
)

# Tolerance band of +/- fixed_rmse, centred on the experimental values.
y_lower_bound = y_test_final - fixed_rmse
y_upper_bound = y_test_final + fixed_rmse

In [18]:
# Interactive Plotly figure: experimental vs predicted values over temperature,
# with a shaded band of +/- fixed_rmse around the experimental values.

# Look shared by both axes: grid lines are switched off (gridcolor would only
# matter if showgrid were turned on), and a thin light-gray axis line is
# mirrored onto the opposite side so the plot area is framed.
axis_style = dict(
    showgrid=False,
    gridcolor='lightgray',
    linecolor='lightgray',
    linewidth=0.5,
    mirror=True,
)

# The title is drawn as an annotation in paper coordinates, centred
# horizontally and placed just above the plot area.
title_annotation = dict(
    x=0.5,
    y=1.1,
    xref='paper',
    yref='paper',
    text='Predicted vs Experimental (temperature: 80-370 K)',
    showarrow=False,
    font=dict(size=18),
)

# Trace order matters: the lower bound uses fill='tonexty', which fills up to
# the trace added immediately before it, i.e. the upper bound.
fig = go.Figure(data=[
    # Experimental values, markers only
    go.Scatter(
        x=temp_final,
        y=y_test_final,
        mode='markers',
        name='Experimental',
        marker=dict(color='blue'),
        hovertemplate='Temperature: %{x}<br>Actual value: %{y}<extra></extra>',
    ),
    # Model predictions, markers only
    go.Scatter(
        x=temp_final,
        y=y_test_pred_final,
        mode='markers',
        name='Predicted',
        marker=dict(color='red'),
        hovertemplate='Temperature: %{x}<br>Predicted: %{y}<extra></extra>',
    ),
    # Upper edge of the band: zero-width line, present only as a fill target
    go.Scatter(
        x=temp_final,
        y=y_upper_bound,
        mode='lines',
        line=dict(width=0),
        showlegend=False,
    ),
    # Lower edge of the band: zero-width line plus the semi-transparent
    # light-blue fill between the two edges
    go.Scatter(
        x=temp_final,
        y=y_lower_bound,
        mode='lines',
        line=dict(width=0),
        fill='tonexty',
        fillcolor='rgba(0, 191, 255, 0.2)',
        showlegend=True,
        name='RMSE range',
    ),
])

# White background, fixed 900x500 canvas and 50 px margins on every side.
# The y-axis additionally gets a tick every 0.2 log10 units.
fig.update_layout(
    xaxis_title='Temperature (K)',
    yaxis_title='Log10 exchange bias',
    showlegend=True,
    xaxis=dict(axis_style),
    yaxis=dict(axis_style, dtick=0.2),
    plot_bgcolor='white',
    paper_bgcolor='white',
    width=900,
    height=500,
    margin=dict(l=50, r=50, t=50, b=50),
    annotations=[title_annotation],
)

fig.show()