# Evaluate the Performance of Firsov on Other Trajectories
The goal here is to compare against our ML model

In [1]:
%matplotlib inline
from matplotlib import pyplot as plt
from stopping_power_ml.integrator import TrajectoryIntegrator
from sklearn.linear_model import LinearRegression
import pickle as pkl
import pandas as pd
import numpy as np
import os

## Load in the Tools
We'll need the data for these trajectories and a new model

In [2]:
# Read the pickled collection of validation trajectories; later cells use it
# as a dict mapping a trajectory name to its DataFrame
new_data_path = os.path.join('..', 'data', 'new_data.pkl')
with open(new_data_path, 'rb') as fp:
    new_data = pkl.load(fp)

In [3]:
# Load the starting frame that is later handed to TrajectoryIntegrator.
# A context manager is used so the file handle is closed deterministically.
with open(os.path.join('..', '..', 'al_starting_frame.pkl'), 'rb') as fp:
    start_frame = pkl.load(fp)

In [4]:
# Read the pickled trajectory computer, which carries the ML model and the
# featurizers used further down
traj_computer_path = os.path.join('..', 'traj_computer.pkl')
with open(traj_computer_path, 'rb') as fp:
    traj_computer = pkl.load(fp)

In [5]:
# Read the pickled Firsov model from the working directory
firsov_path = 'firsov.pkl'
with open(firsov_path, 'rb') as fp:
    firsov = pkl.load(fp)

In [6]:
# Add the channel trajectory to the set of trajectories being evaluated
channel_path = os.path.join('..', 'data', 'channel_data.pkl.gz')
new_data['channel'] = pd.read_pickle(channel_path)

In [7]:
# Add the random trajectory to the set of trajectories being evaluated
random_path = os.path.join('..', 'data', 'random_data.pkl.gz')
new_data['random'] = pd.read_pickle(random_path)

## Run the Model on Each Trajectory
Use the model stored with the trajectory computer, which was trained on the random and channel trajectories

In [None]:
%%time
# Store the force predicted by each model as new columns on every trajectory
for trajectory_df in new_data.values():
    # ML model: predict from the feature columns named by the featurizers
    ml_inputs = trajectory_df[traj_computer.featurizers.feature_labels()]
    trajectory_df['force_pred_ml'] = traj_computer.model.predict(ml_inputs)

    # Firsov model: evaluated from position and velocity, then squeezed
    # to drop singleton dimensions before being stored as a column
    firsov_forces = firsov.featurize_many(trajectory_df[['position', 'velocity']], pbar=False)
    trajectory_df['force_pred_firsov'] = np.squeeze(firsov_forces)

## Compare the Model Accuracy
Get a sense for how well this model performed

In [None]:
fig, axs = plt.subplots(2, 3)

# One panel per trajectory; zip stops once either the panels or the trajectories run out
for ax, (name, trajectory) in zip(axs.flatten(), new_data.items()):
    # Leave out the rows flagged in the 'initial' column
    moving = trajectory[~ trajectory['initial']]

    # TD-DFT reference (dashed black), ML (red) and Firsov (blue)
    ax.plot(moving['displacement'], moving['force'], 'k--')
    ax.plot(moving['displacement'], moving['force_pred_ml'], 'r', lw=1, alpha=0.8)
    ax.plot(moving['displacement'], moving['force_pred_firsov'], 'b', lw=1, alpha=0.8)

    # Use a symmetric-log axis when the reference force exceeds 1
    if moving['force'].max() > 1:
        ax.set_yscale('symlog')

    # Make a label for the figure
    ax.text(0, 1, name, bbox={'facecolor': 'w', 'edgecolor': 'k'},
           transform=ax.transAxes)

# Make axes labels
for ax in axs[-1, :]:
    ax.set_xlabel('Displacement ($a_B$)')
for ax in axs[:, 0]:
    ax.set_ylabel('Force ($E_H / a_B$)')

# Set the final size first so that tight_layout fits the labels to the
# figure as it will actually be saved
fig.set_size_inches(6.5, 3.5)
fig.tight_layout()
fig.savefig(os.path.join('figures', 'validation-set.png'), dpi=320)

*Finding*: Strong qualitative agreement with the stopping forces

# Compute Stopping Power for Channels
Two of the calculations are aligned along crystallographic axes. As such, we need to determine the stopping power along one repeat of the unit cell

## Make a Trajectory Computer for the Firsov Model
The TrajectoryIntegrator class has some useful utilities, and needs an ML model to work. As the output from the Firsov model is our desired force, we'll just make a $f(x) = x$ model using sklearn's linear model

In [None]:
# Build the f(x) = x model by hand: the fitted attributes are set directly
# (slope of 1, intercept of 0) rather than by calling fit
firsov_linreg = LinearRegression()
firsov_linreg.coef_, firsov_linreg.intercept_ = np.array([1]), 0

In [None]:
# Trajectory integrator that uses the Firsov model in place of the ML model
traj_computer_firsov = TrajectoryIntegrator(
    start_frame,    # starting frame loaded from al_starting_frame.pkl
    firsov_linreg,  # identity model, so the Firsov output is used as the force
    firsov,         # Firsov model, passed in the third positional slot
)

In [None]:
def get_channel_stopping_power(data, direction, traj_computer):
    """Get the stopping power along a channel

    The TD-DFT value is computed by averaging the force over every window
    that is one channel repeat long and starts at a sampled displacement,
    then taking the median of those window averages.

    Args:
        data (DataFrame): TD-DFT data for a certain channel trajectory.
            Must contain 'displacement', 'force' and 'position' columns
        direction ([int]): Directions in lattice coordinates
        traj_computer (TrajectoryIntegrator): Tool used to compute the stopping force
    Returns:
        - (float) Stopping power from TD-DFT. NaN if the trajectory is too
          short to hold a window starting before ``max displacement - length``
        - Output of ``traj_computer.compute_stopping_power`` for the first
          position in ``data``; callers in this notebook unpack it as a
          ``(stopping power, error estimate)`` pair
    """

    # Get the length of the channel
    length = np.linalg.norm(traj_computer._compute_trajectory(direction))

    displacement = data['displacement']
    force = data['force']

    # Average over all possible choices of window start. Boolean masks are
    # used instead of building and parsing a query string for every row.
    max_distance = displacement.max() - length
    starts = displacement[displacement < max_distance]
    stops = [
        force[(displacement >= start) & (displacement <= start + length)].mean()
        for start in starts
    ]

    # The third argument (1) is presumably the projectile velocity -- TODO confirm
    # against TrajectoryIntegrator.compute_stopping_power
    return np.median(stops), traj_computer.compute_stopping_power(data.iloc[0]['position'], direction, 1, abserr=1e-4)

Print out the results of each channel direction

In [None]:
# Lattice direction for each channel trajectory, keyed by the name of the
# matching entry in `new_data`
channels = dict([
    ('Dmod_v1.0', [2, 1, 1]),
    ('Dmin_v1.0', [1, 2, 2]),
    ('channel', [1, 0, 0]),
])

In [None]:
for key, direct in channels.items():
    # Both calls use the same TD-DFT data, so `tddft` is the same value each time;
    # only the second return value (model stopping power, error) differs.
    # The results are stored as `ml_stop` / `firsov_stop` so that the loaded
    # `firsov` model is not overwritten by a number.
    tddft, (ml_stop, ml_err) = get_channel_stopping_power(new_data[key], direct, traj_computer)
    tddft, (firsov_stop, firsov_err) = get_channel_stopping_power(new_data[key], direct, traj_computer_firsov)
    
    print('{}: TD-DFT: {:.3f} - ML {:.3f} - Firsov {:.3f}'.format(key, tddft, ml_stop, firsov_stop))

*Finding*: We overestimate the channel, and underestimate the random directions

## Compare the Ordering of Stopping Powers
See how well ML and TD-DFT match up

In [None]:
# Collect one record per trajectory holding the mean TD-DFT, ML and Firsov
# force, leaving out the rows flagged in the 'initial' column
mean_force_records = []
for traj_name, traj_frame in new_data.items():
    moving = traj_frame[~ traj_frame['initial']]
    mean_force_records.append(dict(
        name=traj_name,
        tddft=moving['force'].mean(),
        ml=moving['force_pred_ml'].mean(),
        firsov=moving['force_pred_firsov'].mean(),
    ))
results = pd.DataFrame(mean_force_records)

In [None]:
# Display the per-trajectory mean forces (TD-DFT, ML and Firsov) as a table
results

In [None]:
fig, ax = plt.subplots()

ax.scatter(results['tddft'], results['ml'], label='ML')
ax.scatter(results['tddft'], results['firsov'], label='Firsov')

# Base the axis limits on the TD-DFT and ML values only (with a 2% margin);
# the Firsov values are left out of the range calculation
limit_values = results.drop(['name', 'firsov'], axis=1)
min_value = limit_values.min().min() * 0.98
max_value = limit_values.max().max() * 1.02

ax.set_xlim([min_value, max_value])
# Extend the y axis a decade above the x range
ax.set_ylim([min_value, max_value * 10])

# Parity line: points on it have predicted == TD-DFT
ax.plot(ax.get_xlim(), ax.get_xlim(), 'k--')

ax.set_xlabel('$S$, TD-DFT ($E_H / a_B$)')
ax.set_ylabel('$S$, Predicted ($E_H / a_B$)')

ax.set_yscale('log')

# Show the 'ML' / 'Firsov' labels given to the scatter calls above
ax.legend()

fig.set_size_inches(3.5, 2.5)
fig.tight_layout()
fig.savefig(os.path.join('figures', 'stopping-power-comparison.png'), dpi=320)

*Finding*: Firsov model is terrible compared to ML