# Machine learning pipeline for modelling fraction of precipitation in different intensity bands

This notebook is designed to be run in AzureML 

### Import relevant libraries

In [1]:
import pathlib
import os
import datetime
import json

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [3]:
# %load_ext tensorboard

In [4]:
import tensorflow as tf
from tensorflow.keras.layers import Dense, Activation, Flatten
from tensorflow.keras.layers import Conv1D, concatenate
from tensorflow.keras.layers import ZeroPadding1D, Reshape, Input, Dropout, PReLU
from tensorflow.keras.models import Sequential, Model

In [5]:
import prd_pipeline

In [6]:
pathlib.Path.cwd().parent

PosixPath('/mnt/batch/tasks/shared/LS_root/mounts/clusters/prd-ml-fractions/code/Users/hannah.brown/precip_rediagnosis')

Set up MLops for experiment tracking in AzureML

In [7]:
import mlflow
mlflow.tensorflow.autolog()



In [8]:
import azureml.core
from azureml.core import Workspace, Datastore, Dataset
from azureml.core import Experiment

prd_ws = Workspace.from_config()

In [9]:
mlflow.set_tracking_uri(prd_ws.get_mlflow_tracking_uri())

### Load and preprocess data

In [10]:
prd_prefix = 'prd'
merged_prefix = prd_prefix + '_merged'
csv_file_suffix = 'csv'

In [11]:
# Intensity bands keyed by the label used in the band column names.
# Consecutive bands share an edge, so they are built from one list of edges;
# each value is a [lower_bound, upper_bound] pair.
band_labels = ['0.0', '0.25', '2.5', '7.0', '10.0']
band_edges = [0, 0.01, 0.5, 4, 10, 220]
bands = {
    label: [lower_edge, upper_edge]
    for label, lower_edge, upper_edge in zip(band_labels, band_edges[:-1], band_edges[1:])
}

In [12]:
intensity_band_template = '{source}_fraction_in_band_instant_{band_centre}'

In [13]:
# One column name per intensity band: radar-derived targets and the matching
# NWP ('mogrepsg') fractions they are compared against.
target_parameter = [
    intensity_band_template.format(source='radar', band_centre=band_key)
    for band_key in bands
]
nwp_comparison = [
    intensity_band_template.format(source='mogrepsg', band_centre=band_key)
    for band_key in bands
]

# Features currently fed to the model; the trailing comments list candidates that are switched off.
profile_features = [
    'air_temperature',
    'relative_humidity',
    'wind_speed',
    'wind_from_direction',
    'cloud_volume_fraction',
]  # 'air_pressure',
single_lvl_features = []  # 'thickness_of_rainfall_amount', 'surface_altitude', 'air_pressure_at_sea_level', 'cloud_area_fraction'

In [14]:
feature_dict = {
    'profile': profile_features,
    'single_level': single_lvl_features,
    'target': target_parameter,
    'nwp': nwp_comparison, 
    'metadata': ['time', 'realization', 'latitude', 'longitude']
} 

In [15]:
load_all = True

In [16]:
# Name of the registered AzureML dataset to read: every event, or a single-event subset.
prd_azml_dataset_name = (
    'prd_merged_all_events_files'
    if load_all
    else 'prd_merged_202110_nswws_amber_oct_files'
)


In [17]:
prd_azml_dataset = azureml.core.Dataset.get_by_name(prd_ws, name=prd_azml_dataset_name)

In [18]:
# Mount the AzureML file dataset and read every CSV under the mount point into one frame.
with prd_azml_dataset.mount() as prd_mount:
    print('loading all data')
    # Match on the '.csv' extension (not just names that end in 'csv') and sort the
    # paths so the row order of the concatenated frame is the same on every run.
    prd_path_list = sorted(pathlib.Path(prd_mount.mount_point).rglob(f'*.{csv_file_suffix}'))
    if not prd_path_list:
        raise FileNotFoundError(
            f'No .{csv_file_suffix} files found in dataset {prd_azml_dataset_name!r}')
    # ignore_index gives the combined frame unique row labels; without it the labels
    # of each file read by read_csv would be repeated.
    merged_df = pd.concat([pd.read_csv(p1) for p1 in prd_path_list], ignore_index=True)

Not mounting as a volume: ArgumentError(InvalidArgument { argument: "arguments.path", expected: "Glob patterns inside the path are not supported by the volume mount.Path must be a direct path to the file or folder, or end with '/**' or '/**/*' to match the entire content of the volume.", actual: "REDACTED" }). 
Falling back to dataflow mount.
loading all data


### Calculate NWP probabilities of falling in each intensity band - <i> to be moved into data prep </i>

In [19]:
def calc_nwp_probabilities(data, lower_bound, upper_bound):
    """Return the fraction of entries in ``data`` inside [lower_bound, upper_bound).

    The lower bound is inclusive and the upper bound exclusive. The count of
    in-band entries is divided by the length of the first axis of ``data``.

    Args:
        data: array-like supporting element-wise comparison, ``.sum()`` and
            ``.shape`` (e.g. a pandas Series/DataFrame or a numpy array).
        lower_bound: inclusive lower edge of the band.
        upper_bound: exclusive upper edge of the band.

    Returns:
        The in-band count divided by ``data.shape[0]``; for a DataFrame this is
        one value per column.
    """
    at_or_above_lower = data >= lower_bound
    below_upper = data < upper_bound
    n_in_band = (at_or_above_lower & below_upper).sum()
    return n_in_band / data.shape[0]

In [20]:
# %%timeit
# Fraction of rows at each (latitude, longitude, time) whose NWP rainfall_rate lies in
# each band [lower_bound, upper_bound).
# The in-band flags are built once for the whole frame and averaged per group. This gives
# the same numbers as applying calc_nwp_probabilities group by group, but avoids a
# Python-level call for every group and every band.
nwp_group_columns = ['latitude', 'longitude', 'time']
nwp_rainfall_rate = merged_df['rainfall_rate'].to_numpy()
nwp_in_band = merged_df[nwp_group_columns].copy()
nwp_band_columns = []
for intensity_band, (lower_bound, upper_bound) in bands.items():
    nwp_band_column = intensity_band_template.format(source='mogrepsg', band_centre=intensity_band)
    # Stored as 0.0/1.0 so that the group mean equals the in-band fraction.
    nwp_in_band[nwp_band_column] = (
        (nwp_rainfall_rate >= lower_bound) & (nwp_rainfall_rate < upper_bound)
    ).astype(float)
    nwp_band_columns.append(nwp_band_column)
nwp_band_means = nwp_in_band.groupby(nwp_group_columns)[nwp_band_columns].mean()
# Keep the original structure: a list of single-column frames, one per band.
nwp_fractions = [nwp_band_means[[nwp_band_column]] for nwp_band_column in nwp_band_columns]

In [21]:
nwp_prob_df = pd.concat(nwp_fractions, axis=1)

In [22]:
# Attach the per-(latitude, longitude, time) NWP band fractions to every row.
# Band columns already present (e.g. from an earlier run of this cell) are dropped
# first, so re-running does not create suffixed duplicates (``*_x`` / ``*_y``).
merged_df = pd.merge(
    merged_df.drop(columns=nwp_prob_df.columns, errors='ignore'),
    nwp_prob_df,
    left_on=['latitude', 'longitude', 'time'],
    right_index=True,
)

In [23]:
merged_df.head()

Unnamed: 0,realization,latitude,longitude,forecast_period,forecast_reference_time,time,cloud_area_fraction,surface_altitude,air_pressure_at_sea_level,rainfall_rate,...,radar_fraction_in_band_instant_0.0,radar_fraction_in_band_instant_0.25,radar_fraction_in_band_instant_2.5,radar_fraction_in_band_instant_7.0,radar_fraction_in_band_instant_10.0,mogrepsg_fraction_in_band_instant_0.0,mogrepsg_fraction_in_band_instant_0.25,mogrepsg_fraction_in_band_instant_2.5,mogrepsg_fraction_in_band_instant_7.0,mogrepsg_fraction_in_band_instant_10.0
0,0,49.40625,-5.484375,0 days 06:00:00,2020-02-07 12:00:00,2020-02-07 18:00:00,0.984375,0.0,101050.0,0.0,...,0.463529,0.515294,0.025882,0.0,0.0,0.666667,0.222222,0.111111,0.0,0.0
1,1,49.40625,-5.484375,0 days 06:00:00,2020-02-07 12:00:00,2020-02-07 18:00:00,1.0,0.0,101036.0,0.536442,...,0.463529,0.515294,0.025882,0.0,0.0,0.666667,0.222222,0.111111,0.0,0.0
2,2,49.40625,-5.484375,0 days 06:00:00,2020-02-07 12:00:00,2020-02-07 18:00:00,0.984375,0.0,101104.0,0.0,...,0.463529,0.515294,0.025882,0.0,0.0,0.666667,0.222222,0.111111,0.0,0.0
3,3,49.40625,-5.484375,0 days 06:00:00,2020-02-07 12:00:00,2020-02-07 18:00:00,0.84375,0.0,101005.0,0.0,...,0.463529,0.515294,0.025882,0.0,0.0,0.666667,0.222222,0.111111,0.0,0.0
4,4,49.40625,-5.484375,0 days 06:00:00,2020-02-07 12:00:00,2020-02-07 18:00:00,1.0,0.0,101038.0,0.0,...,0.463529,0.515294,0.025882,0.0,0.0,0.666667,0.222222,0.111111,0.0,0.0


### Calculate u and v wind fields and add column to dataset <i>- to be moved into data prep </i>

Adding columns with u and v wind fields

In [25]:
# import math

# wdir_columns = prd_pipeline.get_profile_columns(['wind_from_direction'], merged_df.columns)
# ws_columns = prd_pipeline.get_profile_columns(['wind_speed'], merged_df.columns)

# for (wdir, ws) in zip(wdir_columns, ws_columns):
#     height_level = wdir.split('_')[-1]
#     print(height_level)
#     merged_df[f'u_wind_{height_level}'] = merged_df[f'wind_from_direction_{height_level}'].apply(
#         lambda x: math.sin(math.radians(270 - x))) * merged_df[f'wind_speed_{height_level}']
#     merged_df[f'v_wind_{height_level}'] = merged_df[f'wind_from_direction_{height_level}'].apply(
#         lambda x: math.cos(math.radians(270 - x))) * merged_df[f'wind_speed_{height_level}']

To check the u and v wind conversion, we convert back from the u and v wind fields to wind speed and direction and compare the result with the wind speed and direction in the dataset. This test currently fails because a few data points have a wind speed of zero. It may be worth investigating whether these zeros are spurious, but at this stage it is probably easiest to filter out the zero values.

In [26]:
# # test for u and v wind conversion
# for (wdir, ws) in zip(wdir_columns, ws_columns):
#     height_level = wdir.split('_')[-1]
#     print(height_level)
#     print(merged_df[(merged_df[f'wind_from_direction_{height_level}'] != np.round(np.rad2deg(np.arctan2(merged_df[f'v_wind_{height_level}'], merged_df[f'u_wind_{height_level}'])) + 180, 4))][[f'wind_from_direction_{height_level}', f'wind_speed_{height_level}']])
#     merged_df = merged_df[merged_df[ws] != 0]
    
#     assert (merged_df[f'wind_speed_{height_level}'] == np.round(np.sqrt(merged_df[f'u_wind_{height_level}']**2 + merged_df[f'v_wind_{height_level}']**2),4)).all()
#     assert (merged_df[f'wind_from_direction_{height_level}'] == np.round(np.rad2deg(np.arctan2(merged_df[f'v_wind_{height_level}'], merged_df[f'u_wind_{height_level}'])) + 180, 4)).all()

### Data pre-processing

Split train, test and validate datasets

In [None]:
test_fraction=0.2
# Fixed seed so the train/test split (and everything evaluated on it) is the same on
# every Restart & Run All; change the value to draw a different split.
split_seed = 42
df_train, df_test = prd_pipeline.random_time_space_sample(merged_df, test_fraction=test_fraction, random_state=np.random.RandomState(split_seed), sampling_columns = ['time', 'latitude', 'longitude'])
# df_train holds (1 - test_fraction) of the data, so the fraction passed on is rescaled
# to be test_fraction of the full dataset.
# NOTE(review): presumably preprocess_data uses it to split off the validation set - confirm in prd_pipeline.
data_splits, data_dims_dict = prd_pipeline.preprocess_data(df_train, feature_dict, test_fraction=test_fraction/(1-test_fraction))

target has dims: 5
dropping smallest bin: radar_fraction_in_band_instant_0.0
getting profile columns
{'nprof_features': 5, 'nheights': 33, 'nsinglvl_features': 0, 'nbands': 5}


### Plot pie charts of average fractions in bands

Make a copy of the training dataset and add a column which contains the label for the intensity band with the highest fraction

In [None]:
# Copy the training targets (fresh 0..n-1 index) and label each row with the
# column name of the band that holds its largest fraction.
y_train_copy = data_splits['y_train'].copy().reset_index(drop=True)
dominant_band = y_train_copy.idxmax(axis=1)
y_train_copy['max_bin'] = dominant_band

# Show the rows whose dominant band is the last (highest-intensity) target column.
top_band_column = feature_dict['target'][-1]
y_train_copy[y_train_copy['max_bin'] == top_band_column]

Plot a pie chart of counts of the different intensity bands having highest fraction

In [None]:
# Number of training rows for which each band holds the largest fraction.
max_bin_counts = y_train_copy['max_bin'].value_counts()
avg_df = pd.DataFrame({'Counts': max_bin_counts})

avg_df.plot.pie(subplots=True)
plt.title('Intensity band with highest fraction')
# Legend is placed below the pie.
plt.legend(loc='upper center', bbox_to_anchor=(0.5,-0.1))
plt.show()

In [None]:
def pie_chart_mean_fractions_in_bands(df, title):
    """Draw a pie chart of the mean of each column of ``df``.

    Args:
        df: pandas DataFrame; ``df.mean()`` supplies one slice per column.
        title: title placed on the chart.

    The figure is shown with ``plt.show()``; nothing is returned.
    """
    column_means = df.mean()
    avg_df = pd.DataFrame({'Average': column_means})
    avg_df.plot.pie(subplots=True)
    plt.title(title)
    # Legend is placed below the pie.
    plt.legend(loc='upper center', bbox_to_anchor=(0.5,-0.1))
    plt.show()

Produce pie chart for the average fraction in each intensity band for the train, test and validate datasets

In [None]:
pie_chart_mean_fractions_in_bands(data_splits['y_train'], title='Average fraction in each intensity band - train dataset')

In [None]:
pie_chart_mean_fractions_in_bands(data_splits['y_val'], title='Average fraction in each intensity band - validate dataset')

In [None]:
# pie_chart_mean_fractions_in_bands(df_test[target_parameter], title='Average fraction in each intensity band - test dataset')

In [None]:
data_splits['y_train'].boxplot(rot=90)

In [None]:
data_splits['nwp_val'].boxplot(rot=90)

In [None]:
data_splits['y_val'].boxplot(rot=90)

In [None]:
df_test[target_parameter].boxplot(rot=90)

For each intensity band, select the subset of data points where that band has the highest fraction, then produce a pie chart of the average fraction in each intensity band for that subset. This shows whether, when a band has the highest fraction, it is on average only marginally the highest or much higher than the fractions in the other bands.

In [None]:
# y_train_copy = y_train_copy.reset_index(drop=True)
# for col in y_train_copy.columns[:-1]:
#     subset = y_train_copy[y_train_copy['max_bin']==col]
#     pie_chart_mean_fractions_in_bands(subset, title=f'Average fraction in intensity band \n highest frequency band {col} (nsamples = {len(subset)})')
#     plt.show()

### Class imbalance

Calculate weights to input when fitting model, which is used for weighting the loss function in the model training.

In [None]:
# Recompute the dominant band from the fraction columns only. y_train_copy may already
# carry the string-valued 'max_bin' column, which must not take part in idxmax.
y_train_copy['max_bin'] = y_train_copy.drop(columns='max_bin', errors='ignore').idxmax(axis=1)
y_train_copy['max_bin'].value_counts()

In [None]:
# # weights = 1 / (tmp['max_bin'].value_counts() / tmp.shape[0]) / 4
# weights = 1 / data_splits['y_train'].mean() / 100
# weights = weights.reset_index(drop=True).to_dict()
# weights

Resample data to create more uniform distribution within the training data

In [None]:
# nsamples = len(y_train)//len(y_train.columns)
# print('n samples =', nsamples)

# y_train = pd.DataFrame()

# y_train = y_train.reset_index(drop=True)

# for col in y_train.columns[0:-1]:
#     print(col)
#     ids = y_train[y_train['max_bin']==col].index
#     print(ids.shape)
#     if len(ids)>0:
#         choices = np.random.choice(ids, nsamples)
#         y_train = pd.concat([y_train, y_train.loc[choices]])
#         try: 
#             X_train = np.concatenate([X_train, data_splits['X_train'][choices]])
#         except NameError:
#             X_train = data_splits['X_train'][choices]

# cols = data_splits['y_train'].columns
# plt.plot(data_splits['y_train'].sum(axis=0), label='original data')
# plt.plot(y_train[cols].sum(axis=0), label='resampled data')
# plt.legend()
# plt.show()

# y_train = y_train.drop(columns=['max_bin'])

# data_splits['X_train'] = X_train
# data_splits['y_train'] = y_train
# data_dims_dict['nbands'] = data_splits['y_train'].shapey_train

## Run experiment

Here we actually run the training and evaluation. All parameters and the output model will be saved through ML Flow, and we can track training stats through tensorboard.

In [None]:
exp_name='prd_fraction_models_mlflow'
# Reuse the experiment if it already exists and create it only on first use, so this
# cell can be re-run. Either way exp1 holds the experiment id.
existing_experiment = mlflow.get_experiment_by_name(exp_name)
if existing_experiment is not None:
    exp1 = existing_experiment.experiment_id
else:
    exp1 = mlflow.create_experiment(exp_name)
exp1

In [None]:
# Look the experiment up by name rather than via the previous value of exp1, so the
# cell still works when re-run after exp1 has become an Experiment object.
exp1 = mlflow.get_experiment_by_name(exp_name)
exp1

In [None]:
hyperparameter_dict = {
    'epochs': 10, 
    'learning_rate': 0.001, 
    'batch_size': 64, 
    # 'class_weights': weights, 
    'loss': tf.keras.losses.KLDivergence()
}

In [None]:
# Features given to the model, recorded on the run as a parameter.
model_input_features = feature_dict['profile'] + feature_dict['single_level']

# Build and train inside an MLflow run.
with mlflow.start_run(experiment_id=exp1.experiment_id) as current_run:
    mlflow.log_param('features', model_input_features)
    model = prd_pipeline.build_model(**data_dims_dict)
    model.summary()
    model, history = prd_pipeline.train_model(
        model,
        data_splits,
        hyperparameter_dict,
        log_dir='./logs',
    )

In [None]:
# Training and validation loss per epoch. The x axis comes from the recorded history
# instead of a hard-coded 10, so it stays correct if the number of epochs changes.
epochs_run = range(len(history.history['loss']))
plt.plot(epochs_run, history.history['val_loss'], label='val_loss')
plt.plot(epochs_run, history.history['loss'], label='loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()

In [None]:
# Training and validation accuracy per epoch. The x axis comes from the recorded history
# instead of a hard-coded 10, so it stays correct if the number of epochs changes.
epochs_run = range(len(history.history['accuracy']))
plt.plot(epochs_run, history.history['val_accuracy'], label='val_accuracy')
plt.plot(epochs_run, history.history['accuracy'], label='accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()
plt.show()

### Feature importance

Here we calculate the Breiman permutation importance for each of the model input features. This allows us to assess how much information each input feature is contributing to the resulting predictions from the ML model. 

 - At each step, only one predictor is permuted (randomized)
 - The amount by which the loss function increases when 𝑥 is randomized, is considered the importance of 𝑥
 - If the loss function increases slightly when 𝑥 is permuted, 𝑥 is somewhat important
 - If the loss function explodes when 𝑥 is permuted, 𝑥 is very important
 - If the loss function remains the same or decreases when 𝑥 is permuted, 𝑥 is not important

In [None]:
feature_names = feature_dict['profile'] + feature_dict['single_level']

In [None]:
baseline_metric = history.history['val_loss'][-1]
npermutations=10

In [None]:
permutation_importance = prd_pipeline.calculate_permutation_feature_importance(model, data_splits, feature_dict, baseline_metric, npermutations)

In [None]:
# Mean importance score per feature across all permutations.
avg_importance = {
    feature: np.mean(scores)
    for feature, scores in permutation_importance.items()
}
print(avg_importance)

In [None]:
# Box plot of the per-permutation scores, features ordered by increasing mean importance.
features_by_importance = sorted(avg_importance, key=avg_importance.get)
df = pd.DataFrame(permutation_importance).reindex(columns=features_by_importance)

df.boxplot(showmeans=True, vert=False)
plt.xlabel('Change in loss function by permuting feature')
plt.ylabel('Feature')
plt.show()

### Model evaluation

In [None]:
# Model predictions on the validation inputs, wrapped in a frame with one 'ml' column per band.
y_pred = model.predict(data_splits['X_val'])

pred_column_names = [
    intensity_band_template.format(source='ml', band_centre=band_key)
    for band_key in bands
]
y_pred_df = pd.DataFrame(data=y_pred, columns=pred_column_names)

In [None]:
# Put validation metadata, NWP fractions, radar targets and ML predictions side by side.
# Indexes are reset first so the pieces line up row by row.
evaluation_parts = [
    data_splits[split_name].reset_index(drop=True)
    for split_name in ('meta_val', 'nwp_val', 'y_val')
]
evaluation_df = pd.concat(evaluation_parts + [y_pred_df], axis=1)

In [None]:
# Tick labels for the bands. They are defined here because this is the first cell
# that uses them; without this a fresh run raises NameError.
intensity_bands = ['0mm-0.01mm', '0.01mm-0.5mm', '0.5mm-4mm', '4mm-10mm', '>10mm']

# Radar, NWP and ML fractions for the first row(s) with realization == 0.
df = evaluation_df[evaluation_df.realization==0]
for i in np.arange(1):
    df.iloc[i][feature_dict['target']].plot(label='Radar')
    df.iloc[i][feature_dict['nwp']].plot(label='NWP')
    df.iloc[i][pred_column_names].plot(label='ML (control member)')
    plt.ylabel('Fraction')
    plt.xticks(np.arange(len(intensity_bands)), intensity_bands, rotation=45, ha='right')
    plt.legend()
    plt.show()

In [None]:
grouped_df = evaluation_df.groupby(['time', 'latitude', 'longitude'])
grouped_df_mean = grouped_df.agg('mean')

In [None]:
# Tick labels for the bands, defined in this cell so that it does not depend on a
# later cell having been run first.
intensity_bands = ['0mm-0.01mm', '0.01mm-0.5mm', '0.5mm-4mm', '4mm-10mm', '>10mm']

# Radar, NWP and ML fractions averaged over each (time, latitude, longitude) group.
for i in np.arange(1):
    grouped_df_mean.iloc[i][feature_dict['target']].plot(label='Radar')
    grouped_df_mean.iloc[i][feature_dict['nwp']].plot(label='NWP')
    grouped_df_mean.iloc[i][pred_column_names].plot(label='ML (mean)')
    plt.ylabel('Fraction')
    plt.xticks(np.arange(len(intensity_bands)), intensity_bands, rotation=45, ha='right')
    plt.legend()
    plt.show()

In [None]:
intensity_bands = ['0mm-0.01mm', '0.01mm-0.5mm', '0.5mm-4mm', '4mm-10mm', '>10mm']

# For each plotted group: every row's ML prediction in grey, with the group-mean
# radar, NWP and ML fractions drawn on top.
group_keys = list(grouped_df.groups.keys())
for i in np.arange(1):
    # groupby(...).groups maps each key to index *labels*, so select with .loc rather
    # than positional .iloc (the two only agree for a default 0..n-1 index).
    member_rows = evaluation_df.loc[grouped_df.groups[group_keys[i]]]
    member_rows[pred_column_names].T.plot(lw=0.5, color='grey', label='_nolegend_')

    grouped_df_mean.iloc[i][feature_dict['target']].plot(label='Radar')
    grouped_df_mean.iloc[i][feature_dict['nwp']].plot(label='NWP')
    grouped_df_mean.iloc[i][pred_column_names].plot(label='ML (mean)', ls='--')

    plt.ylabel('Fraction')
    plt.xticks(np.arange(len(intensity_bands)), intensity_bands, rotation=45, ha='right')
    plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
    # Title: first group-key element on one line, the remaining elements below it.
    plt.title(f'{group_keys[i][0]}\n{group_keys[i][1:]}')

    plt.show()

In [None]:
intensity_bands = ['0mm-0.01mm', '0.01mm-0.5mm', '0.5mm-4mm', '4mm-10mm', '>10mm']

# Grouped bar chart of group-mean ML, NWP and radar fractions per intensity band.
group_keys = list(grouped_df.groups.keys())
for i in np.arange(1):
    group_mean_row = grouped_df_mean.iloc[i]
    x = np.arange(data_dims_dict['nbands'])  # bar-group centres, one per band
    width = 0.25  # width of a single bar

    fig, ax = plt.subplots(figsize=(8,5))
    # Bar order left to right within a group: ML, NWP, radar.
    rects1 = ax.bar(x - width, group_mean_row[pred_column_names], width, label='ML predicted fractions (mean)')
    rects2 = ax.bar(x + width, group_mean_row[feature_dict['target']], width, label='Radar fractions')
    rects3 = ax.bar(x, group_mean_row[feature_dict['nwp']], width, label='NWP probabilities')

    # Axis labels, title and band names on the x axis.
    ax.set_title('Instantenous precipitiation fractions in intensity bands')
    ax.set_xlabel('Precip intensity band')
    ax.set_ylabel('Fraction in precip intensity band')

    plt.xticks(np.arange(5), intensity_bands)
    ax.legend()

    fig.tight_layout()

    plt.show()


Fractional skill score

In [None]:
def calculate_fss(obs, fx):
    """Fractional skill score of ``fx`` against ``obs``.

    The inputs to this function are the cumulative probability/fraction of
    exceeding a given precipitation threshold.

    Computed as ``1 - FBS / FBS_ref`` with ``FBS = sum((fx - obs)**2)`` and
    ``FBS_ref = sum(fx**2) + sum(obs**2)``.

    Args:
        obs: observed fractions (array-like supporting arithmetic and ``.sum()``).
        fx: forecast fractions, same length as ``obs``.

    Returns:
        The score (1 for identical fields). NaN is returned when both fields
        are entirely zero, because the score is then undefined (0/0).
    """
    FBS = ((fx - obs)**2).sum()
    FBS_ref = (fx**2).sum() + (obs**2).sum()
    # Both fields all zero (e.g. exceedance of the top band): avoid dividing 0 by 0.
    # The check only applies to scalar sums; non-scalar sums fall through unchanged.
    if np.ndim(FBS_ref) == 0 and FBS_ref == 0:
        return float('nan')
    FSS = 1 - (FBS/FBS_ref)
    return FSS

In [None]:
# Calculate FSS - each ensemble member produced ML prediction and treated as deterministic 
for i, col in enumerate(target_parameter):
    # Exceedance fraction = 1 - (sum of the fractions in bands 0..i), floored at 0
    # because the summed fractions can come out marginally above 1.
    y_test = (1 - data_splits['y_val'][target_parameter[:i+1]].sum(axis=1)).clip(lower=0)
    # The NWP columns are accumulated over the same bands 0..i as the radar columns
    # (indexing the 5-element list with i+5 is always out of range).
    nwp_test = (1 - data_splits['nwp_val'][nwp_comparison[:i+1]].sum(axis=1)).clip(lower=0)
    y_cumulative_pred = np.clip(1 - y_pred[:,:i+1].sum(axis=1), 0, None)
    print(f'ML FSS (intensity band {intensity_bands[i]}) {calculate_fss(y_test, y_cumulative_pred):.4f}')
    print(f'NWP FSS (intensity band {intensity_bands[i]}) {calculate_fss(y_test, nwp_test):.4f}')

In [None]:
# Calculate FSS for mean of ML prediction from set of ens members
grouped = evaluation_df.groupby(['time', 'latitude', 'longitude']).agg('mean')
for i, col in enumerate(target_parameter):
    # Exceedance fraction = 1 - (sum of the fractions in bands 0..i), floored at 0
    # because the summed fractions can come out marginally above 1.
    y_test = (1 - grouped[target_parameter[:i+1]].sum(axis=1)).clip(lower=0)
    # The NWP columns are accumulated over the same bands 0..i as the radar columns
    # (indexing the 5-element list with i+5 is always out of range).
    nwp_test = (1 - grouped[nwp_comparison[:i+1]].sum(axis=1)).clip(lower=0)
    y_cumulative_pred = (1 - grouped[pred_column_names[:i+1]].sum(axis=1)).clip(lower=0)
    print(f'ML FSS (intensity band {intensity_bands[i]}) {calculate_fss(y_test, y_cumulative_pred):.4f}')
    print(f'NWP FSS (intensity band {intensity_bands[i]}) {calculate_fss(y_test, nwp_test):.4f}')

# Plot of FSS on map

In [None]:
def fss_for_grid(df, model, feature_dict, data_dims_dict):
    """Compute per-band ML and NWP fractional skill scores for the rows of ``df``.

    Inputs, radar targets and NWP fractions are obtained from
    ``prd_pipeline.load_test_data``; ``model.predict`` supplies the ML band
    fractions. Each set of fractions is turned into ``1 - cumsum`` across the
    band axis (floored at 0) and scored band by band against the radar values
    with ``calculate_fss``.

    Args:
        df: rows to evaluate, passed straight to ``prd_pipeline.load_test_data``.
        model: fitted model exposing ``predict``.
        feature_dict: feature configuration; ``feature_dict['target']`` provides
            the band column names used to label the scores.
        data_dims_dict: passed straight to ``prd_pipeline.load_test_data``.

    Returns:
        pandas Series indexed ``ml_fss_<band>`` followed by ``nwp_fss_<band>``,
        where ``<band>`` is the last ``_``-separated token of each target name.
    """
    def _exceedance(fractions):
        # 1 - cumulative sum over the band axis; cumulative fractions that sum to
        # just over 1 (rounding error) would give small negatives, so floor at 0.
        cdf = 1 - fractions.cumsum(axis=1)
        cdf[cdf<0]=0
        return cdf

    X_test_df, y_test_df, nwp_test_df = prd_pipeline.load_test_data(df, feature_dict, data_dims_dict)
    ypred_test_df = model.predict(X_test_df)

    # NOTE(review): the original comment on this step read "WHY DOES THIS NOT WORK??";
    # the cause is not recorded here - confirm before relying on these scores.
    y_pred_cdf = _exceedance(ypred_test_df)
    y_test_cdf = _exceedance(y_test_df)
    nwp_test_cdf = _exceedance(nwp_test_df)

    # Score every band against the radar exceedance fractions.
    # The predictions are indexed as a 2-D array, the radar/NWP values by position.
    target_names = feature_dict['target']
    band_positions = range(len(target_names))
    ml_fss = [
        calculate_fss(y_test_cdf.iloc[:,i], y_pred_cdf[:,i])
        for i in band_positions
    ]
    nwp_fss = [
        calculate_fss(y_test_cdf.iloc[:,i], nwp_test_cdf.iloc[:,i])
        for i in band_positions
    ]

    band_suffixes = [name.split('_')[-1] for name in target_names]
    ml_fss_names = [f'ml_fss_{suffix}' for suffix in band_suffixes]
    nwp_fss_names = [f'nwp_fss_{suffix}' for suffix in band_suffixes]

    ml_scores = pd.Series(ml_fss, index=ml_fss_names)
    nwp_scores = pd.Series(nwp_fss, index=nwp_fss_names)
    return pd.concat([ml_scores, nwp_scores])

In [None]:
# To calculate FSS of fraction predicted by ML model treating each ensemble members as deterministic
# fss_grid = merged_df.groupby(['latitude', 'longitude']).apply(lambda x: fss_for_grid(x, model, feature_dict, data_dims_dict))

In [None]:
# To calculate FSS of mean fraction predicted by ML model for ensemble members from same model run
# grid_grouped_df = merged_df.groupby(['time', 'latitude', 'longitude']).agg('mean').reset_index()
# fss_grid = grid_grouped_df.groupby(['latitude', 'longitude']).apply(lambda x: fss_for_grid(x, model, feature_dict, data_dims_dict))

In [None]:
# To calculate FSS of the fraction predicted by the ML model for the control member only (realization == 0), per grid point
fss_grid = merged_df[merged_df.realization==0].groupby(['latitude', 'longitude']).apply(lambda x: fss_for_grid(x, model, feature_dict, data_dims_dict))

In [None]:
fss_grid

In [None]:
fss_grid.to_csv('fss_ml_control_pred.csv')