# CS1 Dataset Report


Import libraries, and print package versions.

In [None]:
import paramaterial as pam
from paramaterial import DataSet, DataItem, ModelSet
from paramaterial.models import ramberg
import os
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt

print(pam.__version__)
print(pd.__version__)
print(sns.__version__)
print(mpl.__version__)

### Gather data and info

Extract info from the filenames and make the info table.

In [None]:
# os.listdir returns entries in arbitrary order and the default sort_values algorithm is not
# stable, so the row order (and the test IDs numbered from it) could differ between machines.
# Sorting the filenames and using a stable sort makes the table reproducible.
# NOTE(review): if artefacts keyed on a previous row order already exist, confirm they still match.
# Each row holds the filename followed by its first four underscore-separated fields.
info_lists = [[filename] + filename.split('_')[:4] for filename in sorted(os.listdir('data/01 raw data'))]
info_table = pd.DataFrame(info_lists,
                          columns=['old_filename', 'test_type', 'temperature', 'lot', 'number']
                          ).sort_values(by='test_type', ascending=False, kind='mergesort')

Add a unique test ID column.

In [None]:
# Number the rows from 1 in their current order, using zero-padded three-digit IDs.
test_ids = [f'test_ID_{number:03d}' for number in range(1, len(info_table) + 1)]
info_table['test_id'] = test_ids
# Put test_id first and renumber the rows 0..n-1.
other_columns = [column for column in info_table.columns if column != 'test_id']
info_table = info_table[['test_id'] + other_columns].reset_index(drop=True)

Drop the PST tests. We are only going to be processing and analysing the uniaxial tests. Tests from lots F, G, H and I are also excluded.

In [None]:
# Expand the single-letter test-type codes, then keep only rows that are neither PST tests
# nor from lots F, G, H or I.
info_table['test_type'] = info_table['test_type'].replace({'T': 'UT', 'P': 'PST'})
is_pst = info_table['test_type'] == 'PST'
in_excluded_lot = info_table['lot'].isin(['F', 'G', 'H', 'I'])
info_table = info_table[~is_pst & ~in_excluded_lot]

Add the information from the paper, and convert the temperatures to numbers.

In [None]:
# Attach the constant test parameters and make the temperature column numeric.
# NOTE(review): 'A_0_(mm)' is presumably a cross-sectional area (40.32 ≈ 12.7 x 3.175), i.e. mm^2 — confirm.
info_table = info_table.assign(**{
    'rate': 8.66e-4,  # units (/s) and all tests performed at same rate
    'A_0_(mm)': np.where(info_table['test_type'] == 'UT', 40.32, 20.16),
    'h_0_(mm)': 3.175,
    'temperature': pd.to_numeric(info_table['temperature']),
})

### Format the data files.
In this example, the files are already in .csv format. We just check that the column headers are the same and that there are no duplicates, then rename the files by test id.

Check that the column headers match across files and that there are no duplicate files.

In [None]:
# Run both sanity checks on the raw-data folder before anything is copied or renamed.
pam.check_column_headers('data/01 raw data')
pam.check_for_duplicate_files('data/01 raw data')

Write the prepared data and rename the files by test id. Also write the prepared info table.

In [None]:
# Copy the raw files into the prepared-data folder (renamed using info_table, per the function
# name), then save the info table next to them without the row index.
pam.copy_data_and_rename_by_test_id(data_in='data/01 raw data', data_out='data/01 prepared data',
                                    info_table=info_table)
info_table.to_excel('info/01 prepared info.xlsx', index=False)

### Make the experimental matrix
We want to identify useful groupings and make visualisations. The tests can be grouped by lot and temperature, with up to 3 repeated tests.

In [None]:
# Colormap running from white to RGB (85, 49, 0); its value at 0.1 becomes the default
# axes background for every figure created after this cell.
gold_cmap = mpl.colors.LinearSegmentedColormap.from_list("", ["white", (85 / 255, 49 / 255, 0)])
mpl.rcParams["axes.facecolor"] = gold_cmap(0.1)

In [None]:
# Heatmap of the experimental matrix: temperatures as rows, lots as columns, colour scale capped at 6.
# The trailing semicolon stops the notebook echoing the return value.
plt.figure(figsize=(2, 2))
pam.experimental_matrix(info_table, index='temperature', columns='lot', as_heatmap=True, cmap=gold_cmap, xlabel='Lot',
                        ylabel='Temperature (°C)', vmax=6);

### Visualise the prepared data.

We could colour by lot or by temperature. Colouring by lot is useful for observing lot-to-lot variation; colouring by temperature is useful for observing the effect of temperature.

In [None]:
def make_strain_percent(di):
    """Multiply the 'Strain' column of ``di.data`` by 100 and return ``di``.

    The item is modified in place; the same object is returned so the
    function can be passed to ``DataSet.apply``. The scaled values match the
    'Strain (%)' axis labels used by the plotting helpers in this notebook.
    """
    strain = di.data['Strain']
    di.data['Strain'] = strain.mul(100)
    return di


# Load the prepared files, order the items by temperature then lot, and scale strain by 100.
prepared_ds = DataSet('info/01 prepared info.xlsx', 'data/01 prepared data')
prepared_ds = prepared_ds.sort_by(['temperature', 'lot'])
prepared_ds = prepared_ds.apply(make_strain_percent)

In [None]:
# Styler that colours curves by lot with the 'inferno' colormap, set up against the prepared dataset.
lot_styler = pam.Styler(color_by='lot', color_by_label='Lot', cmap='inferno').style_to(prepared_ds)


def ds_subplots(ds: DataSet, **kwargs):
    """Plot ``ds`` as a 1 x 6 row of stress-strain panels, with columns split by temperature.

    The temperature list (column values and titles) is read from the
    module-level ``prepared_ds``, not from ``ds``, and curves are styled with
    the module-level ``lot_styler``. Only the 'UT' test type is requested for
    the single row. Extra keyword arguments are forwarded to
    ``pam.dataset_subplots``, whose return value is returned unchanged.

    NOTE(review): the grid shape is fixed at (1, 6), which presumably assumes
    exactly six distinct temperatures in ``prepared_ds`` — confirm.
    """
    temps = sorted(prepared_ds.info_table['temperature'].unique())
    axes_options = dict(x='Strain', y='Stress_MPa', xlabel='Strain (%)', ylabel='Stress (MPa)',
                        ylim=(-25., 350.))
    layout_options = dict(figsize=(12, 2), shape=(1, 6), wspace=0.1, plot_legend=False)
    grid_options = dict(rows_by='test_type', cols_by='temperature', row_vals=[['UT']],
                        col_vals=[[T] for T in temps], col_titles=[f'{T}°C' for T in temps])
    return pam.dataset_subplots(ds=ds, styler=lot_styler, **axes_options, **layout_options,
                                **grid_options, **kwargs)


# Plot the prepared curves; the trailing semicolon stops the notebook echoing the return value.
ds_subplots(prepared_ds);

### Find UTS and Failure

In [None]:
def find_uts_and_failure(di: DataItem):
    """Record the peak-stress point and the largest strain in ``di.info``.

    Writes three entries and returns the same item:

    * ``UTS_1`` - maximum of the 'Stress_MPa' column.
    * ``UTS_0`` - 'Strain' value at the row where 'Stress_MPa' is maximal.
    * ``FP_0``  - maximum of the 'Strain' column (presumably the failure point).
    """
    stress = di.data['Stress_MPa']
    strain = di.data['Strain']
    peak_label = stress.idxmax()  # index label, not position
    di.info['UTS_1'] = stress.max()
    di.info['UTS_0'] = strain.loc[peak_label]
    di.info['FP_0'] = strain.max()
    return di


# Add the UTS_1, UTS_0 and FP_0 entries to every item's info.
prepared_ds = prepared_ds.apply(find_uts_and_failure)

### Trimming

In [None]:
def trim_to_small_strain(di: DataItem):
    """Keep only the rows of ``di.data`` whose 'Strain' is strictly below 1.

    ``di.data`` is replaced by the filtered frame (original index labels are
    kept) and the same item is returned.
    """
    below_limit = di.data['Strain'].lt(1)
    di.data = di.data.loc[below_limit]
    return di


# Strain was scaled by 100 when prepared_ds was built, so the cut-off of 1 is in those scaled units.
trimmed_ds = prepared_ds.apply(trim_to_small_strain)

### Foot Correction

In [None]:
# Locate the upper/lower proportional limits (preload of 36 on 'Stress_MPa'), then apply the
# foot correction to the result.
corrected_ds = pam.correct_foot(
    pam.find_upl_and_lpl(trimmed_ds, preload=36, preload_key='Stress_MPa'))

### Foot correction screening

Make screening pdf.

In [None]:
# Styler that colours curves by temperature, set up against the prepared dataset.
temp_styler = pam.Styler(color_by='temperature', color_by_label='(°C)').style_to(prepared_ds)


def ds_plot(ds: DataSet, **kwargs):
    """Plot stress against strain for ``ds`` using the module-level ``temp_styler``.

    Extra keyword arguments are forwarded to ``pam.dataset_plot``, whose
    return value is returned unchanged.
    """
    fixed_options = dict(x='Strain', y='Stress_MPa', xlabel='Strain (%)', ylabel='Stress (MPa)',
                         styler=temp_styler)
    return pam.dataset_plot(ds, **fixed_options, **kwargs)


def foot_correction_screening_plot(di):
    """Draw the screening plot for one test: corrected curve over the faded trimmed curve.

    Reads ``test_id`` and the info entries 'temperature', 'E', 'UPL_0',
    'UPL_1', 'LPL_0' and 'LPL_1' from ``di``. The curves themselves are taken
    from the module-level ``corrected_ds`` and ``trimmed_ds``. A dashed line of
    slope ``E`` is drawn through the (UPL_0, UPL_1) point, and the UPL and LPL
    points are marked with matplotlib markers 4 and 5. Returns nothing.
    """
    info = di.info
    temperature = info['temperature']
    curve_colour = temp_styler.color_dict[temperature]
    upper_point = (info['UPL_0'], info['UPL_1'])
    lower_point = (info['LPL_0'], info['LPL_1'])

    def only_this_test(ds):
        # Restrict a dataset to the single test being screened.
        return ds.subset({'test_id': [di.test_id]})

    ax = ds_plot(only_this_test(corrected_ds))
    ax = ds_plot(only_this_test(trimmed_ds), alpha=0.5, ax=ax)
    # zorder is offset by temperature, as in the line and marker calls below.
    ax.axline(upper_point, slope=info['E'], c=curve_colour, ls='--', alpha=0.5, zorder=500 + temperature)
    for point, marker in ((upper_point, 4), (lower_point, 5)):
        ax.plot(*point, c='k', mfc=curve_colour, marker=marker, alpha=0.8, markersize=6,
                zorder=1000 + temperature)


# Preview the screening plot for one item (index 3), then write the screening PDF for the whole
# corrected dataset using the same plotting function.
foot_correction_screening_plot(corrected_ds[3])
pam.make_screening_pdf(corrected_ds, foot_correction_screening_plot,
                       'info/foot correction screening.pdf');

Reject flagged tests.

In [None]:
# Read the marked-up copy of the screening PDF and list the tests flagged for rejection.
# NOTE(review): the flag is compared with the string 'True', not a boolean — presumably
# read_screening_pdf stores it as text; verify.
rejected_ds = pam.read_screening_pdf(corrected_ds, 'info/foot correction screening marked.pdf')
rejected_items = rejected_ds.info_table[rejected_ds.info_table['reject'] == 'True']
rejected_items[['test_id', 'temperature', 'lot', 'number', 'reject', 'comment']]

In [None]:
# Keep only the tests that were not flagged in the marked screening PDF.
screened_ds = pam.remove_rejected_items(rejected_ds)

### Proof Stress

In [None]:
def reset_strain(di):
    """Undo the x100 strain scaling: divide 'Strain' by 100 and multiply ``info['E']`` by 100.

    The item is modified in place and returned.

    NOTE(review): other info entries are left untouched here, including ones
    derived from the scaled strain earlier in the notebook (e.g. 'UTS_0',
    'FP_0') — confirm that is intended.
    """
    di.data['Strain'] = di.data['Strain'].div(100)
    di.info['E'] *= 100  # slope per unit strain is 100x the slope per scaled-strain unit
    return di


# Rescale strain and E with reset_strain first, then find the proof stress on the rescaled data.
proof_ds = pam.find_proof_stress(screened_ds.apply(reset_strain))

In [None]:
# Write the processed info and data to disk, then reload them from those files as a new DataSet.
proof_ds.write_output('info/02 processed info.xlsx', 'data/02 processed data')
processed_ds = DataSet('info/02 processed info.xlsx', 'data/02 processed data')

### Representative curves

In [None]:
# Build representative 'Stress_MPa' curves for each (lot, temperature, test_type) group,
# interpolating on 'Strain', and carry the listed info columns through to the group table.
pam.make_representative_data(processed_ds, 'info/03 repres info.xlsx', 'data/03 repres data',
                             repres_col='Stress_MPa', group_by_keys=['lot', 'temperature', 'test_type'],
                             interp_by='Strain', group_info_cols=['E', 'PS_0.002_1', 'UTS_1', 'UTS_0', 'FP_0', 'UPL_1'])
# Reload the representative output; its items are keyed by 'repres_id' instead of the default test id key.
repres_ds = DataSet('info/03 repres info.xlsx', 'data/03 repres data', test_id_key='repres_id')
# Shade between the 'min_Stress_MPa' and 'max_Stress_MPa' columns of each representative curve.
ds_subplots(repres_ds, fill_between=('min_Stress_MPa', 'max_Stress_MPa'));

### Mechanical properties

In [None]:
# Summarise the listed properties for each (temperature, test_type) group.
table = pam.make_representative_info(processed_ds, group_by_keys=['temperature', 'test_type'],
                                     group_info_cols=['E', 'PS_0.002_1', 'UTS_1', 'UTS_0', 'FP_0', 'UPL_1'])
# Write the selected columns as a LaTeX table, without the index and with 3 significant figures.
# TODO(review): the original note here said to wrap every entry in "\color{red}{}"; the code
# below does not do that — confirm whether it is still wanted.
table[['temperature', 'nr averaged', 'E', 'PS_0.002_1', 'UTS_1', 'UTS_0', 'FP_0', 'std_PS_0.002_1', 'std_UTS_1',
       'std_UTS_0', 'std_FP_0']].style.hide(axis='index').format('{:.3g}').to_latex('info/04 mechanical properties.tex')

### Fitted curves

In [None]:
# Model set for the ramberg model: 'C' and 'n' are the fitted parameters (bounds given in that
# order), while 'E' and 'UPL_1' are passed as variables — presumably read from each item's info.
ramberg_ms = ModelSet(ramberg, param_names=['C', 'n'], var_names=['E', 'UPL_1'],
                      bounds=[(0., 1000.), (0.01, 0.8)], scipy_func='minimize')

# Fit stress against strain on the representative curves, with sample_size=40.
ramberg_ms.fit_to(repres_ds, 'Strain', 'Stress_MPa', sample_size=40)

In [None]:
# Generate the model predictions as a dataset and plot them with the same layout as the data.
ramberg_ds = ramberg_ms.predict()
ds_subplots(ramberg_ds);

### Fitting results

In [None]:
# Display the info table of the predicted dataset in the notebook.
ramberg_ds.info_table

In [None]:
# Summarise the listed fit columns for each (temperature, test_type) group and write them as a
# LaTeX table, without the index and with 3 significant figures.
table = pam.make_representative_info(ramberg_ds, group_by_keys=['temperature', 'test_type'],
                                     group_info_cols=['E', 'UPL_1', 'C', 'n', 'error'])
table[['temperature', 'nr averaged', 'E', 'UPL_1', 'C', 'n', 'error']].style.hide(axis='index').format(
    '{:.3g}').to_latex('info/05 fitting results.tex')