# Load Data


We download the AFRC radial forge data from the link specified. 

Credits to Christos Tachtatzis for the code to download & extract.

In [2]:
# Zenodo download link for the AFRC radial forge dataset archive (zip).
afrc_data_url = 'https://zenodo.org/record/2573861/files/STRATH%20radial%20forge%20dataset.zip?download=1'

# Local folder the dataset is extracted into and later read from.
data_path = 'Data'

In [3]:
import os, requests, zipfile, io

def download_and_extract(url, destination, force=False, timeout=60):
    """Download a zip archive from ``url`` and extract it into ``destination``.

    The archive is fetched fully into memory and then unpacked. Unless
    ``force`` is true, nothing is extracted when any archive member already
    exists as a file under ``destination``; every conflicting name is printed.

    Args:
        url: HTTP(S) address of the zip archive.
        destination: Folder the archive members are extracted into.
        force: If True, extract even when existing files would be overwritten.
        timeout: Value forwarded to ``requests.get`` as its ``timeout``.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        zipfile.BadZipFile: If the downloaded payload is not a zip archive.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of trying to unzip an error page.
    response.raise_for_status()

    # The context manager closes the archive even if extraction fails.
    with zipfile.ZipFile(io.BytesIO(response.content)) as zipDocument:
        if not force:
            # Report every member that would be overwritten before aborting.
            abort = False
            for file in zipDocument.filelist:
                if os.path.isfile(os.path.join(destination, file.filename)):
                    print(file.filename, 'already exists. If you want to overwrite the file call the method with force=True')
                    abort = True
            if abort:
                print('Zip file was not extracted')
                return

        zipDocument.extractall(destination)
    

In [4]:
# Fetch the archive and unpack it into the dataset folder; with the default
# force=False nothing is extracted if any of the files already exist there.
download_and_extract(url=afrc_data_url, destination=data_path)

The data is downloaded into the folder 'Data'. Next, we load it into a list of dataframes.

Each dataframe in the list holds the time-series measurements of all sensors for one part.

In [2]:
import pandas as pd
import pickle as pickle
import os

## Load sensor data into dataframes

In [6]:
def _scope_file_order(filename):
    """Sort key: the digits in the file name as a number, then the name itself."""
    digits = ''.join(ch for ch in filename if ch.isdigit())
    return (int(digits) if digits else -1, filename)


# os.listdir() returns names in arbitrary order, so sort them to make each
# part's position in the list reproducible across machines and runs.
# NOTE(review): presumably this numeric order matches the row order of the
# CMM measurements - confirm against the dataset description.
scope_filenames = sorted(
    (
        name for name in os.listdir(data_path)
        # Match on the real extension rather than any "csv" substring.
        if "Scope" in name and name.lower().endswith(".csv")
    ),
    key=_scope_file_order,
)

#load each part's data as a dataframe to a list
data_inputs_list = [
    pd.read_csv(os.path.join(data_path, name), encoding='cp1252')
    for name in scope_filenames
]

        

In [8]:
# Number of dataframes loaded, i.e. one per matching "Scope" CSV file.
len(data_inputs_list)

81

## Load CMM data into dataframe

1. Read data
2. Subtract the "base value" (first row) from each CMM measurement
3. Save into a dataframe

In [4]:
data_path = 'Data' #folder for dataset
output_pd = pd.read_excel(os.path.join(data_path, "CMMData81Parts.xlsx"))

#extract necessary output values
output_headers = output_pd.columns
cmm_values = output_pd.values  # whole sheet as a single NumPy array

base_row = 0        # row holding the "base value" of every column
first_part_row = 3  # measurements are taken from this row on; rows 1-2 are not used
base_val = cmm_values[base_row, :]
output_val = cmm_values[first_part_row:, :]

#extract error from expected base values
# Broadcasting subtracts each column's base value from that column and builds
# a NEW array. The previous in-place `-=` wrote into the array returned by
# `.values`, which may be a view of output_pd's data (silently modifying the
# frame) or a read-only array under pandas Copy-on-Write (raising an error).
np_data_outputs = output_val - base_val

In [5]:
# Assemble the output frame in one step: one column per spreadsheet header,
# filled with the matching column of the base-subtracted array.
output_df = pd.DataFrame({
    header: np_data_outputs[:, col_idx]
    for col_idx, header in enumerate(output_headers)
})

In [6]:
# Preview only the first rows instead of dumping the whole frame.
output_df.head(10)

Unnamed: 0,ID @100,ID @55,38 dia @200,42 dia @140,42 dia @80,Base angle F,Base angle BR,Base angle BL,162mm taper F,162mm taper BR,162mm taper BL,40.5mm taper F,40.5mm taper BR,40.5mm taper BL,Top1,Top2,Top3,Top4
0,-0.150,-0.450,0.040,-0.030,0.156,0.262,-0.075,-0.138,-0.416,-0.321,-0.382,0.129,0.179,0.193,10.279,12.225,11.071,11.112
1,-0.147,-0.436,0.036,-0.037,0.154,0.181,0.056,0.390,-0.328,-0.435,-0.292,0.185,0.123,-0.034,11.213,11.851,10.667,12.183
2,-0.146,-0.442,0.024,-0.045,0.141,0.027,-0.096,-0.219,-0.371,-0.461,-0.309,0.054,0.024,0.174,10.505,12.582,11.839,12.638
3,-0.145,-0.437,0.017,-0.045,0.136,-0.231,-0.295,-0.212,-0.253,-0.389,-0.433,0.168,0.132,-0.027,12.954,13.349,11.988,14.026
4,-0.140,-0.429,0.007,-0.057,0.135,-0.043,-0.259,-0.010,-0.438,-0.457,-0.462,0.130,0.052,-0.018,12.646,12.190,11.299,13.538
5,-0.143,-0.435,0.006,-0.057,0.136,-0.096,-0.176,-0.056,-0.255,-0.299,-0.464,0.113,0.106,-0.015,12.028,12.022,11.361,12.431
6,-0.144,-0.429,0.000,-0.072,0.123,-0.009,-0.122,-0.051,-0.315,-0.467,-0.363,-0.098,0.051,0.126,11.824,13.696,12.866,13.254
7,-0.144,-0.433,-0.006,-0.068,0.118,-0.240,-0.046,-0.004,-0.545,-0.582,-0.416,-0.150,0.098,0.050,12.022,13.658,12.645,12.871
8,-0.147,-0.431,-0.003,-0.075,0.121,-0.093,-0.474,-0.107,-0.353,-0.434,-0.520,0.128,0.001,-0.129,13.836,14.222,12.758,15.078
9,-0.144,-0.411,-0.009,-0.081,0.118,-0.293,-0.261,-0.397,-0.608,-0.486,-0.402,0.165,0.054,-0.132,12.880,12.541,11.776,13.576


## Pickle Data

Pickle the input & output data for ease of future use

In [13]:
pickle_path = "pickles"
input_file_name = "strath_inputs.p"
output_file_name = "strath_outputs.p"

# Create the target folder if it is missing; exist_ok makes the cell safe to
# re-run without first scanning the current directory listing.
os.makedirs(pickle_path, exist_ok=True)

#save into pickle file
# `with` closes each handle, so the data is flushed to disk and no file
# descriptor is left open in the kernel.
with open(os.path.join(pickle_path, input_file_name), "wb") as input_file:
    pickle.dump(data_inputs_list, input_file)
with open(os.path.join(pickle_path, output_file_name), "wb") as output_file:
    pickle.dump(output_df, output_file)