<a href="https://colab.research.google.com/github/frasermcghan/Year3Project/blob/master/GADF_Vibration.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import matplotlib.pyplot as plt 
%matplotlib inline

import pandas as pd 
import numpy as np
import os
from pathlib import Path

from pyts.image import GramianAngularField, MarkovTransitionField, RecurrencePlot
from sklearn.model_selection import train_test_split

In [0]:
# Root folder that holds the processed data and receives the generated images.
# The path can be overridden through the GADF_PROJECT_DIR environment variable
# so the notebook also runs on machines other than the original author's;
# when the variable is unset the original location is used.
project_directory = os.environ.get("GADF_PROJECT_DIR", "/Users/fraser/Uni/Year 3/Project/")

# Later cells build paths by plain string concatenation, so the root must end
# with a separator.
if not project_directory.endswith("/"):
    project_directory += "/"

In [0]:
# CSV index of the processed recordings (read into a DataFrame further down).
# Built from project_directory, which already ends with "/", instead of
# repeating the absolute root, so relocating the project needs one change only.
data_summary_file = project_directory + "processed_data/data_summary.csv"

# Split data into train and test set

- Classes are imbalanced, as there are only data points for 2/3 of the load conditions for '4bars' and '8bars'.
- Therefore, we split the dataset by fault, randomly split each fault class into train and test sets, then join them back together.

In [0]:
# Load the summary table listing every recording and its label.
data = pd.read_csv(filepath_or_buffer=data_summary_file)

In [0]:
# One sub-frame per class label, unpacked in the same order as the label tuple.
(
    data_1bar,
    data_4bars,
    data_8bars,
    data_inner,
    data_outer,
    data_ball,
    data_healthy,
) = (
    data[data['label'] == class_label]
    for class_label in ('1bar', '4bars', '8bars', 'inner', 'outer', 'ball', 'healthy')
)

In [0]:
# Report how many rows each class contributes.
for class_label, class_frame in (
    ('1bar', data_1bar),
    ('4bars', data_4bars),
    ('8bars', data_8bars),
    ('inner', data_inner),
    ('outer', data_outer),
    ('ball', data_ball),
    ('healthy', data_healthy),
):
    print(f"Number of {class_label}: {len(class_frame)}")

Number of 1bar: 360
Number of 4bars: 240
Number of 8bars: 240
Number of inner: 360
Number of outer: 360
Number of ball: 360
Number of healthy: 360


### Create Test Set

In [0]:
# Hold out 20% of every class as its test set. Each class is split on its own,
# always with the same seed, and the resulting pairs are unpacked in class order.
per_class_frames = (
    data_1bar,
    data_4bars,
    data_8bars,
    data_inner,
    data_outer,
    data_ball,
    data_healthy,
)

(
    (data_1bar_train, data_1bar_test),
    (data_4bars_train, data_4bars_test),
    (data_8bars_train, data_8bars_test),
    (data_inner_train, data_inner_test),
    (data_outer_train, data_outer_test),
    (data_ball_train, data_ball_test),
    (data_healthy_train, data_healthy_test),
) = [
    train_test_split(class_frame, test_size=0.2, random_state=42)
    for class_frame in per_class_frames
]

In [0]:
# Per-class test frames, listed in the same class order as the other splits.
test_dfs = [
    data_1bar_test, data_4bars_test, data_8bars_test, data_inner_test,
    data_outer_test, data_ball_test, data_healthy_test,
]

In [0]:
# Stack the per-class test frames row-wise into one test table.
data_test = pd.concat(objs=test_dfs, axis=0)

In [0]:
# Preview the first five rows of the test table.
data_test.head(n=5)

Unnamed: 0,sample,vibration,current,thermal,label
224,225,/Users/fraser/Uni/Year 3/Project/processed_dat...,/Users/fraser/Uni/Year 3/Project/processed_dat...,/Users/fraser/Uni/Year 3/Project/processed_dat...,1bar
42,43,/Users/fraser/Uni/Year 3/Project/processed_dat...,/Users/fraser/Uni/Year 3/Project/processed_dat...,/Users/fraser/Uni/Year 3/Project/processed_dat...,1bar
285,286,/Users/fraser/Uni/Year 3/Project/processed_dat...,/Users/fraser/Uni/Year 3/Project/processed_dat...,/Users/fraser/Uni/Year 3/Project/processed_dat...,1bar
302,303,/Users/fraser/Uni/Year 3/Project/processed_dat...,/Users/fraser/Uni/Year 3/Project/processed_dat...,/Users/fraser/Uni/Year 3/Project/processed_dat...,1bar
56,57,/Users/fraser/Uni/Year 3/Project/processed_dat...,/Users/fraser/Uni/Year 3/Project/processed_dat...,/Users/fraser/Uni/Year 3/Project/processed_dat...,1bar


### Create Training and Validation Set

In [0]:
# Carve a validation set (20%) out of each class's remaining non-test rows.
#
# This cell overwrites the data_*_train frames it reads, so running it a second
# time would silently split an already-split training set again. The guard below
# checks that train + test still add up to the whole class before splitting,
# and raises instead of producing a shrunken training set.
class_splits = [
    ('1bar', data_1bar, data_1bar_train, data_1bar_test),
    ('4bars', data_4bars, data_4bars_train, data_4bars_test),
    ('8bars', data_8bars, data_8bars_train, data_8bars_test),
    ('inner', data_inner, data_inner_train, data_inner_test),
    ('outer', data_outer, data_outer_train, data_outer_test),
    ('ball', data_ball, data_ball_train, data_ball_test),
    ('healthy', data_healthy, data_healthy_train, data_healthy_test),
]

for class_label, class_frame, train_pool, held_out in class_splits:
    if len(train_pool) + len(held_out) != len(class_frame):
        raise RuntimeError(
            f"Training rows for '{class_label}' no longer match the test split "
            "(this cell was probably run twice); re-run the 'Create Test Set' "
            "cell before running this one again."
        )

# Same fraction and seed for every class; pairs are unpacked in class order.
(
    (data_1bar_train, data_1bar_valid),
    (data_4bars_train, data_4bars_valid),
    (data_8bars_train, data_8bars_valid),
    (data_inner_train, data_inner_valid),
    (data_outer_train, data_outer_valid),
    (data_ball_train, data_ball_valid),
    (data_healthy_train, data_healthy_valid),
) = [
    train_test_split(train_pool, test_size=0.2, random_state=42)
    for _, _, train_pool, _ in class_splits
]

In [0]:
# Per-class training frames, listed in the same class order as the other splits.
train_dfs = [
    data_1bar_train, data_4bars_train, data_8bars_train, data_inner_train,
    data_outer_train, data_ball_train, data_healthy_train,
]

In [0]:
# Per-class validation frames, listed in the same class order as the other splits.
valid_dfs = [
    data_1bar_valid, data_4bars_valid, data_8bars_valid, data_inner_valid,
    data_outer_valid, data_ball_valid, data_healthy_valid,
]

In [0]:
# Stack the per-class training frames row-wise into one training table.
data_train = pd.concat(objs=train_dfs, axis=0)

In [0]:
# Preview the first five rows of the training table.
data_train.head(n=5)

Unnamed: 0,sample,vibration,current,thermal,label
296,297,/Users/fraser/Uni/Year 3/Project/processed_dat...,/Users/fraser/Uni/Year 3/Project/processed_dat...,/Users/fraser/Uni/Year 3/Project/processed_dat...,1bar
81,82,/Users/fraser/Uni/Year 3/Project/processed_dat...,/Users/fraser/Uni/Year 3/Project/processed_dat...,/Users/fraser/Uni/Year 3/Project/processed_dat...,1bar
10,11,/Users/fraser/Uni/Year 3/Project/processed_dat...,/Users/fraser/Uni/Year 3/Project/processed_dat...,/Users/fraser/Uni/Year 3/Project/processed_dat...,1bar
97,98,/Users/fraser/Uni/Year 3/Project/processed_dat...,/Users/fraser/Uni/Year 3/Project/processed_dat...,/Users/fraser/Uni/Year 3/Project/processed_dat...,1bar
145,146,/Users/fraser/Uni/Year 3/Project/processed_dat...,/Users/fraser/Uni/Year 3/Project/processed_dat...,/Users/fraser/Uni/Year 3/Project/processed_dat...,1bar


In [0]:
# Stack the per-class validation frames row-wise into one validation table.
data_valid = pd.concat(objs=valid_dfs, axis=0)

In [0]:
# Preview the first five rows of the validation table.
data_valid.head(n=5)

Unnamed: 0,sample,vibration,current,thermal,label
301,302,/Users/fraser/Uni/Year 3/Project/processed_dat...,/Users/fraser/Uni/Year 3/Project/processed_dat...,/Users/fraser/Uni/Year 3/Project/processed_dat...,1bar
150,151,/Users/fraser/Uni/Year 3/Project/processed_dat...,/Users/fraser/Uni/Year 3/Project/processed_dat...,/Users/fraser/Uni/Year 3/Project/processed_dat...,1bar
189,190,/Users/fraser/Uni/Year 3/Project/processed_dat...,/Users/fraser/Uni/Year 3/Project/processed_dat...,/Users/fraser/Uni/Year 3/Project/processed_dat...,1bar
79,80,/Users/fraser/Uni/Year 3/Project/processed_dat...,/Users/fraser/Uni/Year 3/Project/processed_dat...,/Users/fraser/Uni/Year 3/Project/processed_dat...,1bar
322,323,/Users/fraser/Uni/Year 3/Project/processed_dat...,/Users/fraser/Uni/Year 3/Project/processed_dat...,/Users/fraser/Uni/Year 3/Project/processed_dat...,1bar


## Check that the train, validation and test sets are disjoint

In [0]:
# Sample ids present in each split.
train_samples = set(data_train['sample'].values)
valid_samples = set(data_valid['sample'].values)
test_samples = set(data_test['sample'].values)

# A single three-way intersection only reports ids found in ALL three splits,
# so an id shared by just two of them would go unnoticed. Take the union of the
# pairwise overlaps instead; an empty set means the splits are disjoint.
(train_samples & valid_samples) | (train_samples & test_samples) | (valid_samples & test_samples)

set()

# GADF Vibration

In [0]:
# Gramian Angular Field transformer (method='difference', image_size=224),
# reused by the train, validation and test image loops below.
gadf = GramianAngularField(
    image_size=224,
    method='difference',
    overlapping=True,
)

## Create GADF Vibration Directory

In [0]:
# Output root for the vibration GADF images, directly under the project folder.
gadf_vib_dir = f"{project_directory}GADF_vibration"

# Create the folder on first use; otherwise just report that it is there.
if os.path.exists(gadf_vib_dir):
    print(f"{gadf_vib_dir} aready exists.")
else:
    os.mkdir(gadf_vib_dir)

/Users/fraser/Uni/Year 3/Project/GADF_vibration aready exists.


## Training Set

## Create images & save to directory

In [0]:
# Folder that receives the training images (one sub-folder per label).
gadf_vib_dir_train = f"{gadf_vib_dir}/train"

In [0]:
# Turn every training vibration recording into a GADF image and save it as
# <train dir>/<label>/<recording name>.png.
n_train_files = len(data_train['vibration'])  # loop-invariant, hoisted out of the loop

for i, v_file in enumerate(data_train['vibration']):

    # Progress report every 100 files.
    if i % 100 == 0:
        print(f"{i}/{n_train_files} images produced...")

    v_path = Path(v_file)
    # NOTE(review): the label is read from a fixed position in the path, so it
    # presumably depends on the absolute layout stored in the summary CSV —
    # confirm before relocating the data.
    label = v_path.parts[7]

    dest_directory = gadf_vib_dir_train + f"/{label}/"

    # makedirs also creates the enclosing split folder, which no visible cell
    # creates; a plain os.mkdir raised FileNotFoundError when it was missing.
    os.makedirs(dest_directory, exist_ok=True)

    filename = v_path.parts[-1].replace(".csv","")

    v_data = np.array(pd.read_csv(v_file))

    # The transformer is fed the transposed array; only the first image of the
    # returned batch is written out.
    v_gadf = gadf.fit_transform(v_data.T)

    plt.imsave(f"{dest_directory}{filename}.png", v_gadf[0])

0/1456 images produced...
100/1456 images produced...
200/1456 images produced...
300/1456 images produced...
400/1456 images produced...
500/1456 images produced...
600/1456 images produced...
700/1456 images produced...
800/1456 images produced...
900/1456 images produced...
1000/1456 images produced...
1100/1456 images produced...
1200/1456 images produced...
1300/1456 images produced...
1400/1456 images produced...


## Validation Set

In [0]:
# Folder that receives the validation images (one sub-folder per label).
gadf_vib_dir_valid = f"{gadf_vib_dir}/valid"

In [0]:
# Turn every validation vibration recording into a GADF image and save it as
# <valid dir>/<label>/<recording name>.png.
n_valid_files = len(data_valid['vibration'])  # loop-invariant, hoisted out of the loop

for i, v_file in enumerate(data_valid['vibration']):

    # Progress report every 100 files.
    if i % 100 == 0:
        print(f"{i}/{n_valid_files} images produced...")

    v_path = Path(v_file)
    # NOTE(review): the label is read from a fixed position in the path, so it
    # presumably depends on the absolute layout stored in the summary CSV —
    # confirm before relocating the data.
    label = v_path.parts[7]

    dest_directory = gadf_vib_dir_valid + f"/{label}/"

    # makedirs also creates the enclosing split folder, which no visible cell
    # creates; a plain os.mkdir raised FileNotFoundError when it was missing.
    os.makedirs(dest_directory, exist_ok=True)

    filename = v_path.parts[-1].replace(".csv","")

    v_data = np.array(pd.read_csv(v_file))

    # The transformer is fed the transposed array; only the first image of the
    # returned batch is written out.
    v_gadf = gadf.fit_transform(v_data.T)

    plt.imsave(f"{dest_directory}{filename}.png", v_gadf[0])

0/368 images produced...
100/368 images produced...
200/368 images produced...
300/368 images produced...


## Test Set

In [0]:
# Folder that receives the test images (one sub-folder per label).
gadf_vib_dir_test = f"{gadf_vib_dir}/test"

In [0]:
# Turn every test vibration recording into a GADF image and save it as
# <test dir>/<label>/<recording name>.png.
n_test_files = len(data_test['vibration'])  # loop-invariant, hoisted out of the loop

for i, v_file in enumerate(data_test['vibration']):

    # Progress report every 100 files.
    if i % 100 == 0:
        print(f"{i}/{n_test_files} images produced...")

    v_path = Path(v_file)
    # NOTE(review): the label is read from a fixed position in the path, so it
    # presumably depends on the absolute layout stored in the summary CSV —
    # confirm before relocating the data.
    label = v_path.parts[7]

    dest_directory = gadf_vib_dir_test + f"/{label}/"

    # makedirs also creates the enclosing split folder, which no visible cell
    # creates; a plain os.mkdir raised FileNotFoundError when it was missing.
    os.makedirs(dest_directory, exist_ok=True)

    filename = v_path.parts[-1].replace(".csv","")

    v_data = np.array(pd.read_csv(v_file))

    # The transformer is fed the transposed array; only the first image of the
    # returned batch is written out.
    v_gadf = gadf.fit_transform(v_data.T)

    plt.imsave(f"{dest_directory}{filename}.png", v_gadf[0])

0/456 images produced...
100/456 images produced...
200/456 images produced...
300/456 images produced...
400/456 images produced...
