In [17]:
import pandas as pd
import numpy as np
import os

In [8]:
# Resolve the output directory: a 'data' folder that sits next to (not inside)
# the directory the notebook is executed from.
current_folder_path = os.getcwd()
parent_folder_path = os.path.split(current_folder_path)[0]
data_save_path = os.path.join(parent_folder_path, 'data')

In [71]:
# Load the feather file, then discard the 'time' column and the row whose
# index label is 0.
df = (
    pd.read_feather("df_f_feather")
    .drop(columns=['time'])
    .drop(index=0)
)

In [9]:
# Display the resolved output directory so it can be checked before files are written to it.
data_save_path

'c:\\Users\\UTKU\\OneDrive\\Desktop\\MS-Term4\\ProjectArbeit\\data'

In [55]:
# Strip every space from the label names (literal match, not a regex) so they
# line up with the single-token names listed below.
df['label'] = df['label'].str.replace(' ', '', regex=False)

# Labels to split out. Rows whose label is not in this list end up in no subset.
labels = ['Bias', 'Drift', 'Gain', 'NoFault', 'Outliers', 'Precisiondegradation']

# Create the output directory up front; to_csv does not create missing folders.
os.makedirs(data_save_path, exist_ok=True)

datasets = {}    # label -> DataFrame containing only the rows with that label
dataframes = []  # names (strings) of the df_<label> variables created below

for label in labels:
    datasets[label] = df[df['label'] == label]

    # Also expose the subset as a notebook-level variable named df_<label>;
    # other cells refer to these names directly (e.g. df_Bias).
    globals()[f'df_{label}'] = datasets[label]
    dataframes.append(f'df_{label}')

    # Save each subset to its own CSV file (row index omitted).
    datasets[label].to_csv(os.path.join(data_save_path, f'df_{label}.csv'), index=False)

In [49]:
# Print the size of each new DataFrame
print("Size of df_Bias:", df_Bias.shape)
print("Size of df_Drift:", df_Drift.shape)
print("Size of df_Gain:", df_Gain.shape)
print("Size of df_NoFault:", df_NoFault.shape)
print("Size of df_Outliers:", df_Outliers.shape)
print("Size of df_PrecisionDegredatation:", df_Precisiondegradation.shape)

Size of df_Bias: (184700, 2)
Size of df_Drift: (184701, 2)
Size of df_Gain: (184701, 2)
Size of df_NoFault: (184177, 2)
Size of df_Outliers: (524, 2)
Size of df_PrecisionDegredatation: (184701, 2)


In [33]:
def split_column_into_rows(df, num_columns, overlapping_ratio):
    """Reshape the 'value' column of ``df`` into fixed-width, overlapping rows.

    Consecutive rows start ``num_columns - overlap`` samples apart, where
    ``overlap = int(num_columns * overlapping_ratio / 100)``. Rows that run
    past the end of the data are filled out with zeros, so the trailing
    row(s) may contain padding rather than real samples.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'value' column (numeric) and a 'label' column. The
        label of the first row (by position) is used for every output row,
        so the index of ``df`` does not need to start at 0.
    num_columns : int
        Number of samples per output row; must be at least 1.
    overlapping_ratio : float
        Percentage of each row that is repeated at the start of the next
        row. Must leave at least one new sample per row (i.e. the computed
        overlap must be in ``[0, num_columns)``).

    Returns
    -------
    pandas.DataFrame
        One row per window: a leading 'label' column followed by columns
        ``0 .. num_columns - 1`` holding float values. Empty input yields a
        frame with zero rows and the same columns.

    Raises
    ------
    ValueError
        If ``num_columns`` is smaller than 1, or the overlap is negative or
        leaves no new sample per row (e.g. ``overlapping_ratio >= 100``).
    """
    if num_columns < 1:
        raise ValueError("num_columns must be at least 1")

    # Samples shared between one row and the next.
    overlapping_elements = int(num_columns * overlapping_ratio / 100)

    # Distance between the starting samples of two consecutive rows.
    row_elements = num_columns - overlapping_elements
    if overlapping_elements < 0 or row_elements < 1:
        raise ValueError(
            "overlapping_ratio must be non-negative and leave at least one "
            "new element per row"
        )

    values = df['value'].to_numpy(dtype=np.float64)

    # Ceiling division: every sample is the start region of some row.
    num_rows = (len(values) + row_elements - 1) // row_elements

    if num_rows:
        # Pad with zeros just far enough for the last row to be full width.
        required_length = (num_rows - 1) * row_elements + num_columns
        values = np.pad(
            values,
            (0, required_length - len(values)),
            mode='constant',
            constant_values=0,
        )

    # Build all rows at once: row i covers values[i*step : i*step + num_columns].
    row_starts = np.arange(num_rows) * row_elements
    window_index = row_starts[:, None] + np.arange(num_columns)
    new_df = pd.DataFrame(values[window_index])

    # Take the label by position so callers need not reset the index first.
    label_name = df['label'].iloc[0] if len(df) else None
    new_df.insert(0, 'label', label_name)

    return new_df

In [57]:
# Renumber the rows from 0 and discard the old index.
df_Precisiondegradation = df_Precisiondegradation.reset_index(drop=True)

In [59]:
# Trial run of the windowing helper: rows of 10 samples with a 50 % overlap.
df_new = split_column_into_rows(
    df_Precisiondegradation,
    num_columns=10,
    overlapping_ratio=50,
)

Unnamed: 0,value,label
0,-1.003319,Precisiondegradation
1,-1.005235,Precisiondegradation
2,-1.000776,Precisiondegradation
3,-1.008916,Precisiondegradation
4,-1.000741,Precisiondegradation
...,...,...
184696,-1.001516,Precisiondegradation
184697,-0.995759,Precisiondegradation
184698,-1.001589,Precisiondegradation
184699,-1.001919,Precisiondegradation


In [86]:
# Window every per-label subset into fixed-length rows and stack the results.
WINDOW_LENGTH = 150    # samples per row
OVERLAP_PERCENT = 10   # percentage of each row repeated at the start of the next

windowed_frames = []
for label_frame in datasets.values():
    # split_column_into_rows looks the label up at index 0, so renumber first.
    # A dedicated loop variable is used so the notebook-level `df` is not overwritten.
    label_frame = label_frame.reset_index(drop=True)
    windows = split_column_into_rows(label_frame, WINDOW_LENGTH, OVERLAP_PERCENT)

    # With a non-zero overlap the final row always runs past the end of the
    # data and is filled out with zeros, so it is discarded.
    windowed_frames.append(windows.iloc[:-1])

# Concatenate once at the end instead of growing a frame inside the loop;
# ignore_index renumbers the combined rows from 0.
total_data = (
    pd.concat(windowed_frames, ignore_index=True) if windowed_frames else pd.DataFrame()
)

print(total_data.shape)



(6839, 151)


In [74]:
def convert_labels_to_integers(dataframe):
    """Replace the 'label' column with 1-based integer codes.

    Codes are assigned in order of first appearance of each distinct label.
    The column is overwritten on the frame that was passed in, each
    label-to-code assignment is printed, and that same frame is returned.
    """
    distinct_labels = dataframe['label'].unique()
    codes = dict(zip(distinct_labels, range(1, len(distinct_labels) + 1)))

    dataframe['label'] = dataframe['label'].map(codes)

    # Report the mapping so the integer codes can be traced back to names.
    for original, code in codes.items():
        print(f"Label '{original}' changed to integer '{code}'")

    return dataframe

In [87]:
# Encode the string labels as integers. convert_labels_to_integers rewrites the
# 'label' column on the frame it receives and returns that same frame, so
# total_data1 and total_data refer to one object afterwards.
total_data1 = convert_labels_to_integers(total_data)

# Preview a random sample of rows; the fixed random_state keeps the displayed
# rows the same on every re-run.
total_data1.sample(30, random_state=42)

Label 'Bias' changed to integer '1'
Label 'Drift' changed to integer '2'
Label 'Gain' changed to integer '3'
Label 'NoFault' changed to integer '4'
Label 'Outliers' changed to integer '5'
Label 'Precisiondegradation' changed to integer '6'


Unnamed: 0,label,0,1,2,3,4,5,6,7,8,...,140,141,142,143,144,145,146,147,148,149
357,1,-0.902601,-0.901767,-0.904189,-0.90139,-0.903024,-0.901387,-0.900995,-0.902005,-0.898438,...,-0.901487,-0.901729,-0.902451,-0.900814,-0.900533,-0.901437,-0.900414,-0.901905,-0.903139,-0.899622
4659,4,-1.002117,-1.001448,-1.00151,-1.001929,-1.002305,-1.002051,-1.001875,-1.001429,-1.001252,...,-1.000437,-1.001006,-1.001621,-1.001333,-1.001571,-1.00126,-1.001536,-1.000756,-1.00159,-1.002059
6427,6,-1.006404,-0.997979,-1.003148,-0.998395,-0.995959,-0.991958,-1.002684,-0.999029,-1.002374,...,-1.000091,-0.995987,-1.00248,-1.001873,-0.998618,-0.997106,-1.000121,-1.006567,-1.00948,-1.007938
2103,2,8.920844,8.921125,8.92131,8.921448,8.921141,8.92141,8.921598,8.921833,8.92254,...,8.935655,8.934556,8.935713,8.934941,8.93546,8.935206,8.935322,8.935725,8.936071,8.935449
226,1,-0.895648,-0.895141,-0.899949,-0.904615,-0.908793,-0.912241,-0.911672,-0.90854,-0.903105,...,-0.895271,-0.897258,-0.897332,-0.899126,-0.899984,-0.903212,-0.906668,-0.906887,-0.907794,-0.907975
3754,3,-1.005991,-1.004992,-1.007067,-1.009662,-1.006298,-1.006222,-1.007625,-1.004857,-1.006145,...,-1.004185,-1.006145,-1.004511,-1.007529,-1.001187,-1.002359,-1.005818,-1.007702,-1.007951,-1.007721
386,1,-0.901809,-0.902251,-0.902509,-0.902578,-0.901337,-0.90194,-0.901944,-0.90111,-0.901021,...,-0.900795,-0.90033,-0.901079,-0.901102,-0.90106,-0.900983,-0.90023,-0.900456,-0.90036,-0.900403
6734,6,-0.998422,-1.003236,-0.994091,-1.004914,-0.997909,-1.000923,-1.000923,-1.000508,-1.00264,...,-0.999354,-0.997639,-0.996439,-1.003115,-0.995293,-1.00342,-1.001953,-0.996379,-0.997166,-0.995815
3931,3,-1.007067,-1.008893,-1.007951,-1.003339,-1.012141,-1.006914,-1.005223,-1.004492,-1.005915,...,-1.005319,-1.010431,-1.014101,-1.004569,-1.007625,-1.00528,-1.003089,-1.000937,-1.005549,-1.007221
1991,2,7.409771,7.408556,7.409098,7.409706,7.409859,7.410382,7.410263,7.409156,7.409591,...,7.422714,7.42223,7.423021,7.42462,7.422779,7.423579,7.425347,7.423249,7.423644,7.424225


In [83]:
# Persist the integer-labelled frame as a pickle file. The path is relative,
# so the file is written to the current working directory.
total_data1.to_pickle('total_data_pickle')

In [84]:
# Read the pickle back into a separate variable.
# NOTE(review): unpickling can execute arbitrary code; only load files this notebook produced.
total_data2 = pd.read_pickle('total_data_pickle')