In [17]:
import pandas as pd
import numpy as np
import os

In [8]:
# Resolve the output directory: a folder named "data" located in the parent
# of the directory the notebook is executed from.
current_folder_path = os.getcwd()
parent_folder_path = os.path.split(current_folder_path)[0]
data_save_path = os.path.join(parent_folder_path, 'data')

In [71]:
# Load the raw recording, then discard what the later cells do not use:
# the 'time' column and the row whose index label is 0.
df = (
    pd.read_feather("df_f_feather")
    .drop(columns=['time'])
    .drop(index=0)
)

In [9]:
# Display the resolved output directory (the per-label CSV files are written there).
data_save_path

'c:\\Users\\UTKU\\OneDrive\\Desktop\\MS-Term4\\ProjectArbeit\\data'

In [55]:
# Remove every space from the label names so they can be compared against
# the space-free names listed in `labels` below.
df['label'] = df['label'].str.replace(' ', '')

# Classes to separate; each one gets its own DataFrame.
labels = ['Bias', 'Drift', 'Gain', 'NoFault', 'Outliers', 'Precisiondegradation']

datasets = {}    # label -> rows of df carrying that label
dataframes = []  # names ('df_<label>') of the module-level frames created below

# to_csv does not create missing directories, so make sure the target exists.
os.makedirs(data_save_path, exist_ok=True)

for label in labels:
    subset = df[df['label'] == label]
    datasets[label] = subset

    # Also expose the subset as a module-level variable named df_<label>;
    # later cells refer to df_Bias, df_Drift, ... directly by name.
    frame_name = f'df_{label}'
    globals()[frame_name] = subset
    dataframes.append(frame_name)

    # Save the subset as <data_save_path>/df_<label>.csv (index not written).
    subset.to_csv(os.path.join(data_save_path, f'{frame_name}.csv'), index=False)

In [49]:
# Print the size of each per-label DataFrame.
# The printed name is derived from the label itself, so it always matches the
# df_<label> variable that holds the same rows.
for label in labels:
    print(f"Size of df_{label}:", datasets[label].shape)

Size of df_Bias: (184700, 2)
Size of df_Drift: (184701, 2)
Size of df_Gain: (184701, 2)
Size of df_NoFault: (184177, 2)
Size of df_Outliers: (524, 2)
Size of df_PrecisionDegredatation: (184701, 2)


In [33]:
def split_column_into_rows(df, num_columns, overlapping_ratio):
    """Cut the 'value' column of ``df`` into fixed-length, overlapping rows.

    Consecutive rows start ``num_columns - overlap`` samples apart, where
    ``overlap = int(num_columns * overlapping_ratio / 100)``. The end of the
    series is padded with zeros so that the trailing row(s) are complete.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'value' column (the samples) and a 'label' column.
        The label of the first row is applied to every output row. The index
        of ``df`` is not used, so it does not have to start at 0.
    num_columns : int
        Number of samples per output row.
    overlapping_ratio : float
        Percentage of each row that is shared with the following row. The
        resulting overlap must be at least 0 and smaller than ``num_columns``.

    Returns
    -------
    pandas.DataFrame
        A 'label' column followed by the columns 0 .. num_columns - 1 holding
        the samples as float64. The frame has no rows if ``df`` has no rows.

    Raises
    ------
    ValueError
        If the overlap is negative or leaves no positive step between rows
        (for example ``overlapping_ratio=100``).
    """
    values = df['value'].to_numpy()

    # Number of samples shared by two adjacent rows.
    overlapping_elements = int(num_columns * overlapping_ratio / 100)
    if not 0 <= overlapping_elements < num_columns:
        raise ValueError(
            "overlapping_ratio must leave a positive step between rows: "
            f"got an overlap of {overlapping_elements} samples for "
            f"num_columns={num_columns}"
        )

    # Nothing to window: return an empty frame with the usual column layout.
    if len(values) == 0:
        empty_df = pd.DataFrame(np.zeros((0, num_columns)))
        empty_df.insert(0, 'label', np.array([], dtype=object))
        return empty_df

    # Distance between the start positions of two adjacent rows.
    row_elements = num_columns - overlapping_elements

    # Ceiling division: one row for every started step of `row_elements` samples.
    num_rows = (len(values) + row_elements - 1) // row_elements

    # Pad with zeros only as far as the last row actually reaches.
    required_length = (num_rows - 1) * row_elements + num_columns
    values = np.pad(
        values,
        (0, required_length - len(values)),
        mode='constant',
        constant_values=0,
    )

    # Entry [i, j] is the position of sample j of row i in the padded series;
    # indexing with it gathers all rows at once.
    window_index = (
        np.arange(num_rows)[:, None] * row_elements
        + np.arange(num_columns)[None, :]
    )
    new_values = values[window_index].astype(np.float64)

    new_df = pd.DataFrame(new_values)

    # Positional lookup, so the caller does not have to reset the index first.
    label_name = df['label'].iloc[0]
    new_df.insert(0, 'label', label_name)

    return new_df

In [57]:
# Give the Precisiondegradation subset a fresh 0-based index; the old index is discarded.
df_Precisiondegradation = df_Precisiondegradation.reset_index(drop=True)

In [59]:
# Trial run on a single class: rows of 10 samples with a 50 % overlap setting.
df_new = split_column_into_rows(
    df_Precisiondegradation,
    num_columns=10,
    overlapping_ratio=50,
)

Unnamed: 0,value,label
0,-1.003319,Precisiondegradation
1,-1.005235,Precisiondegradation
2,-1.000776,Precisiondegradation
3,-1.008916,Precisiondegradation
4,-1.000741,Precisiondegradation
...,...,...
184696,-1.001516,Precisiondegradation
184697,-0.995759,Precisiondegradation
184698,-1.001589,Precisiondegradation
184699,-1.001919,Precisiondegradation


In [77]:
# Window every per-label dataset (150 samples per row, 10 % overlap setting)
# and stack the results into one table.
windowed_frames = []
for subset in datasets.values():
    windowed = split_column_into_rows(subset.reset_index(drop=True), 150, 10)
    # Drop the final row, which is completed with zero padding.
    # NOTE(review): with a larger overlap more than one trailing row can
    # contain padding - confirm before changing the window settings.
    windowed_frames.append(windowed.iloc[:-1])

# Concatenate once instead of growing the frame inside the loop;
# ignore_index gives the result a fresh 0-based index.
total_data = (
    pd.concat(windowed_frames, ignore_index=True)
    if windowed_frames
    else pd.DataFrame()
)

print(total_data.shape)



(6845, 151)


In [74]:
def convert_labels_to_integers(dataframe):
    """Replace the values of the 'label' column with integer codes.

    Codes start at 1 and are handed out in the order in which the distinct
    labels first appear in the column. Each label/code pair is printed.

    The 'label' column of ``dataframe`` is overwritten in place; the same
    object is returned.
    """
    codes = {}
    for position, name in enumerate(dataframe['label'].unique(), start=1):
        codes[name] = position

    dataframe['label'] = dataframe['label'].map(codes)

    # Report the mapping so the codes can be traced back to their labels.
    for name in codes:
        print(f"Label '{name}' changed to integer '{codes[name]}'")

    return dataframe

In [78]:
# Encode the labels. The function overwrites the 'label' column of the frame
# it receives and returns that same frame, so total_data1 and total_data
# refer to one object.
total_data1 = convert_labels_to_integers(total_data)
# Spot-check 30 randomly chosen rows.
total_data1.sample(n=30)

Label 'Bias' changed to integer '1'
Label 'Drift' changed to integer '2'
Label 'Gain' changed to integer '3'
Label 'NoFault' changed to integer '4'
Label 'Outliers' changed to integer '5'
Label 'Precisiondegradation' changed to integer '6'


Unnamed: 0,label,0,1,2,3,4,5,6,7,8,...,140,141,142,143,144,145,146,147,148,149
1772,2,4.438956,4.438825,4.43906,4.43826,4.438868,4.438303,4.43813,4.439498,4.439229,...,4.453429,4.454063,4.453982,4.453325,4.45356,4.454475,4.453667,4.45429,4.453717,4.454832
3545,3,-1.006414,-1.001898,-1.009508,-1.005684,-1.009239,-1.005414,-1.000418,-1.00284,-1.000168,...,-1.017484,-1.012199,-1.004473,-1.001975,-0.996939,-1.002647,-1.00136,-1.00701,-1.013217,-1.015619
130,1,-0.901306,-0.900499,-0.900599,-0.902332,-0.900526,-0.902278,-0.900537,-0.901955,-0.902866,...,-0.904108,-0.903174,-0.901137,-0.900453,-0.900014,-0.899957,-0.899326,-0.898604,-0.899119,-0.900114
1669,2,3.047695,3.050489,3.049048,3.048879,3.051043,3.054279,3.05328,3.052354,3.052062,...,3.069759,3.065127,3.063694,3.062929,3.063455,3.061242,3.064351,3.06098,3.064409,3.062718
2088,2,8.704621,8.705267,8.704433,8.705052,8.705652,8.705863,8.705648,8.705925,8.705979,...,8.719725,8.719213,8.719805,8.719525,8.719613,8.719337,8.720628,8.720302,8.720102,8.719718
5739,6,-1.000844,-1.004176,-1.009994,-1.006269,-1.002896,-1.001425,-1.006136,-0.995537,-0.998917,...,-1.004907,-1.003437,-1.001731,-0.991094,-0.999958,-0.991273,-0.998482,-0.997647,-0.998632,-0.99949
2148,2,9.514183,9.514914,9.514818,9.514841,9.51453,9.515637,9.515587,9.515756,9.515133,...,9.528987,9.530251,9.529475,9.529586,9.529798,9.529856,9.530279,9.53004,9.530225,9.529756
1377,2,-0.892764,-0.893017,-0.893244,-0.893379,-0.89294,-0.893536,-0.894343,-0.894001,-0.892733,...,-0.879264,-0.877861,-0.878395,-0.877987,-0.876988,-0.878218,-0.879267,-0.876665,-0.878756,-0.878448
4231,4,-0.999968,-1.000925,-1.000268,-1.000945,-0.999818,-1.000914,-1.001144,-1.000852,-1.000356,...,-1.000364,-1.000403,-1.000333,-1.000841,-1.000399,-1.001244,-1.000599,-1.001748,-1.001767,-1.00229
4719,4,-1.000314,-1.000883,-1.001156,-1.001994,-1.002947,-1.003931,-1.001775,-1.001217,-0.999699,...,-1.002582,-1.00254,-1.002597,-1.003577,-1.002359,-1.002682,-1.002617,-1.00169,-1.000814,-1.00096


In [83]:
# Save the integer-labelled table to the file 'total_data_pickle'
# (relative path, so it is created in the current working directory).
total_data1.to_pickle('total_data_pickle')

In [84]:
# Read 'total_data_pickle' back from disk into total_data2.
# NOTE: unpickling can execute arbitrary code - only load pickle files that
# come from a trusted source.
total_data2 = pd.read_pickle('total_data_pickle')