In [46]:
import os
import glob
import pandas as pd
import matplotlib.pyplot as plt

In [47]:
# Collect the per-subject CSV files from the updated_data_sliding folder.
# glob returns matches in arbitrary, OS-dependent order, so the list is
# sorted to keep the processing order (and the printed log) reproducible.
file_paths = sorted(glob.glob("updated_data_sliding/subjectID_*.csv"))

In [48]:
# For every collected subject file: rename whatever the first column is
# called to "DateTime" and overwrite the file in place.
for csv_path in file_paths:
    subject_frame = pd.read_csv(csv_path)

    # Only the first column's label changes; the data itself is untouched
    first_column = subject_frame.columns[0]
    renamed = subject_frame.rename(columns={first_column: "DateTime"})

    # index=False keeps the row index out of the rewritten file
    renamed.to_csv(csv_path, index=False)

    print("Updated file:", csv_path)

Updated file: updated_data_sliding\subjectID_1.csv
Updated file: updated_data_sliding\subjectID_10.csv
Updated file: updated_data_sliding\subjectID_11.csv
Updated file: updated_data_sliding\subjectID_12.csv
Updated file: updated_data_sliding\subjectID_13.csv
Updated file: updated_data_sliding\subjectID_14.csv
Updated file: updated_data_sliding\subjectID_15.csv
Updated file: updated_data_sliding\subjectID_16.csv
Updated file: updated_data_sliding\subjectID_17.csv
Updated file: updated_data_sliding\subjectID_18.csv
Updated file: updated_data_sliding\subjectID_19.csv
Updated file: updated_data_sliding\subjectID_2.csv
Updated file: updated_data_sliding\subjectID_20.csv
Updated file: updated_data_sliding\subjectID_21.csv
Updated file: updated_data_sliding\subjectID_22.csv
Updated file: updated_data_sliding\subjectID_23.csv
Updated file: updated_data_sliding\subjectID_24.csv
Updated file: updated_data_sliding\subjectID_25.csv
Updated file: updated_data_sliding\subjectID_26.csv
Updated file: 

## Normalising the data

In [49]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler

In [50]:
# Folder holding the per-subject CSV files (first column named "DateTime")
folder_path = "updated_data_sliding"

# Folder that receives the normalised copies of those files
output_folder = "updated_data_sliding_norm"

# Create the output folder; exist_ok=True keeps the cell safe to re-run and
# replaces the separate exists()-then-makedirs() check
os.makedirs(output_folder, exist_ok=True)

# Normalise each CSV file in the input folder independently
for filename in os.listdir(folder_path):
    if filename.endswith(".csv"):
        file_path = os.path.join(folder_path, filename)
        output_file_path = os.path.join(output_folder, filename)

        # Load the CSV file
        df = pd.read_csv(file_path)

        # Every column except DateTime is normalised
        columns_to_normalize = df.columns[df.columns != "DateTime"]

        # A fresh MinMaxScaler is fitted per file, so each file is scaled
        # against its own column minima and maxima, not a global range
        scaler = MinMaxScaler()
        df[columns_to_normalize] = scaler.fit_transform(df[columns_to_normalize])

        # Save the normalised data under the same filename in the output folder
        df.to_csv(output_file_path, index=False)

## Creating a separate file for each date

In [51]:
# Define the path of the folder containing the normalised CSV files
folder_path = 'updated_data_sliding_norm'

# Create a new folder to store the per-date data files
output_folder = 'updated_data_sliding_norm_dated'
os.makedirs(output_folder, exist_ok=True)

# Iterate over the files in the folder
for filename in os.listdir(folder_path):
    if filename.endswith('.csv'):
        # The subject ID is the filename without its extension,
        # e.g. "subjectID_1.csv" -> "subjectID_1"
        subject_id = os.path.splitext(filename)[0]

        # Read the CSV file
        file_path = os.path.join(folder_path, filename)
        df = pd.read_csv(file_path)

        # The first 8 characters of DateTime are used as the grouping key.
        # The key is kept as a separate Series instead of a new column, so
        # the helper values never end up in the files written below.
        date_prefixes = df['DateTime'].str[:8].rename('DatePrefix')

        # Group the rows by that prefix and write one file per group
        for date_prefix, group in df.groupby(date_prefixes):
            # Create a new filename for the date
            new_filename = f"{subject_id}_{date_prefix}.csv"

            # Save the group data to a new CSV file
            output_path = os.path.join(output_folder, new_filename)
            group.to_csv(output_path, index=False)

In [52]:
# Folder with the per-subject, per-date CSV files
folder_path = 'updated_data_sliding_norm_dated'

# Strip the helper 'DatePrefix' column from every CSV in the folder and
# write each file back in place.
for filename in os.listdir(folder_path):
    if not filename.endswith('.csv'):
        continue

    dated_csv = os.path.join(folder_path, filename)
    daily_frame = pd.read_csv(dated_csv)

    # errors='ignore' leaves frames without the column unchanged; the file
    # is rewritten either way
    cleaned = daily_frame.drop(columns='DatePrefix', errors='ignore')
    cleaned.to_csv(dated_csv, index=False)

In [53]:
# Load the label file and cast its Subject_ID column to integer in one step
df = pd.read_csv('fatiguelevel.csv').astype({'Subject_ID': int})

# Overwrite the label file with the integer IDs (row index not written)
df.to_csv('fatiguelevel.csv', index=False)

## Creating a CSV file that contains the file path and the label (stressed or not stressed) for each day

In [54]:
# Load the label table
df = pd.read_csv('fatiguelevel.csv')

# For each row, build the path of the matching per-date file from the
# subject ID and the first 8 characters of DateTime.
candidate_paths = [
    os.path.join(
        'updated_data_sliding_norm_dated',
        f"subjectID_{subject}_{stamp[:8]}.csv",
    )
    for subject, stamp in zip(df['Subject_ID'], df['DateTime'])
]

# Keep the path only when the file is actually on disk; otherwise store an
# empty string as a placeholder for that row
file_locs = [path if os.path.exists(path) else '' for path in candidate_paths]

# Attach the paths as a new column and write the result to a new CSV file
df['file_loc'] = file_locs
df.to_csv('fatiguelevel_updated.csv', index=False)



In [55]:
# Read the updated CSV file
df = pd.read_csv('fatiguelevel_updated.csv')

# Move the 'file_loc' column to the beginning of the DataFrame.
# The other columns are selected by name rather than by slicing off the last
# position, so re-running this cell (when 'file_loc' is already first) keeps
# every column instead of duplicating 'file_loc' and dropping the last one.
cols = df.columns.tolist()
cols = ['file_loc'] + [col for col in cols if col != 'file_loc']
df = df[cols]

# Save the reordered DataFrame back to the same CSV file
df.to_csv('fatiguelevel_updated.csv', index=False)

In [56]:
# Tally the rows whose 'file_loc' is missing
missing_mask = df['file_loc'].isna()
null_count = missing_mask.sum()

# Report the tally
print("Number of rows with null 'file_loc':", null_count)

Number of rows with null 'file_loc': 67


In [57]:
# Keep only the rows that have a 'file_loc' value
has_file = df['file_loc'].notna()
df = df[has_file]

# Write the filtered table back to the CSV file (row index not written)
df.to_csv('fatiguelevel_updated.csv', index=False)

In [58]:
# Display the label table that remains after dropping rows without a file_loc
df

Unnamed: 0,file_loc,Subject_ID,DateTime,Average_Stress,Stressed
0,updated_data_sliding_norm_dated\subjectID_1_14...,1,14.03.19 20:01,0.214444,0
1,updated_data_sliding_norm_dated\subjectID_1_15...,1,15.03.19 20:01,0.224444,0
2,updated_data_sliding_norm_dated\subjectID_1_16...,1,16.03.19 20:47,0.060000,0
3,updated_data_sliding_norm_dated\subjectID_1_17...,1,17.03.19 20:01,0.190000,0
4,updated_data_sliding_norm_dated\subjectID_1_18...,1,18.03.19 20:13,0.294444,0
...,...,...,...,...,...
521,updated_data_sliding_norm_dated\subjectID_28_1...,28,10.08.18 23:13,0.264462,0
522,updated_data_sliding_norm_dated\subjectID_28_1...,28,13.08.18 21:39,0.427505,0
523,updated_data_sliding_norm_dated\subjectID_28_1...,28,14.08.18 23:27,0.426277,0
524,updated_data_sliding_norm_dated\subjectID_28_1...,28,16.08.18 00:51,0.301895,0


In [59]:
# Class balance of the original label file: rows per value of 'Stressed'
data = pd.read_csv('fatiguelevel.csv')
stressed_flags = data['Stressed']
count_0 = int((stressed_flags == 0).sum())
count_1 = int((stressed_flags == 1).sum())

print("Number of rows with 'stressed' column value as 0:", count_0)
print("Number of rows with 'stressed' column value as 1:", count_1)

Number of rows with 'stressed' column value as 0: 422
Number of rows with 'stressed' column value as 1: 104


In [60]:
# Class balance of the filtered table (df): rows per value of 'Stressed'
filtered_flags = df['Stressed']
count_0 = int((filtered_flags == 0).sum())
count_1 = int((filtered_flags == 1).sum())

print("Number of rows with 'stressed' column value as 0:", count_0)
print("Number of rows with 'stressed' column value as 1:", count_1)

Number of rows with 'stressed' column value as 0: 369
Number of rows with 'stressed' column value as 1: 90
