In [2]:
import warnings
# Set the warnings to be ignored
warnings.filterwarnings('ignore')

import os
import sys
import logging
import pandas as pd 
import numpy as np 
from sklearn.preprocessing import StandardScaler 
from sklearn.linear_model  import Ridge,Lasso,RidgeCV, LassoCV, ElasticNet, ElasticNetCV, LogisticRegression
from sklearn.model_selection import train_test_split
from statsmodels.stats.outliers_influence import variance_inflation_factor 
from sklearn.metrics import accuracy_score, confusion_matrix, roc_curve, roc_auc_score
import matplotlib.pyplot as plt

from pandas_profiling import ProfileReport
import seaborn as sns
import pickle

In [9]:
import os

def output_filler(df, path):
    # Split the path using the appropriate separator for the current OS
    path = os.path.normpath(path)
    path_parts = path.split(os.path.sep)

    # Get the second-to-last part of the path
    output = path_parts[-2]

    # Add a new "Output" column to the DataFrame and set it to the output value
    df["Output"] = output
        
    return df


In [10]:
# How output_filler works
path = "D:/Data-Science-D-drive/Datasets-D-drive/sensor_placement/bending1/dataset1.csv"
df = pd.read_csv("D:/Data-Science-D-drive/Datasets-D-drive/sensor_placement/bending1/dataset1.csv", sep = "\t")
output_filler(df,path)

Unnamed: 0,# Task: bending1,Output
0,# Frequency (Hz): 20,bending1
1,# Clock (millisecond): 250,bending1
2,# Duration (seconds): 120,bending1
3,"# Columns: time,avg_rss12,var_rss12,avg_rss13,...",bending1
4,"0,39.25,0.43,22.75,0.43,33.75,1.30",bending1
...,...,...
479,"118750,43.33,0.47,25.00,0.00,30.00,0.00",bending1
480,"119000,43.50,0.50,25.50,0.50,30.00,0.00",bending1
481,"119250,43.50,0.50,24.75,0.43,30.00,0.00",bending1
482,"119500,43.50,0.50,24.33,0.47,30.00,0.00",bending1


# Filling Output Column and Creating New DatFrames

In [60]:
# Configure the logging module
# Remove or disable existing logging handlers
root_logger = logging.getLogger()
for handler in root_logger.handlers:
    root_logger.removeHandler(handler)

# Configure the logging module with a time format
log_filename = "error.log"
log_level = logging.DEBUG
logging.basicConfig(filename=log_filename, level=log_level, format='%(asctime)s [%(levelname)s] %(message)s')  # Save errors+info to a file

op_filepath_list = []
        
try:
    logging.info(f"Reading Main Folder")
    main_folder_path = "D:\Data-Science-D-drive\Datasets-D-drive\sensor_placement"
    main_folder_path = os.path.normpath(main_folder_path)
    
    try:
        ##### LISTING AND STORING SUB-FOLDERS CONTENTS PRESENT IN MAIN FOLDER PATH I.E. BENDING1, BENDING2, SITTING, ETC ... #####
        contents = os.listdir(main_folder_path)

    except Exception as e:
        logging.error(f"Error listing contents in {main_folder_path}: {e}")

    else:
        for paths in contents:
            ##### STORING SUB-FOLDERS PATHS PRESENT IN MAIN FOLDER PATH #####
            sub_folder_path = os.path.join(main_folder_path, paths)

            try:
                #### LISTING CONTENTS PRESENT IN SUB-FOLDERS PATH I.E. DATASET1, DATASET2, DATASET3, ETC... ####
                sub_folder_contents = os.listdir(sub_folder_path)
                    
                # CREATING "OUTPUT" DIRECTORY IN EACH SUB-FOLDERS TO STORE OUTPUT CSV #   
                directory_name = "output"

                output_path = os.path.join(sub_folder_path, directory_name)
                os.makedirs(output_path, exist_ok=True)

            except Exception as e:
                logging.error(f"Error listing contents in {sub_folder_path}: {e}")

            else:
                path_list = []

                for item in sub_folder_contents:
                    #### STORING FILE'S PATH PRESENT IN SUB-FOLDERS I.E. DATASET1, DATASET2, DATASET3, ETC... ####
                    file_path = os.path.join(sub_folder_path, item)
                    path_list.append(file_path)
                            
                ### READING FILE PATHS IN SUB-FOLDERS ###
                for path in path_list:
                    logging.info("="*100)
                    try:
                        csv_content = pd.read_csv(path, sep=",", skiprows=4, header=0)
                        csv_content = csv_content.reset_index(drop=True)
                        csv_content = output_filler(csv_content, path)

                        logging.info(f"Processing {path}")
                        logging.info(csv_content.head(2).to_string(index=False))

                        ### STORING NEW FILE'S CREATED IN OUTPUT FOLDER I.E. NEW_DATASET1, NEW_DATASET2, NEW_DATASET3, ETC... ####
                        try:
                            file_name = os.path.split(path)
                            file_name = file_name[-1].split(".")[0]
                        

                            logging.info("Creating Output File Path")
                            output_file_path = os.path.join(output_path, f"new_{file_name}.csv")
                            op_filepath_list.append(output_file_path)
                            csv_content.to_csv(output_file_path)
                        
                        except Exception as e:
                            logging.error(f"Error creating Output File Path for {file_name}: {e}")


                    except Exception as e:
                        logging.error(f"Error Reading contents in {path}: {e}")


except Exception as e:
    print(e)


In [61]:
folder_path = "D:/Data-Science-D-drive/Datasets-D-drive/sensor_placement/bending1/output"

folder_path = os.path.normpath(folder_path)

# List all files and directories in the folder
contents = os.listdir(folder_path)

path_list = []

# Store the paths
for item in contents:
    path_list.append(os.path.join(folder_path, item))

In [62]:
path_list

['D:\\Data-Science-D-drive\\Datasets-D-drive\\sensor_placement\\bending1\\output\\new_dataset1.csv',
 'D:\\Data-Science-D-drive\\Datasets-D-drive\\sensor_placement\\bending1\\output\\new_dataset2.csv',
 'D:\\Data-Science-D-drive\\Datasets-D-drive\\sensor_placement\\bending1\\output\\new_dataset3.csv',
 'D:\\Data-Science-D-drive\\Datasets-D-drive\\sensor_placement\\bending1\\output\\new_dataset4.csv',
 'D:\\Data-Science-D-drive\\Datasets-D-drive\\sensor_placement\\bending1\\output\\new_dataset5.csv',
 'D:\\Data-Science-D-drive\\Datasets-D-drive\\sensor_placement\\bending1\\output\\new_dataset6.csv',
 'D:\\Data-Science-D-drive\\Datasets-D-drive\\sensor_placement\\bending1\\output\\new_dataset7.csv']

In [15]:
import logging
import os

# Remove or disable existing logging handlers
root_logger = logging.getLogger()
for handler in root_logger.handlers:
    root_logger.removeHandler(handler)

# Configure the logging module with a time format
log_filename = "concatenation.log"
log_level = logging.DEBUG
logging.basicConfig(filename=log_filename, level=log_level, format='%(asctime)s [%(levelname)s] %(message)s')

output_path = r"D:\Data-Science-D-drive\Datasets-D-drive\sensor_placement"

contents = os.list
for path in path_list:
    try:
        # List all files and directories in the folder
        contents = os.listdir(output_path)

        path_list = []

        # Store the paths
        for item in contents:
            path_list.append(os.path.join(output_path, item))

        final_df = pd.DataFrame()
        for path in path_list:
            df = pd.read_csv(path)
            df = df.reset_index(drop=True)

            final_df = pd.concat([final_df,df], ignore_index=True)
            
            # Get the second-to-last part of the path
            path_parts = path.split(os.path.sep)
            output_fname = path_parts[-1]
            print(f"{output_fname} Shape = {df.shape}")

        final_df_path = os.path.join(output_path, "final_df.csv")

        # Extract the first column as a DataFrame using iloc
        first_column_df = final_df.iloc[:, [0]]
        final_df = final_df.drop(first_column_df, axis=1)

        final_df.to_csv(final_df_path)
    
    except Exception as e:
        logging.error(f"Error processing {path}: {e}")



In [16]:
op_filepath_list

['D:\\Data-Science-D-drive\\Datasets-D-drive\\sensor_placement\\bending1\\dataset1.csv',
 'D:\\Data-Science-D-drive\\Datasets-D-drive\\sensor_placement\\bending1\\dataset2.csv',
 'D:\\Data-Science-D-drive\\Datasets-D-drive\\sensor_placement\\bending1\\dataset3.csv',
 'D:\\Data-Science-D-drive\\Datasets-D-drive\\sensor_placement\\bending1\\dataset4.csv',
 'D:\\Data-Science-D-drive\\Datasets-D-drive\\sensor_placement\\bending1\\dataset5.csv',
 'D:\\Data-Science-D-drive\\Datasets-D-drive\\sensor_placement\\bending1\\dataset6.csv',
 'D:\\Data-Science-D-drive\\Datasets-D-drive\\sensor_placement\\bending1\\dataset7.csv',
 'D:\\Data-Science-D-drive\\Datasets-D-drive\\sensor_placement\\bending1\\new_dataset1.csv',
 'D:\\Data-Science-D-drive\\Datasets-D-drive\\sensor_placement\\bending1\\new_dataset2.csv',
 'D:\\Data-Science-D-drive\\Datasets-D-drive\\sensor_placement\\bending1\\new_dataset3.csv',
 'D:\\Data-Science-D-drive\\Datasets-D-drive\\sensor_placement\\bending1\\new_dataset4.csv',
 'D:\

In [None]:
"""
for output_path in op_filepath_list:
    try:
        df[i] = pd.read_csv(output_path)
        logging.info(f"Reading Files: {output_path}")

        logging.info(f"Concatenating Datasets")

        final_df = 
    except Exception as e:
        logging.error(f"Error Reading Paths {output_path}: \n\t\t\t\t\t\t{e}")
        logging.info("-"*100)
"""

In [64]:
path_list

['D:\\Data-Science-D-drive\\Datasets-D-drive\\sensor_placement\\bending1\\output\\new_dataset1.csv',
 'D:\\Data-Science-D-drive\\Datasets-D-drive\\sensor_placement\\bending1\\output\\new_dataset2.csv',
 'D:\\Data-Science-D-drive\\Datasets-D-drive\\sensor_placement\\bending1\\output\\new_dataset3.csv',
 'D:\\Data-Science-D-drive\\Datasets-D-drive\\sensor_placement\\bending1\\output\\new_dataset4.csv',
 'D:\\Data-Science-D-drive\\Datasets-D-drive\\sensor_placement\\bending1\\output\\new_dataset5.csv',
 'D:\\Data-Science-D-drive\\Datasets-D-drive\\sensor_placement\\bending1\\output\\new_dataset6.csv',
 'D:\\Data-Science-D-drive\\Datasets-D-drive\\sensor_placement\\bending1\\output\\new_dataset7.csv']

In [66]:
final_df.shape

(3360, 9)

In [82]:
# Extract the first column as a DataFrame using iloc
first_column_df = df.iloc[:, [0]]

# Drop the first_column_df DataFrame
df = df.drop(first_column_df, axis=1)

Unnamed: 0.1,Unnamed: 0
0,0
1,1
2,2
3,3
4,4
...,...
3355,475
3356,476
3357,477
3358,478
