In [24]:
import pandas as pd
import os
import csv

# Dataset location information
DATASET_DIR = r"C:\Users\Aayush\Desktop\Projects\SVMvsDL\datasets\WISDM"

def convert_wisdm_txt_to_csv(input_txt_path, output_csv_path):
    """
    Convert WISDM format TXT file to CSV format
    
    Parameters:
    -----------
    input_txt_path : str
        Path to the input TXT file
    output_csv_path : str
        Path where the output CSV file will be saved
    """
    try:
        print(f"Attempting to convert file: {input_txt_path}")
        
        # First, check if the file exists
        if not os.path.exists(input_txt_path):
            print(f"Error: File does not exist: {input_txt_path}")
            return None
            
        # Create empty lists to store the data
        user_ids = []
        activities = []
        timestamps = []
        x_values = []
        y_values = []
        z_values = []
        
        # Open and read the text file
        with open(input_txt_path, 'r') as file:
            for line in file:
                # Remove any trailing semicolons and whitespace
                line = line.strip().rstrip(';')
                
                if line:  # Skip empty lines
                    # Split by comma
                    parts = line.split(',')
                    
                    # Check if we have all 6 expected parts
                    if len(parts) == 6:
                        user_ids.append(parts[0])
                        activities.append(parts[1])
                        timestamps.append(parts[2])
                        x_values.append(parts[3])
                        y_values.append(parts[4])
                        z_values.append(parts[5])
                    else:
                        print(f"Skipping malformed line: {line}")
        
        # Create a DataFrame
        df = pd.DataFrame({
            'user_id': user_ids,
            'activity': activities,
            'timestamp': timestamps,
            'x_accel': x_values,
            'y_accel': y_values,
            'z_accel': z_values
        })
        
        # Save as CSV
        df.to_csv(output_csv_path, index=False)
        print(f"Conversion successful! CSV file saved at: {output_csv_path}")
        
        # Display preview of the data
        print("\nPreview of the converted data:")
        print(df.head())
        print(f"\nTotal rows: {len(df)}")
        
        return df
        
    except Exception as e:
        print(f"Error during conversion: {e}")
        return None

def list_txt_files_in_dataset():
    """List all txt files in the WISDM dataset directory"""
    txt_files = []
    
    # First check if the directory exists
    if not os.path.exists(DATASET_DIR):
        print(f"Warning: Directory does not exist: {DATASET_DIR}")
        return txt_files
        
    for root, dirs, files in os.walk(DATASET_DIR):
        for file in files:
            if file.endswith('.txt'):
                txt_files.append(os.path.join(root, file))
    
    return txt_files

# Let's first check the directory structure
print(f"Checking directory: {DATASET_DIR}")
if os.path.exists(DATASET_DIR):
    print("Directory exists.")
    
    # List all files and directories in the WISDM folder
    print("\nAll files and directories in the WISDM folder:")
    for item in os.listdir(DATASET_DIR):
        item_path = os.path.join(DATASET_DIR, item)
        if os.path.isdir(item_path):
            print(f"Directory: {item}")
        else:
            print(f"File: {item}")
else:
    print("Directory does not exist. Please check the path.")

# List available txt files in the dataset directory
print("\nAvailable text files in the dataset directory:")
txt_files = list_txt_files_in_dataset()
if txt_files:
    for i, file in enumerate(txt_files):
        print(f"{i+1}. {file}")
else:
    print("No .txt files found in the directory.")
    
    # Try looking one level up or down
    parent_dir = os.path.dirname(DATASET_DIR)
    print(f"\nChecking parent directory: {parent_dir}")
    if os.path.exists(parent_dir):
        txt_in_parent = [os.path.join(root, file) 
                        for root, dirs, files in os.walk(parent_dir) 
                        for file in files if file.endswith('.txt')]
        if txt_in_parent:
            print("Found .txt files in parent directory or its subdirectories:")
            for i, file in enumerate(txt_in_parent):
                print(f"{i+1}. {file}")
                
    # Check if there are any subdirectories with txt files
    subdirs = [os.path.join(DATASET_DIR, d) for d in os.listdir(DATASET_DIR) 
               if os.path.isdir(os.path.join(DATASET_DIR, d))]
    for subdir in subdirs:
        print(f"\nChecking subdirectory: {subdir}")
        txt_in_subdir = [os.path.join(root, file) 
                        for root, dirs, files in os.walk(subdir) 
                        for file in files if file.endswith('.txt')]
        if txt_in_subdir:
            print(f"Found .txt files in {os.path.basename(subdir)}:")
            for i, file in enumerate(txt_in_subdir):
                print(f"{i+1}. {file}")

# Interactive function to select a file to convert
def convert_selected_file():
    """Prompt user to select a file to convert"""
    txt_files = list_txt_files_in_dataset()
    
    if not txt_files:
        print("No text files found to convert.")
        return
    
    print("\nSelect a file to convert (enter the number):")
    for i, file in enumerate(txt_files):
        print(f"{i+1}. {file}")
    
    # In a Jupyter notebook, you would manually select a file number and set it here
    # For now, let's try the first file if available
    if txt_files:
        selected_file = txt_files[0]
        output_file = selected_file.replace('.txt', '.csv')
        print(f"\nConverting {selected_file} to {output_file}")
        convert_wisdm_txt_to_csv(selected_file, output_file)

txt_file = r"C:\Users\Aayush\Desktop\Projects\SVMvsDL\datasets\WISDM\your_actual_file.txt"
csv_file = txt_file.replace('.txt', '.csv')
df = convert_wisdm_txt_to_csv(txt_file, csv_file)

# Function to convert all txt files in the directory
def convert_all_txt_files():
    """Convert all txt files in the WISDM dataset directory to CSV"""
    txt_files = list_txt_files_in_dataset()
    
    if not txt_files:
        print("No text files found to convert.")
        return
        
    for txt_file in txt_files:
        # Create corresponding CSV file name
        csv_file = txt_file.replace('.txt', '.csv')
        print(f"\nConverting {txt_file} to {csv_file}...")
        convert_wisdm_txt_to_csv(txt_file, csv_file)

# Uncomment to convert all text files found
# convert_all_txt_files()

Checking directory: C:\Users\Aayush\Desktop\Projects\SVMvsDL\datasets\WISDM
Directory does not exist. Please check the path.

Available text files in the dataset directory:
No .txt files found in the directory.

Checking parent directory: 


FileNotFoundError: [Errno 2] No such file or directory: 'C:\\Users\\Aayush\\Desktop\\Projects\\SVMvsDL\\datasets\\WISDM'