<a href="https://colab.research.google.com/github/corrin/word-triplet-generator/blob/master/Fun_with_Time_Series.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# prompt: Please download the bearings dataset from https://data.nasa.gov/download/brfb-gzcv/application%2Fzip
%%bash
# Check if the file exists
if [ ! -f "bearings.zip" ]; then
    echo "Downloading bearings dataset..."
    wget https://data.nasa.gov/download/brfb-gzcv/application%2Fzip -O bearings.zip
else
    echo "File already downloaded."
fi

# Check if the data has been extracted
if [ ! -d "IMS" ]; then
    echo "Unzipping dataset..."
    unzip bearings.zip
else
    echo "Data already unzipped."
fi


File already downloaded.
Data already unzipped.


In [20]:
%%bash

# Map RAR files to filenames

# Navigate to the IMS directory
cd IMS

# Define RAR files to check
rar_files=("1st_test.rar" "2nd_test.rar" "3rd_test.rar")

# Loop through each RAR file and list unique top-level directories
for rar_file in "${rar_files[@]}"; do
    echo "Directories in $rar_file:"
    # Skip the first 7 lines, then extract the unique top-level directories
    unrar l "$rar_file" | tail -n +8 | awk '{print $5}' | cut -d'/' -f1 | sort -u
    echo ""
done


Directories in 1st_test.rar:

----
1st_test

Directories in 2nd_test.rar:

----
2nd_test

Directories in 3rd_test.rar:

----
4th_test



In [19]:
%%bash

# Actually extract the files

cd IMS

# Define RAR files and their corresponding output directories
declare -A rar_files
rar_files["1st_test.rar"]="1st_test"
rar_files["2nd_test.rar"]="2nd_test"
rar_files["3rd_test.rar"]="4th_test"  # Mapping confirmed as 3rd_test.rar -> 4th_test

# Loop through each RAR file and extract only if the directory doesn't already exist
for rar_file in "${!rar_files[@]}"; do
    output_dir="${rar_files[$rar_file]}"

    if [ ! -d "$output_dir" ]; then
        echo "Extracting $rar_file to $output_dir..."
        unrar x -y -o- "$rar_file"
    else
        echo "$output_dir already exists. Skipping extraction for $rar_file."
    fi
done


1st_test already exists. Skipping extraction for 1st_test.rar.
4th_test already exists. Skipping extraction for 3rd_test.rar.
2nd_test already exists. Skipping extraction for 2nd_test.rar.


In [21]:
import os

# List the first few files in the 1st_test directory
sample_files = os.listdir("IMS/1st_test")
sample_files[:5]  # Display first 5 files


['2003.11.23.16.46.56',
 '2003.10.31.12.05.51',
 '2003.11.22.08.36.56',
 '2003.11.23.04.16.56',
 '2003.11.22.15.26.56']

In [22]:
import numpy as np

# Define the file path
file_path = "IMS/1st_test/2003.11.23.16.46.56"

# Load the data from the file and display the first 10 rows
data = np.loadtxt(file_path)
print(data[:10])  # Display the first 10 rows


[[-0.232 -0.09  -0.061 -0.229 -0.205 -0.195 -0.042 -0.046]
 [-0.181 -0.137 -0.261 -0.193 -0.051 -0.002  0.027 -0.264]
 [-0.095 -0.217 -0.303 -0.093 -0.027 -0.139  0.132 -0.154]
 [-0.166 -0.212 -0.083 -0.161 -0.229  0.015 -0.117 -0.107]
 [-0.217 -0.129  0.037 -0.178 -0.288 -0.239 -0.193 -0.203]
 [-0.156 -0.166 -0.012 -0.129 -0.225 -0.278 -0.171 -0.173]
 [-0.088 -0.173 -0.205 -0.139 -0.081 -0.249 -0.083 -0.161]
 [-0.178 -0.154 -0.208 -0.129 -0.151 -0.034 -0.017 -0.464]
 [-0.239 -0.1   -0.01  -0.129 -0.159 -0.002 -0.088 -0.125]
 [-0.007 -0.193  0.09  -0.059 -0.217 -0.039 -0.229 -0.059]]


In [23]:
import numpy as np
import os

# Path to a sample file
file_path = "IMS/1st_test/2003.11.23.16.46.56"

# Load the data
data = np.loadtxt(file_path)

# Check shape and memory usage of the data
print("Shape of data:", data.shape)
print("Memory usage of one file (MB):", data.nbytes / (1024 ** 2))


Shape of data: (20480, 8)
Memory usage of one file (MB): 1.25


In [24]:
# Count the total number of files in the 1st_test directory
file_count = len(os.listdir("IMS/1st_test"))

# Estimate total memory usage for all files in MB
total_memory_usage = data.nbytes * file_count / (1024 ** 2)
print("Estimated total memory usage for all files (MB):", total_memory_usage)


Estimated total memory usage for all files (MB): 2695.0


In [25]:
import numpy as np
import os

# Path to the 1st_test directory (Download stage)
directory_path = "IMS/1st_test"

# Load all files into a single 3D numpy array
all_data = []
file_names = sorted(os.listdir(directory_path))  # Sort files for consistent ordering

for filename in file_names:
    file_path = os.path.join(directory_path, filename)
    data = np.loadtxt(file_path)
    all_data.append(data)

# Convert to 3D array (files x time steps x channels)
all_data = np.array(all_data)
print("Raw data shape:", all_data.shape)  # Should print (num_files, 20480, 8)


Raw data shape: (2156, 20480, 8)
