# Pickling

In [1]:
import os
import pickle
import re
from pathlib import Path

import pandas as pd

# Function to save a single dataframe using Pickle
def save_dataframe(dataframe, filename):
    """
    Serialize one DataFrame to disk with pickle and report where it went.

    Arguments:
    - dataframe: The pandas DataFrame to write.
    - filename: Destination path, including the file name. The file is opened
      in binary write mode, so any existing content at that path is replaced.

    Returns:
    - None. A confirmation line is printed once the object has been dumped.
    """
    with open(filename, 'wb') as handle:
        pickle.dump(dataframe, handle)
        confirmation = f"DataFrame saved to {filename}"
        print(confirmation)

# Function to save multiple dataframes iteratively
def save_multiple_dataframes(dataframes, save_directory):
    """
    Save each DataFrame to its own pickle file inside one directory.

    Files are named dataframe_1.pkl, dataframe_2.pkl, ... following the
    order of the input, so existing files with those names are overwritten.

    Arguments:
    - dataframes: A list (or other iterable) of pandas DataFrames to save.
    - save_directory: Directory path to store the pickle files. It is created,
      including any missing parent directories, if it does not exist yet.
    """
    # exist_ok=True avoids the check-then-create race of testing
    # os.path.exists() first, and is a no-op when the directory is already there.
    os.makedirs(save_directory, exist_ok=True)

    # Number the files from 1 to match the dataframe_<n>.pkl naming scheme
    for position, df in enumerate(dataframes, start=1):
        filename = os.path.join(save_directory, f"dataframe_{position}.pkl")
        save_dataframe(df, filename)

# Function to load a single dataframe from a pickle file
def load_dataframe(filename):
    """
    Read a pickled DataFrame back from disk.

    Arguments:
    - filename: The file path (including name) of the pickle file to read.
    Returns:
    - The object stored in the file (a pandas DataFrame when the file was
      written by save_dataframe).

    Note: unpickling can execute arbitrary code, so only load files that come
    from a trusted source.
    """
    with open(filename, 'rb') as handle:
        loaded = pickle.load(handle)
        print(f"DataFrame loaded from {filename}")
    return loaded

# Function to load multiple dataframes from pickle files in a directory
def _natural_sort_key(name):
    """Build a sort key that compares runs of digits in a file name numerically."""
    # Splitting on a capturing group alternates text, digits, text, ... so the
    # odd positions are always digit runs; items at the same index therefore
    # share a type and stay comparable.
    parts = re.split(r'(\d+)', name)
    numeric_aware = [int(part) if idx % 2 else part for idx, part in enumerate(parts)]
    # The raw name breaks ties such as "01" vs "1" deterministically.
    return numeric_aware, name


def load_multiple_dataframes(save_directory):
    """
    Load every pickle file found directly inside a directory.

    Arguments:
    - save_directory: Directory path where pickle files are stored.
    Returns:
    - dataframes: A list of pandas DataFrames loaded from the '.pkl' files,
      ordered by file name with embedded numbers compared numerically
      (dataframe_2.pkl comes before dataframe_10.pkl).
    """
    # os.listdir returns entries in arbitrary order, so sort explicitly to make
    # the order of the returned list reproducible across runs and platforms.
    pickle_files = sorted(
        (entry for entry in os.listdir(save_directory) if entry.endswith('.pkl')),
        key=_natural_sort_key,
    )

    # Each file is read through load_dataframe so the per-file message is printed
    return [
        load_dataframe(os.path.join(save_directory, pickle_file))
        for pickle_file in pickle_files
    ]


## Testing

Save the extracted IMU DataFrames to pickle files, then load them back.

In [2]:
# NOTE(review): extract_imu_data is not defined or imported anywhere in the
# visible part of this notebook, so this cell raises NameError on a fresh
# kernel. Define or import it in an earlier cell before running this one.
df1 = extract_imu_data('data/raf_frames.MP4')
df2 = extract_imu_data('data/GH010045.MP4')
directory = 'pickled_bytestream'

dataframes = [df1, df2]

# Create the output directory up front (no error if it already exists);
# save_dataframe opens the target path directly and does not create
# missing parent directories itself.
os.makedirs(directory, exist_ok=True)

# Save the first DataFrame on its own under an explicit .pkl file name
filename = os.path.join(directory, 'imu_data.pkl')
save_dataframe(df1, filename)

# Save every DataFrame as dataframe_<n>.pkl in the same directory
save_multiple_dataframes(dataframes, directory)

NameError: name 'extract_imu_data' is not defined

In [4]:
# Load every pickle file in the directory and keep the result in a variable so
# later cells do not have to rely on Out[N].
# To load a single file instead: load_dataframe('pickled_bytestream/imu_data.pkl')
loaded_dataframes = load_multiple_dataframes('pickled_bytestream')
loaded_dataframes

DataFrame loaded from pickled_bytestream\dataframe_1.pkl
DataFrame loaded from pickled_bytestream\dataframe_2.pkl
DataFrame loaded from pickled_bytestream\imu_data.pkl


[        TIMESTAMP    ACCL_x    ACCL_y    ACCL_z    GYRO_x    GYRO_y    GYRO_z
 0        0.000000  6.345324 -4.844125  5.832134  0.854250  0.061018 -0.793232
 1        0.005079  6.235012 -4.424460  5.827338  1.342393 -0.366107 -0.366107
 2        0.010158  6.290168 -4.714628  5.743405  1.769518 -0.549161  1.891554
 3        0.015237  6.374101 -5.340528  5.750600  1.952572 -0.427125  3.294965
 4        0.020316  6.321343 -5.455635  5.834532  2.318679 -0.366107  1.952572
 ...           ...       ...       ...       ...       ...       ...       ...
 29309  148.858689  7.383693 -5.494005  4.038369 -1.342393  0.366107 -1.952572
 29310  148.863768  7.369305 -5.323741  3.928058 -1.708500  0.122036 -1.525447
 29311  148.868847  7.429257 -5.134293  3.738609 -2.074608 -0.244071 -1.220357
 29312  148.873926  7.419664 -5.059952  3.733813 -2.135625  0.061018 -0.976286
 29313  148.879005  7.426859 -5.143885  3.798561 -1.769518  0.976286 -0.671197
 
 [29314 rows x 7 columns],
          TIMESTAMP    