In [None]:
"""
Load the dataframe and remove static columns (at most two distinct values), including columns that contain only missing values
"""
import pandas as pd

path = "../../data/processed/9.7_ajo/kafka_broker_1:11001.feather"
df = pd.read_feather(path)

# Report the size of the frame exactly as loaded from disk
print(f"Loaded {len(df)} rows and {len(df.columns)} columns")

# Distinct values per column; nunique() ignores NaN by default, so a column
# that is entirely missing gets a count of 0
unique_counts = df.nunique()

# Treat columns with at most two distinct values as static
# (this also catches the all-missing columns counted as 0 above)
static_columns = unique_counts[unique_counts <= 2].index

# Drop the static columns; the keyword form makes the axis explicit
df = df.drop(columns=static_columns)
print(f"Removing {len(static_columns)} static columns ({len(df.columns)} remaining)")

# Preview only when the frame is narrow enough not to stall the IDE
# (thousands of columns really slows things down).
# This has to be the cell's last top-level expression: Jupyter auto-displays
# only that, so a bare `df.head()` nested inside an `if` block is silently
# discarded. A None result (frame too wide) displays nothing.
df.head() if len(df.columns) < 100 else None

In [None]:
"""
Change the dataframe headers to a more human-readable format
"""

# Project-local helper; its implementation is not visible from this notebook.
from utils.header_cleaner import clean_up_headers
# NOTE(review): presumably returns a frame with renamed column labels --
# confirm whether `df` itself is modified or a new frame is returned.
cleaned_df = clean_up_headers(df)
# Last expression of the cell, so Jupyter renders it as the cell output.
cleaned_df



In [None]:
"""
TODO: Extend this script to do some plotting (maybe copy-and-paste this code into separate file first)
"""

import difflib

# Free-text label to look for among the cleaned column headers
target_word = 'kepler joules'

# cutoff=0.05 is far more permissive than difflib's default of 0.6, so almost
# any header qualifies; the result is still capped at difflib's default of
# three matches, ordered from most to least similar.
closest_matches = difflib.get_close_matches(target_word, cleaned_df.columns, cutoff=0.05)
print(closest_matches)

if closest_matches:
    # Title the figure so it stands alone when the notebook is skimmed
    cleaned_df[closest_matches].plot(title=f"Closest column matches for '{target_word}'")
else:
    # Nothing scored above the cutoff (e.g. the frame has no columns left);
    # plotting an empty column selection would fail inside pandas with an
    # unhelpful error, so report it instead.
    print(f"No columns resembling '{target_word}' were found; nothing to plot")