In [3]:
import kagglehub

# Download the latest version of the "patrickfleith/nasa-battery-dataset" Kaggle dataset.
# `path` holds the value returned by dataset_download; later cells use it as the
# root directory that contains `cleaned_dataset/`.
path = kagglehub.dataset_download("patrickfleith/nasa-battery-dataset")

Downloading from https://www.kaggle.com/api/v1/datasets/download/patrickfleith/nasa-battery-dataset?dataset_version_number=2...


100%|██████████| 228M/228M [00:01<00:00, 170MB/s]

Extracting files...





In [18]:
!mkdir plots

mkdir: cannot create directory ‘plots’: File exists


In [19]:
import pandas as pd
from datetime import datetime
import numpy as np
import plotly.graph_objects as graph
import plotly.io as ply

# Function to load and clean data
def load_and_clean_data(dataset_path):
    """Load the dataset metadata and return the impedance-related subsets.

    Reads ``<dataset_path>/cleaned_dataset/metadata.csv`` and keeps only the
    rows whose ``type`` column equals ``'impedance'``.

    Parameters
    ----------
    dataset_path : str or path-like
        Root directory of the downloaded dataset.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``impedance_data`` with columns ``start_time``, ``Re``, ``Rct``,
        ``battery_id`` and ``file_data`` with columns ``start_time``,
        ``filename``, ``battery_id``. Both are independent copies, so callers
        may assign to their columns without touching the metadata frame and
        without triggering pandas' SettingWithCopyWarning.
    """
    metadata = pd.read_csv(f"{dataset_path}/cleaned_dataset/metadata.csv")

    # Filter once and reuse the result for both projections.
    impedance_rows = metadata[metadata['type'] == 'impedance']

    # .copy() detaches the results from `metadata`; callers overwrite columns in place.
    impedance_data = impedance_rows[['start_time', 'Re', 'Rct', 'battery_id']].copy()
    file_data = impedance_rows[['start_time', 'filename', 'battery_id']].copy()
    return impedance_data, file_data

# Function to parse start_time
def convert_to_datetime(time_string):
    """Parse a six-component timestamp into a ``datetime``.

    The six components are year, month, day, hour, minute and second, in that
    order. The first five are truncated to integers; the fractional part of the
    seconds component is preserved as microseconds.

    Parameters
    ----------
    time_string : str, list, tuple or numpy.ndarray
        Either a string such as ``"[2010. 7. 21. 15. 0. 35.093]"`` (surrounding
        brackets and commas are optional) or a sequence of six numeric values.

    Returns
    -------
    datetime.datetime or pandas.NaT
        The parsed timestamp, or ``pd.NaT`` when the input has an unsupported
        type, does not contain exactly six components, or cannot be converted.
        Conversion errors are printed rather than raised.
    """
    try:
        if isinstance(time_string, str):
            # Commas become whitespace so "[2010,7,21,...]" is not fused into one token.
            raw_parts = time_string.strip().strip("[]").replace(",", " ").split()
        elif isinstance(time_string, (list, tuple, np.ndarray)):
            raw_parts = list(time_string)
        else:
            return pd.NaT

        # Going through float() lets both 2010 and "2010.0" style values parse.
        components = [float(part) for part in raw_parts]
        if len(components) != 6:
            return pd.NaT

        year, month, day, hour, minute = (int(value) for value in components[:5])
        seconds = components[5]
        whole_seconds = int(seconds)
        # Clamp so rounding (e.g. 35.9999999) can never produce an invalid 1_000_000 us.
        microseconds = min(round((seconds - whole_seconds) * 1_000_000), 999_999)
        return datetime(year, month, day, hour, minute, whole_seconds, microseconds)
    except (ValueError, TypeError, OverflowError) as e:
        # OverflowError covers int(inf); out-of-range dates raise ValueError.
        print(f"Failed to parse: {time_string}, Error: {e}")
    return pd.NaT

def extract_absolute_value(value):
    """Return the magnitude of ``value`` interpreted as a complex number.

    ``value`` is passed to ``complex()`` and the absolute value of the result
    is returned. If the conversion raises ``ValueError`` or ``TypeError`` the
    problem is printed and ``np.nan`` is returned instead.
    """
    try:
        magnitude = abs(complex(value))
    except (ValueError, TypeError) as e:
        print(f"Error processing value {value}: {e}")
        return np.nan
    return magnitude

impedance_df, file_df = load_and_clean_data(path)

# Parse the raw start_time values into datetimes for both frames
# (impedance_df first, then file_df).
for frame in (impedance_df, file_df):
    frame['start_time'] = frame['start_time'].apply(convert_to_datetime)

# Reduce the Re and Rct columns to magnitudes via extract_absolute_value.
for resistance_column in ('Re', 'Rct'):
    impedance_df[resistance_column] = impedance_df[resistance_column].apply(extract_absolute_value)


In [20]:
# Drop rows whose timestamp could not be parsed and order the rest chronologically
# so each trace is drawn left-to-right in time.
impedance_df = impedance_df.dropna(subset=['start_time'])
impedance_df = impedance_df.sort_values(by='start_time')

# Plot for each battery_id
# NOTE: plotly.graph_objects is imported as `graph` in the imports cell; the `go`
# alias is only defined by a later cell, so using it here fails on a fresh kernel.
for battery_id in impedance_df['battery_id'].unique():
    battery_data = impedance_df[impedance_df['battery_id'] == battery_id]
    # A missing (NaN) battery_id never compares equal to itself, so it selects no rows.
    if battery_data.empty:
        print(f"No data available for battery_id {battery_id}")
        continue

    # Create the figure: Re and Rct magnitudes against measurement start time.
    fig = graph.Figure()
    fig.add_trace(graph.Scatter(
        x=battery_data['start_time'],
        y=battery_data['Re'],
        mode='lines+markers',
        name='Re',
        line=dict(color='blue')
    ))
    fig.add_trace(graph.Scatter(
        x=battery_data['start_time'],
        y=battery_data['Rct'],
        mode='lines+markers',
        name='Rct',
        line=dict(color='red')
    ))

    fig.update_layout(
        title=f"Resistance Change Over Time for Battery ID: {battery_id}",
        xaxis_title="Start Time",
        yaxis_title="Magnitude of Resistance (Ohms)",
        xaxis=dict(tickangle=45),
        legend_title="Legend",
        legend_font_size=16,
        template="plotly",
        height=1000,
        width=1500,
    )

    # Export one PNG per battery into the plots/ directory.
    fig.write_image(f"plots/battery_{battery_id}_plot.png")

In [21]:
import pandas as pd
import plotly.graph_objects as go
from datetime import datetime


# Keep only files with a parsed timestamp, ordered chronologically.
file_df = file_df.dropna(subset=['start_time']).sort_values(by='start_time')

# battery_id -> {'start_time': [...], 'impedance': [...]}, filled in time order.
battery_data = {}

for _, record in file_df.iterrows():
    battery_id = record['battery_id']
    filename = record['filename']
    start_time = record['start_time']
    file_path = f"{path}/cleaned_dataset/data/{filename}"

    try:
        measurements = pd.read_csv(file_path)
        if 'Battery_impedance' not in measurements.columns:
            print(f"'Battery_impedance' column not found in {filename}")
            continue

        # Mean magnitude of the impedance values stored in this file.
        magnitudes = measurements['Battery_impedance'].apply(lambda z: abs(complex(z)))
        avg_impedance = magnitudes.mean()

        history = battery_data.setdefault(battery_id, {'start_time': [], 'impedance': []})
        history['start_time'].append(start_time)
        history['impedance'].append(avg_impedance)
    except Exception as e:
        # Best effort: report the failing file and move on to the next one.
        print(f"Error processing file {filename}: {e}")


# One figure per battery: mean impedance magnitude against measurement start time.
for battery_id, history in battery_data.items():
    impedance_trace = go.Scatter(
        x=history['start_time'],
        y=history['impedance'],
        mode='lines+markers',
        name=f'Battery {battery_id}',
    )
    fig = go.Figure()
    fig.add_trace(impedance_trace)
    fig.update_layout(
        title=f'Impedance Change Over Time for Battery {battery_id}',
        xaxis_title='Start Time',
        yaxis_title='Magnitude of Impedance (Ohms)',
        xaxis=dict(tickangle=45),
        template='plotly_white',
        height=1000,
        width=1500,
    )
    fig.write_image(f"plots/battery_{battery_id}_impedance_plot.png")