In [1]:
from pathlib import Path

import pandas as pd
from pmlb import fetch_data

from howso.engine import (
    load_trainee,
    Trainee,
)
from howso.utilities import infer_feature_attributes

import pandas as pd
from howso.visuals import plot_feature_importances
import random

In [44]:
import numpy as np
import pandas as pd
import random
import datetime

def generate_car_wear_data(
    num_cars=10,
    num_days=1000,
    start_date="2022-01-01",
    mileage_start_range=(0, 5000),
    daily_miles_range=(20, 100),
    env_temp_range=(-10, 40),
    env_humidity_range=(20, 90),
    initial_tire_thickness_range=(7.0, 9.0),  # mm
    initial_brake_thickness_range=(8.0, 12.0),  # mm
    vibration_threshold=6.0,             # threshold for vibration_level
    random_seed=None
):
    """
    Generate synthetic daily wear data for a fleet of cars.

    One record is produced per car per day. Each day the car drives a random
    distance, its tire and brake-pad thickness shrink accordingly, and a
    vibration level is sampled. Maintenance is flagged when the brake pad
    drops below 6.0 mm or when vibration has exceeded ``vibration_threshold``
    on 3 or more consecutive days. The day's record is written *before* the
    maintenance reset is applied, so the flagged row still shows the worn
    values.

    Parameters
    ----------
    num_cars : int
        Number of cars; ``car_id`` runs from 1 to ``num_cars``.
    num_days : int
        Number of consecutive days simulated per car.
    start_date : str
        First simulated day, formatted ``YYYY-MM-DD``.
    mileage_start_range : tuple of int
        Inclusive ``(low, high)`` bounds for the starting odometer value.
        The odometer is tracked internally but is not part of the output.
    daily_miles_range : tuple of int
        ``(low, high)`` bounds for miles driven per day; ``high`` is
        exclusive (``np.random.randint`` semantics).
    env_temp_range, env_humidity_range : tuple of float
        ``(low, high)`` bounds for the uniformly sampled daily environment.
    initial_tire_thickness_range, initial_brake_thickness_range : tuple of float
        ``(low, high)`` bounds in mm for the starting thickness. The brake
        range is also used to re-draw the pad thickness after brake
        maintenance. Tire thickness is never reset; it is only clipped at 0.
    vibration_threshold : float
        A day counts as "excessive vibration" when the sampled level is
        strictly above this value.
    random_seed : int or None
        Seed applied to both ``numpy.random`` and ``random``. When ``None``
        (the default) a fresh seed in ``[1, 10000]`` is drawn on every call.
        Pass an explicit integer for a reproducible dataset.

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame)
        ``df`` holds the daily records; ``maintenance_df`` holds, per car,
        how many times each trigger type fired.

    Notes
    -----
    This function seeds the *global* ``numpy.random`` and ``random``
    generators, so it affects any later use of those generators.
    """
    # Resolve the seed at call time. A call expression in the signature would
    # be evaluated once, when the function is defined, silently pinning every
    # default call in the session to the same hidden seed.
    if random_seed is None:
        random_seed = random.SystemRandom().randint(1, 10000)

    np.random.seed(random_seed)
    random.seed(random_seed)

    # Fixed schemas so that an empty run (num_cars=0 or num_days=0) still
    # returns frames with the expected columns.
    record_columns = [
        "car_id",
        "date",
        "env_temperature",
        "env_humidity",
        "tire_thickness_mm",
        "brake_pad_thickness_mm",
        "vibration_level",
        "maintenance_needed",
    ]
    count_columns = [
        "car_id",
        "brake_maintenance_count",
        "vibration_maintenance_count",
    ]

    start_dt = datetime.datetime.strptime(start_date, "%Y-%m-%d")
    records = []
    maintenance_counts = []  # one summary dict per car

    for car_id in range(1, num_cars + 1):
        current_date = start_dt
        # Odometer is advanced below but intentionally not emitted in records.
        current_mileage = random.randint(*mileage_start_range)

        # Initial component state for this car
        tire_thickness = np.random.uniform(*initial_tire_thickness_range)
        brake_thickness = np.random.uniform(*initial_brake_thickness_range)

        consecutive_days_excessive_vibration = 0
        brake_maintenance_count = 0
        vibration_maintenance_count = 0

        for _ in range(num_days):
            # Daily environment
            env_temp = np.random.uniform(*env_temp_range)
            env_humidity = np.random.uniform(*env_humidity_range)

            # Drive some miles
            daily_miles = np.random.randint(*daily_miles_range)
            current_mileage += daily_miles

            # Wear & tear: proportional to distance plus a little noise
            tire_thickness -= 0.001 * daily_miles + np.random.normal(0, 0.002)
            brake_thickness -= 0.0005 * daily_miles + np.random.normal(0, 0.001)

            # Thickness cannot go negative
            tire_thickness = max(tire_thickness, 0)
            brake_thickness = max(brake_thickness, 0)

            # Vibration: track the current streak of above-threshold days
            vibration_level = np.random.normal(5.0, 2.0)  # mean=5, std=2
            if vibration_level > vibration_threshold:
                consecutive_days_excessive_vibration += 1
            else:
                consecutive_days_excessive_vibration = 0

            # Check if maintenance is needed
            brake_trigger = (brake_thickness < 6.0)
            vibration_trigger = (consecutive_days_excessive_vibration >= 3)
            maintenance_needed = (brake_trigger or vibration_trigger)

            # Record today's data (pre-maintenance values). The individual
            # trigger flags are deliberately left out of the record.
            records.append({
                "car_id": car_id,
                "date": current_date.strftime("%Y-%m-%d"),
                "env_temperature": round(env_temp, 2),
                "env_humidity": round(env_humidity, 2),
                "tire_thickness_mm": round(tire_thickness, 3),
                "brake_pad_thickness_mm": round(brake_thickness, 3),
                "vibration_level": round(vibration_level, 2),
                "maintenance_needed": int(maintenance_needed),
            })

            # Count each trigger that fired and reset what it affects
            if brake_trigger:
                brake_maintenance_count += 1
                brake_thickness = np.random.uniform(*initial_brake_thickness_range)

            if vibration_trigger:
                vibration_maintenance_count += 1
                consecutive_days_excessive_vibration = 0

            # Move to the next day
            current_date += datetime.timedelta(days=1)

        # Store maintenance counts per car
        maintenance_counts.append({
            "car_id": car_id,
            "brake_maintenance_count": brake_maintenance_count,
            "vibration_maintenance_count": vibration_maintenance_count
        })

    # Convert to DataFrames
    df = pd.DataFrame(records, columns=record_columns)
    maintenance_df = pd.DataFrame(maintenance_counts, columns=count_columns)

    return df, maintenance_df


# # Example Usage
# df, maintenance_df = generate_car_wear_data()


In [46]:
df.to_csv('sample.csv')

In [45]:
df, maintenance_df = generate_car_wear_data()

# df = generate_car_wear_data(
#     num_cars=10,
#     num_days=365,
#     start_date="2022-01-01",
# )

In [4]:
# Per-car tally of how often each maintenance trigger fired (one row per car_id).
maintenance_df

Unnamed: 0,car_id,brake_maintenance_count,vibration_maintenance_count
0,1,7,23
1,2,8,24
2,3,8,24
3,4,8,20
4,5,6,19
5,6,8,15
6,7,7,25
7,8,6,23
8,9,7,21
9,10,7,18


In [5]:
# Fleet-wide totals per trigger type. The car_id entry is only the sum of the
# identifiers and carries no meaning.
maintenance_df.sum()

car_id                          55
brake_maintenance_count         72
vibration_maintenance_count    212
dtype: int64

In [6]:
# (rows, columns) of the daily records: one row per car per simulated day.
df.shape

(10000, 8)

In [7]:
# Number of car-days on which maintenance was flagged.
int((df['maintenance_needed'] == 1).sum())

281

In [8]:
# df = df[df['maintenance_needed'] == 0]

In [9]:
# Each car is its own series (keyed by car_id) and rows are ordered by date.
id_feature_name, time_feature_name = "car_id", "date"

# Feature that the later cells treat as the action (target).
action_features = ["maintenance_needed"]

# Infer feature attributes for the frame, requesting 3 lags.
features = infer_feature_attributes(
    df, id_feature_name=id_feature_name, time_feature_name=time_feature_name, num_lags=3
)

In [10]:
# Inspect the attributes that were inferred for the action feature.
features['maintenance_needed']

{'type': 'nominal',
 'data_type': 'number',
 'decimal_places': 0,
 'original_type': {'data_type': 'integer', 'size': 8},
 'bounds': {'allow_null': False},
 'time_series': {'type': 'rate', 'num_lags': 3}}

In [11]:
# Create the Trainee, configured with the feature attributes inferred above
t = Trainee(features=features)

The following parameters from your configuration will override the default Amalgam parameters: {'trace'}


In [12]:
# Train the Trainee on the full generated frame
t.train(df)
# Analyze the Trainee.
# analyze() is called with no arguments here, so no context/action features are
# given to the analysis (presumably a targetless analysis — TODO confirm).
# Specifying action_features would make it a Targeted analysis, but note that
# `context_features` is only defined in a later cell:
# t.analyze(context_features=context_features, action_features=action_features)
t.analyze()

In [13]:
# Everything the Trainee knows about is a context feature, except the target
# itself, its own lags, and the id / time / series-progress bookkeeping columns.
excluded_from_context = {
    action_features[0],
    '.maintenance_needed_lag_1',
    '.maintenance_needed_lag_2',
    '.maintenance_needed_lag_3',
    'car_id',
    'date',
    '.date_delta_1',
    '.date_lag_1',
    '.series_progress',
    '.series_progress_delta',
}
context_features = [
    name for name in t.features.keys() if name not in excluded_from_context
]

In [14]:
# Conditions passed through to react_aggregate via `details`; both select
# maintenance_needed == 0, and the action condition also requires the three
# lagged maintenance flags to be 0.
no_maintenance_context = {
    'maintenance_needed': 0,
}
no_maintenance_action = {
    'maintenance_needed': 0,
    '.maintenance_needed_lag_1': 0,
    '.maintenance_needed_lag_2': 0,
    '.maintenance_needed_lag_3': 0,
}

# Request robust feature contributions and robust feature residuals.
aggregate_details = {
    "feature_contributions_robust": True,
    "feature_residuals_robust": True,
    "context_condition": no_maintenance_context,
    "action_condition": no_maintenance_action,
}

stats = t.react_aggregate(
    action_feature=action_features[0],
    action_features=action_features,
    context_features=context_features,
    details=aggregate_details,
)


In [18]:
# Ask the Trainee to compute three per-case conviction metrics. Two of them
# (similarity_conviction, familiarity_conviction_addition) are read back below
# through get_cases, so they are presumably stored as features on the cases.
t.react_into_features(
    similarity_conviction=True,
    familiarity_conviction_addition=True,
    familiarity_conviction_removal=True,
)

In [40]:
# All Trainee features plus the two conviction columns to pull back.
conviction_columns = ['similarity_conviction', 'familiarity_conviction_addition']
features_retrieve = [*t.features.keys(), *conviction_columns]

In [41]:
# Fetch the cases with the requested features. No session id is passed, and the
# library warns that case order is not guaranteed in that situation.
stored_convictions = t.get_cases(features=features_retrieve)


Calling get_cases without a session id does not guarantee case order.



In [42]:
# Display the retrieved cases, including the conviction columns.
stored_convictions

Unnamed: 0,.env_temperature_lag_3,.env_humidity_lag_3,.brake_pad_thickness_mm_lag_3,.env_temperature_lag_2,.env_temperature_lag_1,.env_humidity_lag_2,.brake_pad_thickness_mm_lag_1,.tire_thickness_mm_rate_1,tire_thickness_mm,.tire_thickness_mm_lag_3,...,.series_progress,env_humidity,.tire_thickness_mm_lag_1,.series_progress_delta,maintenance_needed,.vibration_level_lag_2,.date_delta_1,.env_temperature_rate_1,similarity_conviction,familiarity_conviction_addition
0,,,,,,,,,7.715,,...,0.000000,88.24,,0.001001,0,,,,0.157375,0.003160
1,,,,,31.30,,9.607,-7.870370e-07,7.647,,...,0.001001,71.12,7.715,0.001001,0,,86400.0,0.000073,0.181501,0.006880
2,,,,31.30,37.63,88.24,9.573,-1.099537e-06,7.552,,...,0.002002,48.75,7.647,0.001001,0,4.55,86400.0,-0.000189,0.215567,0.014607
3,31.30,88.24,9.607,37.63,21.31,71.12,9.525,-3.472222e-07,7.522,7.715,...,0.003003,60.52,7.552,0.001001,0,9.17,86400.0,-0.000226,0.918920,0.467919
4,37.63,71.12,9.573,21.31,1.78,48.75,9.509,-3.703704e-07,7.490,7.647,...,0.004004,28.14,7.522,0.001001,0,7.53,86400.0,-0.000130,0.829936,0.374914
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,-2.04,67.16,9.710,13.96,22.00,89.34,9.655,0.000000e+00,0.000,0.000,...,0.995996,29.71,0.000,0.001001,0,4.39,86400.0,0.000092,0.955297,7.117376
9996,13.96,89.34,9.692,22.00,29.96,88.86,9.626,0.000000e+00,0.000,0.000,...,0.996997,83.42,0.000,0.001001,0,5.20,86400.0,-0.000025,0.967008,7.744484
9997,22.00,88.86,9.655,29.96,27.76,29.71,9.614,0.000000e+00,0.000,0.000,...,0.997998,72.13,0.000,0.001001,0,4.73,86400.0,-0.000318,0.974611,5.264917
9998,29.96,29.71,9.626,27.76,0.27,83.42,9.589,0.000000e+00,0.000,0.000,...,0.998999,79.22,0.000,0.001001,0,4.81,86400.0,0.000012,0.994474,3.226714


In [22]:
# List the columns that came back from get_cases.
stored_convictions.columns

Index(['.brake_pad_thickness_mm_lag_1', '.brake_pad_thickness_mm_lag_2',
       '.brake_pad_thickness_mm_lag_3', '.brake_pad_thickness_mm_rate_1',
       '.date_delta_1', '.date_lag_1', '.env_humidity_lag_1',
       '.env_humidity_lag_2', '.env_humidity_lag_3', '.env_humidity_rate_1',
       '.env_temperature_lag_1', '.env_temperature_lag_2',
       '.env_temperature_lag_3', '.env_temperature_rate_1',
       '.maintenance_needed_lag_1', '.maintenance_needed_lag_2',
       '.maintenance_needed_lag_3', '.series_progress',
       '.series_progress_delta', '.tire_thickness_mm_lag_1',
       '.tire_thickness_mm_lag_2', '.tire_thickness_mm_lag_3',
       '.tire_thickness_mm_rate_1', '.vibration_level_lag_1',
       '.vibration_level_lag_2', '.vibration_level_lag_3',
       '.vibration_level_rate_1', 'brake_pad_thickness_mm', 'car_id', 'date',
       'env_humidity', 'env_temperature', 'maintenance_needed',
       'tire_thickness_mm', 'vibration_level'],
      dtype='object')

In [None]:
# Select the similarity conviction column. This raises KeyError unless the
# frame was fetched with that feature requested (see features_retrieve).
stored_convictions['similarity_conviction']

KeyError: 'similarity_conviction'

In [15]:
# Robust residual reported for the target feature.
feature_residuals = stats['feature_residuals_robust']['maintenance_needed']

# Wrap the robust feature contributions in a single-row frame.
robust_contributions = stats['feature_contributions_robust']
fc_robust = pd.DataFrame([robust_contributions])
fc_robust

Unnamed: 0,.env_temperature_lag_3,.env_humidity_lag_3,.brake_pad_thickness_mm_lag_3,.env_temperature_lag_2,.env_temperature_lag_1,.env_humidity_lag_2,.tire_thickness_mm_lag_3,.brake_pad_thickness_mm_lag_1,.tire_thickness_mm_rate_1,tire_thickness_mm,...,.tire_thickness_mm_lag_2,vibration_level,brake_pad_thickness_mm,.env_humidity_rate_1,env_temperature,.vibration_level_rate_1,env_humidity,.tire_thickness_mm_lag_1,.vibration_level_lag_2,.env_temperature_rate_1
0,0.008726,0.008923,0.006257,0.008488,0.007433,0.00876,0.005017,0.006598,0.005915,0.003945,...,0.00463,0.014766,0.006623,0.007451,0.007779,0.00949,0.008462,0.00448,0.016768,0.007294


In [16]:
# Plot the robust feature contributions; the target's robust residual is passed
# alongside (presumably shown for reference — see the howso.visuals docs).
plot_feature_importances(fc_robust, feature_residuals=feature_residuals)