In [None]:
import lightgbm as lgb
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler

In [None]:
# Read the training CSV into `df` and preview its first 50 rows.
df = pd.read_csv("fleet_train.csv")
df.head(n=50)

In [None]:
# Per-column count of missing values in the loaded frame.
df.isna().sum()

In [None]:
# Drop the columns that no later cell of this notebook reads.
# Rebinding `df` (rather than inplace=True) together with errors='ignore' keeps
# the cell re-runnable: executing it a second time is a no-op instead of a KeyError.
df = df.drop(
    columns=['record_id', 'fleetid', 'truckid', 'Region', 'Measurement_timestamp'],
    errors='ignore',
)

In [None]:
# Columns that the scaling cell rescales with MinMaxScaler.
features_to_normalize = [
    'Engine_Load',
    'Engine_RPM',
    'Engine_Coolant_Temp',
    'Vibration',
    'Mass_Air_Flow_Rate',
    'Engine_Oil_Temp',
    'Throttle_Pos_Manifold',
    'Accel_Ssor_Total',
    'Trip_Distance',
    'Trip_Time_journey',
    'Turbo_Boost_And_Vcm_Gauge',
]

In [None]:
# Fit a min-max scaler on the selected columns and overwrite them in `df`
# with the scaled values.
# NOTE(review): the scaler is fit on the whole frame, i.e. before the later
# train/test split — confirm that this is acceptable for the evaluation.
scaler = MinMaxScaler()
scaled_values = scaler.fit_transform(df[features_to_normalize])
df[features_to_normalize] = scaled_values

# Earlier per-component score columns, left disabled:
# df['engine_health_score'] = (df['Engine_Load'] + df['Engine_RPM'] + df['Engine_Coolant_Temp']) / 3
# df['battery_health_score'] = (df['Mass_Air_Flow_Rate'] + df['Engine_Oil_Temp']) / 2
# df['vibration_score'] = df['Vibration']
df.head(n=50)

In [None]:
def calculate_condition_score(row):
    """Combine engine, usage and anomaly signals of one record into a score.

    The score is a weighted sum of three terms:

    * 0.5 x the mean of ``Engine_Load``, ``Engine_RPM`` and
      ``Engine_Coolant_Temp``;
    * 0.3 x ``Engine_Load`` multiplied by the sum of ``Trip_Distance`` and
      ``Trip_Time_journey``;
    * 0.2 x an anomaly flag that is 1 when ``Vibration`` exceeds 0.7 or
      ``Engine_Coolant_Temp`` exceeds 0.8, and 0 otherwise.

    Args:
        row: Any object supporting ``row[key]`` lookup for the six column
            names above (in this notebook it is applied to rows of the
            min-max scaled frame).

    Returns:
        The numeric condition score.
    """
    load = row['Engine_Load']
    coolant = row['Engine_Coolant_Temp']

    mean_engine_reading = (load + row['Engine_RPM'] + coolant) / 3
    trip_exposure = load * (row['Trip_Distance'] + row['Trip_Time_journey'])
    # Strict comparisons: a value sitting exactly on a threshold is not flagged.
    is_anomalous = (row['Vibration'] > 0.7) or (coolant > 0.8)

    return 0.5 * mean_engine_reading + 0.3 * trip_exposure + 0.2 * int(is_anomalous)

# Score every row and store the result as a new column.
df['Condition_Score'] = df.apply(calculate_condition_score, axis='columns')

df.head()

In [None]:
def assign_priority(score):
    """Map a condition score onto a priority label.

    Args:
        score: Numeric condition score.

    Returns:
        ``'Critical'`` when ``score`` is above 0.70, ``'Moderate'`` when it is
        above 0.45 (and at most 0.70), otherwise ``'Low'``. Both bounds are
        strict, so a score exactly on a threshold falls into the lower band.
    """
    if score > 0.70:
        return 'Critical'
    if score > 0.45:
        return 'Moderate'
    return 'Low'

In [None]:
# Label each row from its condition score, export the frame as it stands at
# this point, then preview the first 50 rows.
df['Priority'] = df['Condition_Score'].map(assign_priority)
df.to_csv('fleet_train_priorities.csv', index=False)
df.head(n=50)

In [None]:
# Per-column count of missing values after the score and priority columns were added.
df.isna().sum()

In [None]:
# Three weighted-sum risk columns, each built from a pair of existing columns.
df['Overstrain_Risk'] = (
    df['Engine_Load'].mul(0.5).add(df['Engine_RPM'].mul(0.5))
)
df['Heat_Dissipation_Risk'] = (
    df['Engine_Coolant_Temp'].mul(0.4).add(df['Engine_Oil_Temp'].mul(0.6))
)
df['Power_Failure_Risk'] = (
    df['Mass_Air_Flow_Rate'].mul(0.5).add(df['Turbo_Boost_And_Vcm_Gauge'].mul(0.5))
)

def assign_failure_label(row):
    """Pick the failure label for one record.

    Args:
        row: Any object supporting ``row[key]`` lookup for
            ``Condition_Score``, ``Overstrain_Risk``,
            ``Heat_Dissipation_Risk`` and ``Power_Failure_Risk``. The risk
            values are only read when the score is not below 0.3.

    Returns:
        ``'No Failure'`` when ``Condition_Score`` is below 0.3; otherwise the
        label whose risk value is largest. On a tie the earliest of
        Overstrain, Heat Dissipation, Power wins.
    """
    if row['Condition_Score'] < 0.3:
        return 'No Failure'

    # Order matters: max() keeps the first of several equal maxima.
    candidates = (
        ('Overstrain Failure', row['Overstrain_Risk']),
        ('Heat Dissipation Failure', row['Heat_Dissipation_Risk']),
        ('Power Failure', row['Power_Failure_Risk']),
    )
    label, _ = max(candidates, key=lambda pair: pair[1])
    return label

# Label every row, then show how many rows fall into each label.
df['Failure_Type'] = df.apply(assign_failure_label, axis='columns')

df['Failure_Type'].value_counts()

In [None]:
# Show only the listed sensor and trip columns of the working frame.
preview_columns = [
    'Engine_Load', 'Engine_RPM', 'Engine_Coolant_Temp', 'Vibration',
    'Mass_Air_Flow_Rate', 'Engine_Oil_Temp', 'Throttle_Pos_Manifold',
    'Accel_Ssor_Total', 'Trip_Distance', 'Trip_Time_journey',
]
df.loc[:, preview_columns]

In [None]:
# Display the current state of the working frame.
df

In [None]:
# def get_user_input():
#     engine_load = float(input("Enter Engine Load: "))
#     engine_rpm = float(input("Enter Engine RPM: "))
#     engine_coolant_temp = float(input("Enter Engine Coolant Temperature: "))
#     trip_distance = float(input("Enter Trip Distance: "))
#     trip_time_journey = float(input("Enter Trip Time Journey: "))
#     vibration = float(input("Enter Vibration: "))
    
#     user_input_row = {
#         'Engine_Load': engine_load,
#         'Engine_RPM': engine_rpm,
#         'Engine_Coolant_Temp': engine_coolant_temp,
#         'Trip_Distance': trip_distance,
#         'Trip_Time_journey': trip_time_journey,
#         'Vibration': vibration
#     }
    
#     condition_score = calculate_condition_score(user_input_row)
#     return condition_score

# user_condition_score = get_user_input()
# print(f"Condition Score: {user_condition_score}")


In [None]:
# Summary statistics of the condition score, followed by a 30-bin histogram
# with a KDE overlay. seaborn / matplotlib are imported in the notebook's
# first cell; the plot is drawn on an explicit Axes instead of relying on
# pyplot's implicit "current figure".
print(df['Condition_Score'].describe())

fig, ax = plt.subplots()
sns.histplot(df['Condition_Score'], bins=30, kde=True, ax=ax)
ax.set_title('Condition Score Distribution')
plt.show()


In [None]:
# Build the modelling frame without the 'Priority' label column.
# `df.drop(...)` returns a new frame, so `df` itself is left untouched and this
# cell can be re-run without a KeyError (the previous `df1 = df` was only an
# alias, so the in-place drop also removed the column from `df`).
df1 = df.drop(columns=['Priority'])

# Features are every remaining column except the target.
# NOTE(review): `x` still contains 'Condition_Score' and the three '*_Risk'
# columns from which assign_failure_label derives 'Failure_Type', so the model
# can reconstruct the label directly from its inputs — confirm this is intended.
x = df1.drop(columns=['Failure_Type'])
y = df1['Failure_Type']

In [None]:
# Encode the string failure labels as integers for LightGBM.
# The encoder is fit on the original label column rather than on `y`, so
# re-running this cell does not re-encode an already encoded array.
lb = LabelEncoder()
y = lb.fit_transform(df1['Failure_Type'])
print(y)
print(lb.classes_)
# Decode every code the encoder actually knows. A hard-coded [0, 1, 2, 3]
# raises ValueError whenever fewer than four classes occur in the data.
print(lb.inverse_transform(np.arange(len(lb.classes_))))

In [None]:
# Hold out 20% of the rows as the test set, then take 10% of the remaining
# training rows as the validation set (both splits seeded with 42).
xtrain, xtest, ytrain, ytest = train_test_split(
    x, y, test_size=0.2, random_state=42
)
xtrain, xval, ytrain, yval = train_test_split(
    xtrain, ytrain, test_size=0.1, random_state=42
)

# Wrap the training and validation parts as LightGBM datasets.
train = lgb.Dataset(data=xtrain, label=ytrain)
val = lgb.Dataset(data=xval, label=yval)


In [None]:
# LightGBM training configuration: multiclass GBDT with one output per label
# known to the encoder, evaluated with multi-class log loss.
# NOTE(review): the LightGBM parameter docs describe `is_unbalance` as used only
# by the binary and multiclassova objectives, so it likely has no effect with
# 'multiclass' — confirm, and consider per-sample weights if imbalance matters.
params = dict(
    objective='multiclass',
    num_class=len(lb.classes_),
    boosting_type='gbdt',
    metric='multi_logloss',
    learning_rate=0.05,
    max_depth=6,
    num_leaves=31,
    min_data_in_leaf=20,
    lambda_l1=0.1,
    lambda_l2=0.1,
    feature_fraction=0.8,
    bagging_fraction=0.8,
    bagging_freq=5,
    verbose=-1,
    seed=42,
    is_unbalance=True,
)

In [None]:
# Callbacks: stop once 30 rounds pass without improvement, and log the
# evaluation results every 10 rounds.
verbose_eval_callback = lgb.log_evaluation(period=10)
early_stopping_callback = lgb.early_stopping(stopping_rounds=30)

# Train for at most 350 boosting rounds, evaluating on both the training and
# the validation dataset.
lgb_model = lgb.train(
    params=params,
    train_set=train,
    num_boost_round=350,
    valid_sets=[train, val],
    callbacks=[early_stopping_callback, verbose_eval_callback],
)


In [None]:
# Predict on the held-out test rows; each entry of `predprob` holds the
# per-class scores of one row.
predprob = lgb_model.predict(xtest)

# Pick the highest-scoring class per row in one vectorised call instead of a
# Python-level loop over the rows.
pred = np.argmax(predprob, axis=1)
print(accuracy_score(ytest, pred))

In [None]:
# Per-class precision / recall / F1 on the test set (classes appear as their
# encoded integer codes).
report = classification_report(ytest, pred)
print(report)