In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from imblearn.over_sampling import SMOTE
import pickle
print("Libraries imported successfully.")

Libraries imported successfully.


In [2]:
df = pd.read_csv('Animal_Health.csv')
df['Dangerous'].fillna(df['Dangerous'].mode()[0], inplace=True) 
print("Data loaded and null values handled successfully.")

Data loaded and null values handled successfully.


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Dangerous'].fillna(df['Dangerous'].mode()[0], inplace=True)


In [3]:
encoders = {}
for col in df.columns:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    encoders[col] = le
print("\nCategorical columns encoded successfully.")


Categorical columns encoded successfully.


In [4]:
X = df.drop('Dangerous', axis=1)
y = df['Dangerous']
print(f"\nFeatures and target separated. X shape: {X.shape}, y shape: {y.shape}")


Features and target separated. X shape: (871, 6), y shape: (871,)


In [5]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)
print(f"Data split into training and testing sets.")

Data split into training and testing sets.


In [6]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
print("\nFeatures scaled successfully.")


Features scaled successfully.


In [7]:
smote = SMOTE(random_state=42)
X_train_resampled, y_train_resampled = smote.fit_resample(X_train_scaled, y_train)
print("SMOTE applied to the training data.")

SMOTE applied to the training data.


In [8]:
rfc = RandomForestClassifier(random_state=42)
rfc.fit(X_train_resampled, y_train_resampled)
print("\nRandom Forest model trained successfully.")


Random Forest model trained successfully.


In [9]:
pickle.dump(rfc, open('rfc.pkl', 'wb'))
pickle.dump(scaler, open('scaler.pkl', 'wb'))
print("\n✅ Model ('rfc.pkl') and Scaler ('scaler.pkl') have been saved successfully!")



✅ Model ('rfc.pkl') and Scaler ('scaler.pkl') have been saved successfully!
