In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from joblib import dump
import pickle

# Load the dataset
data = pd.read_csv('data.csv')

# Handle missing values (if any)
data.fillna(method='ffill', inplace=True)


# Convert the target variable 'Dangerous' to numerical format
le = LabelEncoder()
data['Dangerous'] = le.fit_transform(data['Dangerous'])

# One-hot encode categorical symptom columns
encoder = OneHotEncoder(sparse=False)
categorical_data = data[['AnimalName', 'symptoms1', 'symptoms2', 'symptoms3', 'symptoms4', 'symptoms5']]
encoded_features = encoder.fit_transform(categorical_data)

# Save the encoder to a joblib file
with open('onehot_encoder.pkl', 'wb') as file:
    pickle.dump(encoder, file)

# Create a DataFrame with the encoded features
encoded_df = pd.DataFrame(encoded_features, columns=encoder.get_feature_names_out())

# Combine the encoded DataFrame with the target variable
data_encoded = pd.concat([encoded_df, data[['Dangerous']]], axis=1)

# Split the dataset into training and testing sets
X = data_encoded.drop('Dangerous', axis=1)  # Features
y = data_encoded['Dangerous']  # Target variable

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the Random Forest Classifier
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Evaluate the model
y_pred = model.predict(X_test)
from sklearn.metrics import accuracy_score, classification_report
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))

# Save the trained model to a joblib file
dump(model, 'random_forest_model.joblib')

print("Model saved as 'random_forest_model.joblib'")

  data.fillna(method='ffill', inplace=True)


Accuracy: 0.9885714285714285
Classification Report:
               precision    recall  f1-score   support

           0       0.00      0.00      0.00         2
           1       0.99      1.00      0.99       173

    accuracy                           0.99       175
   macro avg       0.49      0.50      0.50       175
weighted avg       0.98      0.99      0.98       175

Model saved as 'random_forest_model.joblib'


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [3]:
pip show scikit-learn


Name: scikit-learn
Version: 1.2.2
Summary: A set of python modules for machine learning and data mining
Home-page: http://scikit-learn.org
Author: 
Author-email: 
License: new BSD
Location: C:\Users\Pavani Alla\anaconda3\Lib\site-packages
Requires: joblib, numpy, scipy, threadpoolctl
Required-by: imbalanced-learn
Note: you may need to restart the kernel to use updated packages.
