In [None]:
import pandas as pd
import io
from google.colab import files

# Reuse the upload from an earlier run of this cell when one exists. Prompt
# again on a fresh kernel, and also when a previous upload was cancelled and
# left an empty (falsy) `uploaded` behind -- the old `'uploaded' not in locals()`
# check would skip the prompt in that case and the cell could never recover.
if not globals().get("uploaded"):
    uploaded = files.upload()

# The upload dialog can be dismissed without selecting a file; fail with a
# clear message instead of a bare StopIteration from next(iter(...)).
if not uploaded:
    raise ValueError(
        "No file was uploaded; re-run this cell and select the dataset CSV."
    )

# Only the first uploaded file is read; any additional files are ignored.
file_name = next(iter(uploaded))

# `uploaded` maps file name -> raw bytes, so wrap the bytes for pandas.
data = pd.read_csv(io.BytesIO(uploaded[file_name]))
print(data["emotion"].value_counts())

emotion
Angry     2249
Normal    1728
Sad        110
Happy       40
Fear        32
Name: count, dtype: int64


In [None]:
def merge_emotions(e):
    """Collapse a fine-grained emotion label into one of three coarse classes.

    Args:
        e: The raw emotion label.

    Returns:
        "positive" for "Happy" or "Normal", "negative" for "Sad" or "Fear",
        and "aggressive" for every other value (the catch-all branch).
    """
    groups = (
        (("Happy", "Normal"), "positive"),
        (("Sad", "Fear"), "negative"),
    )
    for members, merged in groups:
        if e in members:
            return merged
    # Anything not listed above falls through to the catch-all class.
    return "aggressive"

# merge_emotions sends every label it does not recognise -- including its own
# outputs "positive" and "negative" -- to "aggressive". Applying it a second
# time (e.g. by re-running this cell) would therefore collapse the whole
# column into a single class, so only merge while raw labels are still present.
merged_labels = {"positive", "negative", "aggressive"}
if not data["emotion"].isin(merged_labels).all():
    data["emotion"] = data["emotion"].apply(merge_emotions)

print("\nAfter merging:")
print(data["emotion"].value_counts())


After merging:
emotion
aggressive    2249
positive      1768
negative       142
Name: count, dtype: int64


In [None]:
from sklearn.preprocessing import LabelEncoder

# Target is the merged emotion label; features are every remaining column.
y = data["emotion"]
X = data.drop(columns=["emotion"])

# Replace each breed value with an integer code. The fitted encoder is kept
# in its own variable so it can be reused later.
breed_encoder = LabelEncoder().fit(X["breed"])
X["breed"] = breed_encoder.transform(X["breed"])

# Encode the target classes as integers; emotion_encoder.classes_ holds the
# original label for each integer code.
emotion_encoder = LabelEncoder().fit(y)
y_encoded = emotion_encoder.transform(y)

In [None]:
from imblearn.over_sampling import SMOTE

# NOTE(review): SMOTE is fitted on the complete dataset here. Any train/test
# split taken from X_resampled afterwards will contain synthetic rows
# interpolated from samples on the other side of the split, which inflates
# evaluation scores. "breed" is also an integer-coded category at this point
# and SMOTE interpolates it like a continuous value -- confirm this is intended.
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X, y_encoded)

# value_counts() orders rows by frequency, not by class code, so sort by the
# encoded label before pairing each count with emotion_encoder.classes_.
resampled_counts = pd.Series(y_resampled).value_counts().sort_index()
print("Balanced class counts:",
      {emotion_encoder.classes_[code]: int(count)
       for code, count in resampled_counts.items()})

Balanced class counts: {'aggressive': 2249, 'negative': 2249, 'positive': 2249}


In [None]:
from sklearn.ensemble import RandomForestClassifier

from sklearn.model_selection import train_test_split

# The *_resampled train/test variables used for fitting (and for evaluating
# this model) are not created by any visible cell, so a fresh kernel would
# stop here with a NameError. Create them explicitly: a stratified 20%
# hold-out of the SMOTE-balanced data, which is consistent with the recorded
# evaluation support (1350 rows, 450 per class).
# NOTE(review): the hold-out is drawn from already-oversampled data, so it
# contains synthetic rows and the resulting scores are optimistic.
X_train_resampled, X_test_resampled, y_train_resampled, y_test_resampled = train_test_split(
    X_resampled,
    y_resampled,
    test_size=0.2,
    random_state=42,
    stratify=y_resampled,
)

rf_model_balanced = RandomForestClassifier(
    n_estimators=300, # Number of trees in the forest
    max_depth=25,     # Maximum depth of the tree
    min_samples_leaf=2, # Minimum number of samples required to be at a leaf node
    class_weight='balanced', # Weights classes inversely to their frequency in the training labels
    random_state=42,  # Fixed seed so the forest is reproducible
    n_jobs=-1         # Use all available processors
)

print("Training Random Forest Classifier on balanced data...")
rf_model_balanced.fit(X_train_resampled, y_train_resampled)
print("Training complete.")

Training Random Forest Classifier on balanced data...
Training complete.


In [None]:
from sklearn.metrics import accuracy_score, classification_report

# Predict encoded class labels for the held-out split.
y_pred_balanced = rf_model_balanced.predict(X_test_resampled)

# Fraction of held-out rows predicted correctly, reported as a percentage.
accuracy_balanced = accuracy_score(
    y_true=y_test_resampled,
    y_pred=y_pred_balanced,
)
print(f"Accuracy on balanced test set: {accuracy_balanced * 100:.2f}%")

# Per-class precision / recall / F1, labelled with the original class names.
print("\nClassification Report on balanced test set:")
print(
    classification_report(
        y_true=y_test_resampled,
        y_pred=y_pred_balanced,
        target_names=emotion_encoder.classes_,
    )
)

Accuracy on balanced test set: 87.63%

Classification Report on balanced test set:
              precision    recall  f1-score   support

  aggressive       0.82      0.84      0.83       450
    negative       0.96      0.99      0.97       450
    positive       0.84      0.80      0.82       450

    accuracy                           0.88      1350
   macro avg       0.88      0.88      0.88      1350
weighted avg       0.88      0.88      0.88      1350



In [None]:
from sklearn.model_selection import train_test_split

from imblearn.over_sampling import SMOTE

# Hold out the test set from the ORIGINAL rows first, then oversample only the
# training portion. Splitting data that was already SMOTE-resampled puts
# synthetic points (interpolated from training neighbours) into the test set,
# which leaks information and inflates the reported scores.
X_train_raw, X_test, y_train_raw, y_test = train_test_split(
    X,
    y_encoded,
    test_size=0.2,
    random_state=42,
    stratify=y_encoded,  # keep the rare classes represented in the real test set
)

# Balance the classes in the training data only; X_test / y_test stay untouched
# so evaluation reflects the real class distribution.
X_train, y_train = SMOTE(random_state=42).fit_resample(X_train_raw, y_train_raw)


In [None]:
from sklearn.ensemble import RandomForestClassifier

# Random forest that is fitted on the training split below.
rf_model = RandomForestClassifier(
    random_state=42,          # fixed seed for reproducible trees
    n_jobs=-1,                # use all available processors
    class_weight='balanced',  # weight classes inversely to their frequency
    min_samples_leaf=3,       # every leaf must hold at least 3 samples
    max_depth=30,             # cap on the depth of each tree
    n_estimators=500,         # number of trees in the forest
)

print("Training started...")
rf_model.fit(X=X_train, y=y_train)
print("Training completed.")


Training started...
Training completed.


In [None]:
from sklearn.metrics import accuracy_score, classification_report

# Predict encoded class labels for the test split.
y_pred = rf_model.predict(X_test)

# Overall fraction of correct predictions, reported as a percentage.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"\nAccuracy: {accuracy * 100:.2f}%")

# Per-class precision / recall / F1, labelled with the original class names.
print("\nClassification Report:")
print(classification_report(y_true=y_test, y_pred=y_pred, target_names=emotion_encoder.classes_))



Accuracy: 86.67%

Classification Report:
              precision    recall  f1-score   support

  aggressive       0.81      0.83      0.82       450
    negative       0.95      0.99      0.97       450
    positive       0.83      0.78      0.81       450

    accuracy                           0.87      1350
   macro avg       0.87      0.87      0.87      1350
weighted avg       0.87      0.87      0.87      1350



In [None]:
import joblib

# Persist the trained model together with both encoders to the current
# working directory.
joblib.dump(value=rf_model, filename="dog_emotion_rf.pkl")
joblib.dump(value=breed_encoder, filename="breed_encoder.pkl")
# Kept as the final bare expression so the cell still displays the list of
# written file names returned by joblib.dump.
joblib.dump(value=emotion_encoder, filename="emotion_encoder.pkl")


['emotion_encoder.pkl']

In [None]:
from google.colab import files

# Ask the browser to download each saved artifact, in the same order they
# were written.
for artifact_name in ("dog_emotion_rf.pkl", "breed_encoder.pkl", "emotion_encoder.pkl"):
    files.download(artifact_name)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
import joblib

import os
from pathlib import Path

# Destination folder for the exported artifacts. It defaults to the original
# hardcoded location; set the DOG_EMOTION_MODEL_DIR environment variable to
# export somewhere else without editing the notebook.
model_dir = Path(os.environ.get("DOG_EMOTION_MODEL_DIR", r"D:\ai project"))
# Create the folder up front so the dumps below do not fail when it is missing.
model_dir.mkdir(parents=True, exist_ok=True)

joblib.dump(rf_model, str(model_dir / "dog_emotion_rf.pkl"))
joblib.dump(breed_encoder, str(model_dir / "breed_encoder.pkl"))
# Kept as the final bare expression so the cell still displays the written path.
joblib.dump(emotion_encoder, str(model_dir / "emotion_encoder.pkl"))


['D:\\ai project\\emotion_encoder.pkl']

In [None]:
import joblib

# Read the persisted random forest back from the working directory.
# joblib files are pickle-based: only load artifacts from a trusted source.
model = joblib.load(filename="dog_emotion_rf.pkl")
print("Model loaded successfully!")


Model loaded successfully!
