In [None]:
import os

import joblib
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

### 1. Read the dataset and show a sample
Display the first 3 rows of the dataset

In [1]:
# Load the raw survey answers; `df` keeps the original text values for later comparison.
df = pd.read_csv('ActivityRecommendSurvey.csv')
# A bare last expression renders the preview as a rich table instead of plain printed text.
df.head(3)

NameError: name 'pd' is not defined

### 2. Encoding categorical variables
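Using `LabelEncoder` to turn the categorical text columns (such as 'Meslek' and 'Cinsiyet') into integer codes. These categories are nominal rather than ordinal, so the numeric order of the codes carries no meaning.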

In [None]:
# Show the raw (not yet encoded) survey frame; pandas abbreviates the middle rows in the rendered output.
df

Unnamed: 0,Yaş,Cinsiyet,Meslek,Mutluyken,Üzgünken,Öfkeliyken
0,21,Erkek,Öğrenci,Doğa Yürüyüşü,Yürüyüş (Doğa - Sahil - Park),Müzik Dinlemek
1,21,Kadın,Öğrenci,Müzik Dinlemek,Birileriyle Konuşmak,Derin Nefes Egzersizi
2,24,Erkek,Öğrenci,Dans Etmek,Birileriyle Konuşmak,Mola Vermek
3,20,Erkek,Öğrenci,Müzik Dinlemek,Film - Dizi İzlemek,Müzik Dinlemek
4,19,Erkek,Öğrenci,Spor Yapmak (Bisiklet - Koşu - Yüzme),Film - Dizi İzlemek,Müzik Dinlemek
...,...,...,...,...,...,...
175,23,Erkek,Öğrenci,Doğa Yürüyüşü,Film - Dizi İzlemek,Müzik Dinlemek
176,25,Kadın,Doktor,Müzik Dinlemek,Yemek yapmak/yemek,Müzik Dinlemek
177,21,Kadın,Endüstri Mühendisi,Müzik Dinlemek,Film - Dizi İzlemek,Duş Almak
178,22,Kadın,Öğrenci,Dans Etmek,Birileriyle Konuşmak,Derin Nefes Egzersizi


In [None]:
import numpy as np


# Separate copy of the survey that will be overwritten with integer codes;
# reloading it here means re-running this cell always starts from the raw text values.
df_encoded = pd.read_csv('ActivityRecommendSurvey.csv')


def encoder(column,frame=df_encoded):
    """Label-encode one column of ``frame`` in place and return the fitted encoder.

    Parameters
    ----------
    column : str
        Name of the column to encode.
    frame : pandas.DataFrame, optional
        Frame whose column is overwritten with integer codes. The default is
        the ``df_encoded`` object that exists when this ``def`` statement runs
        (default arguments are bound once, at definition time).

    Returns
    -------
    LabelEncoder
        The encoder fitted on the column's values as they were before the
        call; keep it to map codes back with ``inverse_transform``.

    Notes
    -----
    The call is not idempotent: encoding the same column a second time fits
    the new encoder on the integer codes, so its ``classes_`` are integers and
    ``inverse_transform`` no longer yields the original strings.

    NOTE(review): an earlier comment here reported that calling ``fit`` and
    ``transform`` separately broke ``inverse_transform``. The two forms should
    be equivalent, so the likelier cause is a re-run on already-encoded data
    -- confirm.
    """
    le = LabelEncoder()
    frame[column] = le.fit_transform(frame[column])
    return le
# Fit one LabelEncoder per text column, in this order:
# 'Cinsiyet', 'Meslek', 'Mutluyken', 'Üzgünken', 'Öfkeliyken'.
# Each call overwrites that column of df_encoded with integer codes.
le_gender, le_job, le_happy, le_sad, le_angry = (
    encoder(column_name)
    for column_name in ('Cinsiyet', 'Meslek', 'Mutluyken', 'Üzgünken', 'Öfkeliyken')
)


# A OneHotEncoder is a more explicit alternative for a categorical feature such as 'Cinsiyet'.
# The column already holds label codes at this point: 0 for man, 1 for woman.
oe_style = OneHotEncoder()
oe_results = oe_style.fit_transform(df_encoded[["Cinsiyet"]])

# Raw frame next to the encoded one (the encoding itself happened inside encoder()).
print("DataFrame:\n", df)
print("Encoded DataFrame:\n", df_encoded)

# Round-trip check: map the integer codes of the three activity columns back to text.
decoded_happy, decoded_sad, decoded_angry = (
    fitted_encoder.inverse_transform(df_encoded[activity_column])
    for fitted_encoder, activity_column in (
        (le_happy, "Mutluyken"),
        (le_sad, "Üzgünken"),
        (le_angry, "Öfkeliyken"),
    )
)

print("\nDecoded Values:")
for heading, decoded_values in (
    ("Mutluyken:", decoded_happy),
    ("Üzgünken:", decoded_sad),
    ("Öfkeliyken:", decoded_angry),
):
    # Only the distinct activity names are shown, not every row.
    print(heading, np.unique(decoded_values))

DataFrame:
      Yaş Cinsiyet              Meslek                              Mutluyken  \
0     21    Erkek             Öğrenci                          Doğa Yürüyüşü   
1     21    Kadın             Öğrenci                         Müzik Dinlemek   
2     24    Erkek             Öğrenci                             Dans Etmek   
3     20    Erkek             Öğrenci                         Müzik Dinlemek   
4     19    Erkek             Öğrenci  Spor Yapmak (Bisiklet - Koşu - Yüzme)   
..   ...      ...                 ...                                    ...   
175   23    Erkek             Öğrenci                          Doğa Yürüyüşü   
176   25    Kadın              Doktor                         Müzik Dinlemek   
177   21    Kadın  Endüstri Mühendisi                         Müzik Dinlemek   
178   22    Kadın             Öğrenci                             Dans Etmek   
179   22    Kadın             Öğrenci                             Dans Etmek   

                          Ü

In [None]:
# Show the one-hot encoded results for the 'Cinsiyet' column.
# categories_ is a list holding one array per encoded feature. Passing the whole
# list hands pandas a list of arrays, which it interprets as MultiIndex levels,
# so take the single feature's array to get a plain, flat column index.
print(pd.DataFrame(oe_results.toarray(), columns=oe_style.categories_[0]))

       0    1
0    1.0  0.0
1    0.0  1.0
2    1.0  0.0
3    1.0  0.0
4    1.0  0.0
..   ...  ...
175  1.0  0.0
176  0.0  1.0
177  0.0  1.0
178  0.0  1.0
179  0.0  1.0

[180 rows x 2 columns]


### 3. Train-test split and training the model
Features for training: 'Yaş', 'Cinsiyet', 'Meslek'

In [None]:
# Model inputs: age plus the label-encoded gender and job columns.
features = df_encoded[['Yaş', 'Cinsiyet', 'Meslek']]

# One target column per emotional state.
labels = dict(
    sad=df_encoded["Üzgünken"],
    happy=df_encoded["Mutluyken"],
    angry=df_encoded["Öfkeliyken"],
)

# Every state uses the same 80% train / 20% test split with a fixed seed.
split_options = {"test_size": 0.2, "random_state": 42}
X_train_happy, X_test_happy, y_train_happy, y_test_happy = train_test_split(
    features, labels["happy"], **split_options
)
X_train_sad, X_test_sad, y_train_sad, y_test_sad = train_test_split(
    features, labels["sad"], **split_options
)
X_train_angry, X_test_angry, y_train_angry, y_test_angry = train_test_split(
    features, labels["angry"], **split_options
)

classification_report is a function in scikit-learn used to evaluate the performance of a classification model by providing metrics such as precision, recall, f1-score, and support for each class. It gives you a detailed performance report for your classifier, showing how well it performed across different classes.

| Metric        | What It Measures                          | When is High Value Good?                                      | When to Focus on It?                           |
|--------------|--------------------------------------|--------------------------------------------------|--------------------------------|
| **Precision** | Correctness of positive predictions | When false positives are bad (e.g., spam detection, medical diagnosis) | You care about **being right** when predicting a class |
| **Recall**    | Coverage of actual positive cases   | When false negatives are bad (e.g., cancer detection, fraud detection) | You care about **not missing** a real case |
| **F1-Score**  | Balance of precision & recall      | When both precision & recall matter             | There’s a **trade-off** between precision and recall |
| **Support**   | How many samples exist per class   | -                                                | If a class has low support, the model may struggle with it |
| **Accuracy**  | Overall correctness                | When classes are balanced                        | Use only when data is **balanced** |
| **Macro Avg** | Average performance across all classes (treats all equally) | When all classes are equally important | Use when all classes need equal attention |
| **Weighted Avg** | Average weighted by class frequency | When classes are imbalanced | Use when some classes have **much more data** than others |


In [None]:
# Train and evaluate model using Random Forest Classifier
import numpy as np


def train_and_evaluate(X_train, X_test, y_train, y_test, label_encoder, state):
    """Train a RandomForest model for one emotional state and print its report.

    Parameters
    ----------
    X_train, X_test
        Feature rows used for fitting and for evaluation.
    y_train, y_test
        Label-encoded activity targets matching ``X_train`` / ``X_test``.
    label_encoder
        Fitted encoder for this state's activity column; used to turn the
        integer class codes back into readable names in the report.
    state
        Name of the emotional state, used only in the printed heading.

    Returns
    -------
    RandomForestClassifier
        The fitted model.
    """
    model = RandomForestClassifier(random_state=42)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # The report has one row per class found in y_test OR y_pred. Naming only the
    # classes of y_test makes classification_report raise a ValueError as soon as
    # the model predicts a class that is absent from the test split, so the label
    # list is built from the union and passed explicitly alongside the names.
    report_labels = np.union1d(y_test, y_pred)
    target_names = [str(name) for name in label_encoder.inverse_transform(report_labels)]

    print(f"{state} State Classification Report (with target names):")
    print(
        classification_report(
            y_test,
            y_pred,
            labels=report_labels,
            target_names=target_names,
            zero_division=0,
        )
    )

    return model

# Fit and report one model per emotion, in the order Happy, Sad, Angry.
modelHappy, modelSad, modelAngry = (
    train_and_evaluate(X_tr, X_te, y_tr, y_te, fitted_encoder, state_name)
    for X_tr, X_te, y_tr, y_te, fitted_encoder, state_name in (
        (X_train_happy, X_test_happy, y_train_happy, y_test_happy, le_happy, 'Happy'),
        (X_train_sad, X_test_sad, y_train_sad, y_test_sad, le_sad, 'Sad'),
        (X_train_angry, X_test_angry, y_train_angry, y_test_angry, le_angry, 'Angry'),
    )
)

Happy State Classification Report (with target names):
                                       precision    recall  f1-score   support

                           Dans Etmek       0.08      0.25      0.12         4
                        Doğa Yürüyüşü       0.50      0.33      0.40         6
El İşi (Resim - heykel - dikiş nakış)       0.00      0.00      0.00         2
                         Kitap Okumak       0.00      0.00      0.00         2
                       Müzik Dinlemek       0.56      0.36      0.43        14
Spor Yapmak (Bisiklet - Koşu - Yüzme)       0.50      0.50      0.50         8

                             accuracy                           0.33        36
                            macro avg       0.27      0.24      0.24        36
                         weighted avg       0.42      0.33      0.36        36

Sad State Classification Report (with target names):
                               precision    recall  f1-score   support

         Birileriyle Konuşm

### 4. Save models and label encoders using joblib
Saving the trained RandomForest models and label encoders for later use

In [None]:
# joblib.dump does not create missing folders, so make sure the targets exist.
# Creating the nested folder also creates its parent 'models'.
os.makedirs('models/propsModels', exist_ok=True)

# Destination file -> object to persist: the three models, then the five label encoders.
artifacts = {
    'models/rfHappyModel.pkl': modelHappy,
    'models/rfSadModel.pkl': modelSad,
    'models/rfAngryModel.pkl': modelAngry,
    'models/propsModels/le_gender.pkl': le_gender,
    'models/propsModels/le_job.pkl': le_job,
    'models/propsModels/le_happy.pkl': le_happy,
    'models/propsModels/le_sad.pkl': le_sad,
    'models/propsModels/le_angry.pkl': le_angry,
}
for artifact_path, artifact in artifacts.items():
    joblib.dump(artifact, artifact_path)

['models/propsModels/le_angry.pkl']

In [None]:
# TODO experiment - trying to obtain the numeric encoded value from a string
# DONE
job = "Öğrenci"
if job in le_job.classes_:
    # Known job title: look up its integer code.
    encoded_job = le_job.transform([job])[0]
else:
    # Job title the encoder was never fitted on.
    encoded_job = None
print(encoded_job)

24


## **Activity Prediction Based on User Input**  

This script predicts an activity based on user inputs for **age, gender, job, and mood** using a trained **Random Forest model**.  

### **Steps:**  
1. **User Input:**  
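   - The user enters **age, gender, job, and mood**. The mood must be typed as `Mutlu`, `Üzgün`, or `Öfkeli` (happy, sad, or angry), and gender and job must match values that appear in the survey (for example `Kadın`, `Öğrenci`).  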

2. **Encoding:**  
   - The inputs are converted into numeric values using pre-trained `LabelEncoder`s.  
   - If an invalid gender or job is entered, an error message is displayed.  

3. **Model Selection & Prediction:**  
   - Based on the mood, the corresponding **machine learning model** is loaded.  
   - The encoded inputs are fed into the model to predict an **activity**.  

4. **Decoding & Output:**  
   - The predicted numeric value is converted back into an **activity name**.  
   - The final result is displayed.  

In [None]:
import joblib
import numpy as np
import pandas as pd

# Mood typed by the user -> (activity label-encoder file, model file) written by the save cell.
MOOD_ARTIFACTS = {
    "Mutlu": ("models/propsModels/le_happy.pkl", "models/rfHappyModel.pkl"),
    "Üzgün": ("models/propsModels/le_sad.pkl", "models/rfSadModel.pkl"),
    "Öfkeli": ("models/propsModels/le_angry.pkl", "models/rfAngryModel.pkl"),
}
# Column names of the feature frame the models were fitted on.
FEATURE_COLUMNS = ['Yaş', 'Cinsiyet', 'Meslek']

user_age = int(input("Enter your age: \n"))
user_gender = input("Enter your gender: \n")
user_job = input("Enter your job: \n")
user_mood = input("Enter your mood: \n")
print("\n--------------- INPUT ---------------")
print(f"User age: {user_age}\nUser gender: {user_gender}\nUser job: {user_job}\nUser Mood: {user_mood}\n")

# NOTE: joblib.load unpickles arbitrary objects; only load files produced by this notebook.
le_gender = joblib.load("models/propsModels/le_gender.pkl")
le_job = joblib.load("models/propsModels/le_job.pkl")

# None marks a value the encoder was never fitted on.
encoded_gender = le_gender.transform([user_gender])[0] if user_gender in le_gender.classes_ else None
encoded_job = le_job.transform([user_job])[0] if user_job in le_job.classes_ else None

# Validate everything before predicting. An unknown mood previously fell through to an
# undefined (or stale) `model`, and exit() would shut the notebook kernel down, so the
# invalid cases now simply skip the prediction.
if user_mood not in MOOD_ARTIFACTS:
    print("Not Valid Mood!")
elif encoded_gender is None or encoded_job is None:
    print("Geçersiz cinsiyet veya meslek girdiniz. Lütfen tekrar deneyin.")
else:
    le_mood_path, model_path = MOOD_ARTIFACTS[user_mood]
    le_mood = joblib.load(le_mood_path)
    model = joblib.load(model_path)

    # Use a DataFrame with the training column names so the input lines up with the
    # fitted features (a bare ndarray triggers sklearn's feature-name warning).
    input_features = pd.DataFrame(
        [[user_age, encoded_gender, encoded_job]], columns=FEATURE_COLUMNS
    )
    predicted_activity_encoded = model.predict(input_features)[0]
    # Despite its name, decoded_mood holds the predicted *activity* name (None if the code is out of range).
    decoded_mood = le_mood.inverse_transform([predicted_activity_encoded])[0] if predicted_activity_encoded in range(len(le_mood.classes_)) else None
    print("\n--------------- OUTPUT ---------------")
    print(f"User age: {user_age}\nUser gender: {encoded_gender}\nUser job: {encoded_job}\nUser mood: {user_mood}\nUser Encoded Activity: {predicted_activity_encoded}\nUser predicted activity: {decoded_mood}\n")


--------------- INPUT ---------------
User age: 23
User gender: Kadın
User job: Öğrenci
User Mood: Mutlu


--------------- OUTPUT ---------------
User age: 23
User gender: 1
User job: 24
User mood: Mutlu
User Encoded Activity: 7
User predicted activity: Spor Yapmak (Bisiklet - Koşu - Yüzme)


