In [31]:
import json
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from datetime import datetime
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, classification_report

In [32]:
# Load yak data from the JSON file.
# The path is written as a raw string: the previous plain literal contained
# backslash sequences such as "\D" and "\j" that Python does not recognise as
# escapes. They are kept literally today but trigger a warning and are slated
# to become a syntax error; a raw string yields the same path without relying
# on that leniency.
file_path = r"D:\Datasets\Yak_Webshop_1\json_files\sample_diverse_data_recommendation.json"
with open(file_path, "r") as json_file:
    yak_data = json.load(json_file)
print("File uploaded successfully")

# Build a DataFrame from the parsed JSON records and preview the first rows.
df = pd.DataFrame(yak_data)
df.head(3)

File uploaded successfully


Unnamed: 0,name,age,health,behavior,weight_kg,last_checkup_date
0,Betty-1,100.0,special_health,special_behavior,96.19,2023-10-19
1,Betty-2,5.9,excellent,positive,156.81,2023-10-25
2,Betty-3,18.2,fair,stubborn,53.22,2023-06-16


In [13]:
# Turn the checkup date into a numeric feature: whole days elapsed between the
# last checkup and a fixed reference date (assumed "today" for this analysis).
current_date = datetime.strptime("2023-11-25", "%Y-%m-%d")
df['last_checkup_date'] = pd.to_datetime(df['last_checkup_date'])
time_since_checkup = current_date - df['last_checkup_date']
df['days_since_checkup'] = time_since_checkup.dt.days

df.head(n=3)

Unnamed: 0,name,age,health,behavior,weight_kg,last_checkup_date,days_since_checkup
0,Betty-1,100.0,special_health,special_behavior,96.19,2023-10-19,37
1,Betty-2,5.9,excellent,positive,156.81,2023-10-25,31
2,Betty-3,18.2,fair,stubborn,53.22,2023-06-16,162


In [14]:
# Replace the categorical 'behavior' column with one indicator column per
# category, each named 'behavior_<category>'.
df = pd.get_dummies(data=df, prefix='behavior', columns=['behavior'])
df.head(n=3)

Unnamed: 0,name,age,health,weight_kg,last_checkup_date,days_since_checkup,behavior_calm,behavior_negative,behavior_playful,behavior_positive,behavior_special_behavior,behavior_stubborn
0,Betty-1,100.0,special_health,96.19,2023-10-19,37,False,False,False,False,True,False
1,Betty-2,5.9,excellent,156.81,2023-10-25,31,False,False,False,True,False,False
2,Betty-3,18.2,fair,53.22,2023-06-16,162,False,False,False,False,False,True


In [15]:
# List the column labels of the encoded frame; the next cell selects the
# feature columns by these names.
df.columns

Index(['name', 'age', 'health', 'weight_kg', 'last_checkup_date',
       'days_since_checkup', 'behavior_calm', 'behavior_negative',
       'behavior_playful', 'behavior_positive', 'behavior_special_behavior',
       'behavior_stubborn'],
      dtype='object')

In [16]:
# Feature matrix X: the numeric attributes plus the one-hot behavior indicators.
# Target y: the 'health' label.
feature_columns = [
    'age',
    'weight_kg',
    'days_since_checkup',
    'behavior_calm',
    'behavior_playful',
    'behavior_positive',
    'behavior_negative',
    'behavior_special_behavior',
    'behavior_stubborn',
]
X = df[feature_columns]
y = df['health']

In [17]:
# Count how many rows fall into each health class and print the tally.
class_counts = y.value_counts()
print("Distribution of target variable:")
print('\n', class_counts)

Distribution of target variable:

 health
fair              36
excellent         31
good              13
special_health    10
poor              10
Name: count, dtype: int64


In [21]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Decision-tree hyperparameters, spelled out explicitly so they are easy to tune.
tree_params = {
    'max_depth': None,
    'min_samples_split': 2,
    'min_samples_leaf': 1,
    'random_state': 42,
}
model = DecisionTreeClassifier(**tree_params)

model.fit(X_train, y_train)

In [22]:
# Score the fitted model on the held-out rows.
y_pred = model.predict(X_test)

# Overall accuracy plus the per-class precision / recall / F1 table.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
classification_rep = classification_report(y_true=y_test, y_pred=y_pred)

# Emit the same three lines of output in a single call.
print(
    f"Model Accuracy: {accuracy:.2f}",
    "\nClassification Report:",
    classification_rep,
    sep="\n",
)

Model Accuracy: 1.00

Classification Report:
                precision    recall  f1-score   support

     excellent       1.00      1.00      1.00         3
          fair       1.00      1.00      1.00         7
          good       1.00      1.00      1.00         1
          poor       1.00      1.00      1.00         3
special_health       1.00      1.00      1.00         6

      accuracy                           1.00        20
     macro avg       1.00      1.00      1.00        20
  weighted avg       1.00      1.00      1.00        20



Making predictions

In [33]:
import pandas as pd
import random
from datetime import datetime

# Fixed seed so the synthetic yaks (and everything predicted from them) are
# identical on every run; unseeded, each execution produced different rows.
# A private Random instance is used so the global `random` state is untouched.
RANDOM_SEED = 42
rng = random.Random(RANDOM_SEED)

# List of possible behavior values
behaviors = ['calm', 'playful', 'positive', 'negative', 'special_behavior', 'stubborn']

# Build one synthetic yak per behavior so every behavior category appears once.
test_data = []
for behavior in behaviors:
    age = rng.randint(1, 30)
    weight_kg = round(rng.uniform(50, 200), 2)
    # The day is capped at 28 so the date is valid in every month.
    checkup_month = rng.randint(1, 12)
    checkup_day = rng.randint(1, 28)
    new_yak = {
        'age': age,
        'weight_kg': weight_kg,
        'last_checkup_date': datetime(2023, checkup_month, checkup_day),
        'behavior': behavior
    }
    test_data.append(new_yak)

# Create DataFrame for test data
test_df = pd.DataFrame(test_data)
# Modify age directly in test_df where behavior is "special_behavior"
test_df.loc[test_df['behavior'] == 'special_behavior', 'age'] = 100

test_df


Unnamed: 0,age,weight_kg,last_checkup_date,behavior
0,10,60.46,2023-06-10,calm
1,8,68.25,2023-09-24,playful
2,29,151.62,2023-05-14,positive
3,23,127.47,2023-10-23,negative
4,100,96.93,2023-04-16,special_behavior
5,24,179.33,2023-07-03,stubborn


In [34]:
# Derive the 'days_since_checkup' feature for the synthetic yaks, measured
# against a fixed reference date.
# NOTE(review): the training cell measured against 2023-11-25; confirm the
# later reference date used here is intentional.
current_date = datetime.strptime("2023-11-29", "%Y-%m-%d")
test_df['last_checkup_date'] = pd.to_datetime(test_df['last_checkup_date'])
days_elapsed = (current_date - test_df['last_checkup_date']).dt.days
test_df['days_since_checkup'] = days_elapsed

In [35]:
# Display the synthetic test frame, now including 'days_since_checkup'.
test_df

Unnamed: 0,age,weight_kg,last_checkup_date,behavior,days_since_checkup
0,10,60.46,2023-06-10,calm,172
1,8,68.25,2023-09-24,playful,66
2,29,151.62,2023-05-14,positive,199
3,23,127.47,2023-10-23,negative,37
4,100,96.93,2023-04-16,special_behavior,227
5,24,179.33,2023-07-03,stubborn,149


In [36]:
# One-hot encode 'behavior' with the same 'behavior_' prefix used for the
# training frame.
test_df = pd.get_dummies(test_df, columns=['behavior'], prefix='behavior')

# get_dummies only creates indicator columns for categories that occur in this
# frame. If new data lacks one of the known behaviors, the prediction cell
# would fail when it selects that column by name, so add any absent indicator
# as an all-False column.
for behavior in behaviors:
    indicator = f'behavior_{behavior}'
    if indicator not in test_df.columns:
        test_df[indicator] = False

test_df

Unnamed: 0,age,weight_kg,last_checkup_date,days_since_checkup,behavior_calm,behavior_negative,behavior_playful,behavior_positive,behavior_special_behavior,behavior_stubborn
0,10,60.46,2023-06-10,172,True,False,False,False,False,False
1,8,68.25,2023-09-24,66,False,False,True,False,False,False
2,29,151.62,2023-05-14,199,False,False,False,True,False,False
3,23,127.47,2023-10-23,37,False,True,False,False,False,False
4,100,96.93,2023-04-16,227,False,False,False,False,True,False
5,24,179.33,2023-07-03,149,False,False,False,False,False,True


In [37]:
# Columns fed to the model, in the same order as the training feature matrix.
model_features = [
    'age',
    'weight_kg',
    'days_since_checkup',
    'behavior_calm',
    'behavior_playful',
    'behavior_positive',
    'behavior_negative',
    'behavior_special_behavior',
    'behavior_stubborn',
]

# Store the predicted health label for each synthetic yak.
test_df['prediction'] = model.predict(test_df[model_features])

In [38]:
# Copy the model output into 'predicted_health', the column the recommendation
# cells below filter on; the 'prediction' column is left in place.
test_df['predicted_health'] = test_df['prediction']
test_df

Unnamed: 0,age,weight_kg,last_checkup_date,days_since_checkup,behavior_calm,behavior_negative,behavior_playful,behavior_positive,behavior_special_behavior,behavior_stubborn,prediction,predicted_health
0,10,60.46,2023-06-10,172,True,False,False,False,False,False,good,good
1,8,68.25,2023-09-24,66,False,False,True,False,False,False,excellent,excellent
2,29,151.62,2023-05-14,199,False,False,False,True,False,False,poor,poor
3,23,127.47,2023-10-23,37,False,True,False,False,False,False,poor,poor
4,100,96.93,2023-04-16,227,False,False,False,False,True,False,special_health,special_health
5,24,179.33,2023-07-03,149,False,False,False,False,False,True,fair,fair


In [39]:
# List of predicted health statuses, in the order their sections are printed.
predicted_health = ['fair', 'excellent', 'good', 'special_health', 'poor']

# Dietary and health advice appended to the headline sentence, keyed by status.
# Defined once here instead of being re-evaluated for every row.
health_advice = {
    'good': ".\n- Maintain its regular feeding schedule.",
    'excellent': ".\n- Continue its balanced diet.",
    'fair': ".\n- Consider adjusting its diet and providing more nutrients.\n- Monitor its health closely.",
    'poor': ".\n- It requires immediate attention and care.\n- Consult a veterinarian and adjust its diet for recovery.",
    'special_health': ".\n- Consider a specialized diet.\n- Consult a veterinarian for tailored care.",
}

# Suggestion text per observed behavior; key order fixes the order of bullets.
behavior_map = {
    'calm': "Provide a calm and stress-free environment.",
    'playful': "Encourage playful activities for mental stimulation.",
    'positive': "Encourage positive interactions with other yaks.",
    'negative': "Identify stressors causing negative behavior and mitigate them.",
    'special_behavior': "Provide special attention as per specific behavioral needs.",
    'stubborn': "Handle stubborn behavior with patience and positive reinforcement.",
}

# Generating recommendations based on predicted health status
for health_status in predicted_health:
    print(f"Recommendations for predicted health status: {health_status}")
    matching_yaks = test_df[test_df['predicted_health'] == health_status]
    for index, row in matching_yaks.iterrows():
        age = row['age']
        # Every row in this slice carries `health_status` as its prediction, so
        # the loop variable is used directly. The earlier version re-assigned
        # `predicted_health` here, clobbering the list being iterated.
        recommendation = (
            f"For a yak of age {age} years, the predicted health status is: {health_status}"
            + health_advice.get(health_status, "")
        )

        # Collect a suggestion for each behavior indicator that is set on this row.
        behavior_suggestions = [
            f"{behavior.replace('_', ' ')}: {advice}"
            for behavior, advice in behavior_map.items()
            if row[f'behavior_{behavior}']
        ]

        if behavior_suggestions:
            recommendation += "\nFor observed behaviors:"
            recommendation += "".join(f"\n- {suggestion}" for suggestion in behavior_suggestions)

        print(recommendation)
    print("\n")

Recommendations for predicted health status: fair
For a yak of age 24 years, the predicted health status is: fair.
- Consider adjusting its diet and providing more nutrients.
- Monitor its health closely.
For observed behaviors:
- stubborn: Handle stubborn behavior with patience and positive reinforcement.


Recommendations for predicted health status: excellent
For a yak of age 8 years, the predicted health status is: excellent.
- Continue its balanced diet.
For observed behaviors:
- playful: Encourage playful activities for mental stimulation.


Recommendations for predicted health status: good
For a yak of age 10 years, the predicted health status is: good.
- Maintain its regular feeding schedule.
For observed behaviors:
- calm: Provide a calm and stress-free environment.


Recommendations for predicted health status: special_health
For a yak of age 100 years, the predicted health status is: special_health.
- Consider a specialized diet.
- Consult a veterinarian for tailored care.
F

In [147]:
# NOTE(review): this cell repeats the recommendations cell above it and has a
# much higher execution count; consider keeping only one of the two.

# List of predicted health statuses, in the order their sections are printed.
predicted_health = ['fair', 'excellent', 'good', 'special_health', 'poor']

# Advice appended to the headline sentence for each health status.
health_advice = {
    'good': ".\n- Maintain its regular feeding schedule.",
    'excellent': ".\n- Continue its balanced diet.",
    'fair': ".\n- Consider adjusting its diet and providing more nutrients.\n- Monitor its health closely.",
    'poor': ".\n- It requires immediate attention and care.\n- Consult a veterinarian and adjust its diet for recovery.",
    'special_health': ".\n- Consider a specialized diet.\n- Consult a veterinarian for tailored care.",
}

# Suggestion text per observed behavior; key order fixes the order of bullets.
behavior_map = {
    'calm': "Provide a calm and stress-free environment.",
    'playful': "Encourage playful activities for mental stimulation.",
    'positive': "Encourage positive interactions with other yaks.",
    'negative': "Identify stressors causing negative behavior and mitigate them.",
    'special_behavior': "Provide special attention as per specific behavioral needs.",
    'stubborn': "Handle stubborn behavior with patience and positive reinforcement.",
}

# Generating recommendations based on predicted health status
for health_status in predicted_health:
    print(f"Recommendations for predicted health status: {health_status}")
    for index, row in test_df[test_df['predicted_health'] == health_status].iterrows():
        age = row['age']
        # Use a separate name for the row's status so the list being iterated
        # (`predicted_health`) is not overwritten mid-loop.
        row_status = row['predicted_health']

        recommendation = f"For a yak of age {age} years, the predicted health status is: {row_status}"
        recommendation += health_advice.get(row_status, "")

        # Check each behavior and suggest based on the presence of behaviors
        behavior_suggestions = []
        for behavior, advice in behavior_map.items():
            if row[f'behavior_{behavior}']:
                behavior_suggestions.append(f"{behavior.replace('_', ' ')}: {advice}")

        if behavior_suggestions:
            # join() only inserts the separator BETWEEN items, so the first
            # bullet needs its own "\n- " prefix; without it the first
            # suggestion was glued onto the "For observed behaviors:" heading.
            recommendation += "\nFor observed behaviors:\n- " + '\n- '.join(behavior_suggestions)

        print(recommendation)
    print("\n")

Recommendations for predicted health status: fair
For a yak of age 8 years, the predicted health status is: fair.
- Consider adjusting its diet and providing more nutrients.
- Monitor its health closely.
For observed behaviors:stubborn: Handle stubborn behavior with patience and positive reinforcement.


Recommendations for predicted health status: excellent
For a yak of age 9 years, the predicted health status is: excellent.
- Continue its balanced diet.
For observed behaviors:positive: Encourage positive interactions with other yaks.


Recommendations for predicted health status: good
For a yak of age 29 years, the predicted health status is: good.
- Maintain its regular feeding schedule.
For observed behaviors:calm: Provide a calm and stress-free environment.


Recommendations for predicted health status: special_health
For a yak of age 12 years, the predicted health status is: special_health.
- Consider a specialized diet.
- Consult a veterinarian for tailored care.
For observed be