Title: Classification Model Performance Metrics

Accuracy, Precision, Recall, F1-Score:

Task 1: Evaluate a binary classifier for spam detection using accuracy, precision, recall and F1-score.

In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Example dataset: Replace with your dataset
# 'text' is the column containing the email content and 'label' contains 'spam' or 'ham'
data = {
    'text': ['Free money now!', 'Important: Please read this email', 'Congrats, you won!', 'Your account has been compromised'],
    'label': ['spam', 'ham', 'spam', 'ham']
}

# Convert the data into a pandas DataFrame
df = pd.DataFrame(data)

# Convert labels to binary (spam = 1, ham = 0)
df['label'] = df['label'].map({'spam': 1, 'ham': 0})

# Split the RAW text into training and testing sets first, stratified so both
# classes are represented on each side. Splitting before vectorizing keeps the
# test emails from influencing the learned vocabulary (no preprocessing leakage).
text_train, text_test, y_train, y_test = train_test_split(
    df['text'], df['label'], test_size=0.3, random_state=42, stratify=df['label']
)

# Text Vectorization: learn the vocabulary from the training emails only, then
# reuse that fitted vocabulary to encode the test emails.
vectorizer = CountVectorizer()
X_train = vectorizer.fit_transform(text_train)
X_test = vectorizer.transform(text_test)

# Train a binary classifier (Logistic Regression)
model = LogisticRegression()
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the model using accuracy, precision, recall, and F1-score
# (precision/recall/F1 are computed for the positive class, spam = 1)
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

# Display the results
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-score: {f1:.4f}")



Accuracy: 0.5000
Precision: 0.5000
Recall: 1.0000
F1-score: 0.6667



Task 2: Compare performance of a multi-class classifier on recognizing animals.

In [3]:

# Task 2: train a Random Forest on the animal features and report per-class metrics
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Example dataset: Replace with your actual animal dataset
# 'feature1'..'feature3' are numerical features of the animals and 'class' contains the animal types.
data = {
    'feature1': [1.2, 2.3, 3.1, 1.5, 3.5, 4.1, 2.9, 3.3],
    'feature2': [2.3, 3.4, 4.1, 1.7, 3.9, 4.5, 2.1, 3.6],
    'feature3': [3.4, 1.2, 2.8, 2.1, 3.3, 4.2, 2.6, 3.0],
    'class': ['cat', 'dog', 'dog', 'cat', 'rabbit', 'rabbit', 'dog', 'cat']
}

# Convert to a pandas DataFrame
df = pd.DataFrame(data)

# Encode the target labels (animal classes) into numerical values
label_encoder = LabelEncoder()
df['class'] = label_encoder.fit_transform(df['class'])

# Split the data into features (X) and target (y)
X = df.drop(columns='class')
y = df['class']

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Train a multi-class classifier (Random Forest)
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test)

# Evaluate the model's performance
accuracy = accuracy_score(y_test, y_pred)

# Pass the full set of encoded class ids explicitly. Without `labels`, the
# report infers classes from y_test/y_pred only, and raises a ValueError when
# that count differs from len(target_names) -- which happens whenever a class
# is absent from this small, unstratified test split.
class_ids = label_encoder.transform(label_encoder.classes_)
report = classification_report(
    y_test,
    y_pred,
    labels=class_ids,
    target_names=label_encoder.classes_,
    zero_division=0,  # a class with no test/predicted samples scores 0 without a warning
)

# Display the results
print(f"Accuracy: {accuracy:.4f}")
print("Classification Report:")
print(report)



Accuracy: 1.0000
Classification Report:
              precision    recall  f1-score   support

         cat       1.00      1.00      1.00         1
         dog       1.00      1.00      1.00         1
      rabbit       1.00      1.00      1.00         1

    accuracy                           1.00         3
   macro avg       1.00      1.00      1.00         3
weighted avg       1.00      1.00      1.00         3



Task 3: Analyze classifier performance for predicting disease outbreaks.

In [4]:
# Task 3: train a Random Forest on the outbreak indicators and report its metrics
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Toy outbreak records -- swap in actual disease outbreak data.
# NOTE(review): in this sample 'previous_outbreak' holds exactly the same
# values as the 'outbreak' target -- confirm that is intended.
data = dict(
    temperature=[30.1, 35.2, 22.3, 29.8, 31.5, 40.0, 25.3, 33.7],
    humidity=[70, 60, 80, 65, 50, 55, 90, 60],
    population_density=[150, 200, 100, 130, 170, 180, 220, 140],
    previous_outbreak=[1, 0, 1, 1, 0, 0, 1, 1],
    outbreak=[1, 0, 1, 1, 0, 0, 1, 1],  # 1 for outbreak, 0 for no outbreak
)

# Tunable settings for this cell
TARGET = 'outbreak'
TEST_FRACTION = 0.3
SEED = 42

# Tabular view of the records
df = pd.DataFrame(data)

# Target column (y) versus every remaining column as features (X)
y = df[TARGET]
X = df.drop(columns=TARGET)

# Hold out a fraction of the rows for evaluation
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_FRACTION, random_state=SEED
)

# Fit the Random Forest on the training rows (fit returns the estimator itself)
model = RandomForestClassifier(random_state=SEED).fit(X_train, y_train)

# Predict the held-out rows
y_pred = model.predict(X_test)

# Score the predictions: overall accuracy plus the per-class report
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

# Show the results, one item per line
print(f"Accuracy: {accuracy:.4f}", "Classification Report:", report, sep="\n")


Accuracy: 0.6667
Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.50      0.67         2
           1       0.50      1.00      0.67         1

    accuracy                           0.67         3
   macro avg       0.75      0.75      0.67         3
weighted avg       0.83      0.67      0.67         3

