In [None]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
from sklearn.preprocessing import StandardScaler

# Set seaborn's global plotting theme, using the "whitegrid" style
# (white background with grid lines), for the figures drawn later on.
sns.set(style="whitegrid")

In [None]:
# Location of the input CSV (a relative path); point this at your own data.
file_path = 'EthioMart/data/my_data.csv'

# Read the CSV into a DataFrame; the following cells refer to it as `data`.
data = pd.read_csv(filepath_or_buffer=file_path)

# Preview the first five rows as the cell's output.
data.head(n=5)


In [None]:
# Separate the predictors from the label column.
X = data.drop(columns='target')  # Replace 'target' with your actual target column name
y = data['target']

# Split BEFORE scaling so the validation rows stay unseen while the scaler
# learns its statistics. Fitting the scaler on the full dataset would leak the
# validation set's mean/variance into training and inflate the reported scores.
X_train_raw, X_val_raw, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Learn the per-feature mean and standard deviation from the training rows
# only, then apply that same transformation to the validation rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_val = scaler.transform(X_val_raw)

# Full feature matrix under the train-fitted scaler; kept so that any code
# relying on the name `X_scaled` continues to work.
X_scaled = scaler.transform(X)


In [None]:
# Seed for the estimators with a stochastic component, so that a fresh
# "Restart & Run All" reproduces the same scores.
RANDOM_STATE = 42

# Candidate classifiers, keyed by the label used in the results table and plot.
models = {
    'Logistic Regression': LogisticRegression(),
    # Bootstrap sampling and feature sub-sampling are random; without a fixed
    # seed the forest's metrics change from run to run.
    'Random Forest': RandomForestClassifier(random_state=RANDOM_STATE),
    'Support Vector Classifier': SVC()
}

# Maps model name -> {metric name -> score on the validation set}.
results = {}

# Fit each model on the training split and score it on the validation split.
for model_name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_val)

    # 'weighted' averages the per-class scores by class support.
    # zero_division=0 keeps the value scikit-learn already substitutes when a
    # class has no predicted (or no true) samples, without emitting an
    # UndefinedMetricWarning for every such model.
    accuracy = accuracy_score(y_val, y_pred)
    precision = precision_score(y_val, y_pred, average='weighted', zero_division=0)
    recall = recall_score(y_val, y_pred, average='weighted', zero_division=0)
    f1 = f1_score(y_val, y_pred, average='weighted', zero_division=0)

    # The keys below become the column names of the results table.
    results[model_name] = {
        'Accuracy': accuracy,
        'Precision': precision,
        'Recall': recall,
        'F1 Score': f1
    }


In [None]:
# One row per model and one column per metric, ordered best F1 score first.
results_df = pd.DataFrame(results).T
results_df = results_df.sort_values(by='F1 Score', ascending=False)

# Display the results
print(results_df)

# Bar chart of the F1 score per model, drawn on an explicit Axes.
# The bars are coloured from the 'viridis' palette directly instead of passing
# `palette=` to sns.barplot without `hue`, which newer seaborn releases
# deprecate; this form does not depend on the installed seaborn version.
fig, ax = plt.subplots(figsize=(10, 6))
bar_colors = sns.color_palette('viridis', n_colors=len(results_df))
ax.bar(list(results_df.index), results_df['F1 Score'], color=bar_colors)
ax.set_title('Model Comparison: F1 Score')
ax.set_xlabel('Models')
ax.set_ylabel('F1 Score')
# Grid lines along the categorical axis carry no information.
ax.xaxis.grid(False)
# Right-anchor the rotated labels so each one ends under its own bar.
plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
plt.show()


In [None]:

### Explanation of Sections

1. **Import Libraries**: 
   - Import necessary libraries for data manipulation, visualization, and machine learning.

2. **Load Data**: 
   - Load your dataset and display the first few rows.

3. **Preprocess Data**: 
   - Separate the features from the target column, standardize the features with `StandardScaler`, and hold out 20% of the rows as a validation set.

4. **Train Models**: 
   - Initialize three classifiers (logistic regression, random forest, and support vector classifier), fit each one on the training data, and compute accuracy plus weighted precision, recall, and F1 score on the validation set.

5. **Compare Model Performance**: 
   - Collect the metrics in a DataFrame sorted by F1 score, print it, and plot the F1 score of each model as a bar chart.

6. **Conclusion**: 
   - Summarize the findings from the model comparison.

### Usage

- Save this content as `model_comparison.ipynb`.
- Run the notebook in Jupyter Notebook or JupyterLab after setting `file_path` to your CSV file and replacing `'target'` with the name of your label column.

To extend the comparison, add more estimators to the `models` dictionary or more entries to the per-model metrics dictionary.
