In [1]:
!pip install mlflow




In [2]:
import pandas as pd
import mlflow
import mlflow.sklearn
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score
import pickle
import time

In [3]:
# Location of the input CSV and the name of the label column. Kept as named
# constants so the notebook can be pointed at another dataset in one place.
DATA_PATH = '/content/drive/MyDrive/datasets/data.csv'
TARGET_COLUMN = 'liked'

data = pd.read_csv(DATA_PATH)

# Fail early with a readable message if the label column is absent, instead of
# surfacing a less specific error from the drop below.
if TARGET_COLUMN not in data.columns:
    raise KeyError(
        f"Target column {TARGET_COLUMN!r} not found in {DATA_PATH}; "
        f"available columns: {list(data.columns)}"
    )

# Features and target: every column except the label is used as a feature.
X = data.drop(columns=TARGET_COLUMN)
y = data[TARGET_COLUMN]

In [4]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Model parameters (passed verbatim to RandomForestClassifier and logged to MLflow)
params = {
    'n_estimators': 100,
    'max_depth': 5,
    'random_state': 42
}

# Start MLflow run; everything logged inside the block is attached to this run.
with mlflow.start_run():
    # Log all hyperparameters in a single batched call
    mlflow.log_params(params)

    # Train the model and measure training time.
    # perf_counter() is monotonic, so the elapsed time cannot be skewed by
    # system clock adjustments the way time.time() differences can.
    start_time = time.perf_counter()
    model = RandomForestClassifier(**params)
    model.fit(X_train, y_train)
    training_time = time.perf_counter() - start_time

    # Make predictions on the held-out split
    y_pred = model.predict(X_test)

    # Calculate metrics. Precision/recall are support-weighted averages over
    # the classes; zero_division=0 scores a class with no predictions as 0
    # instead of emitting a warning.
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='weighted', zero_division=0)
    recall = recall_score(y_test, y_pred, average='weighted', zero_division=0)

    # Log metrics in a single batched call
    mlflow.log_metrics({
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'training_time': training_time,
    })

    # Save the model to a local pickle file in the working directory
    with open('model.pkl', 'wb') as f:
        pickle.dump(model, f)

    # Log the model to the MLflow run under the 'model' artifact path
    mlflow.sklearn.log_model(model, 'model')

    print(f'Accuracy: {accuracy:.4f}')
    print(f'Precision: {precision:.4f}')
    print(f'Recall: {recall:.4f}')
    print(f'Training Time: {training_time:.4f} seconds')
    print('Model saved as model.pkl')



Accuracy: 0.9231
Precision: 0.9241
Recall: 0.9231
Training Time: 0.5451 seconds
Model saved as model.pkl
