# Model Training

Start the MLflow tracking server with `mlflow server --host 0.0.0.0 --backend-store-uri ./mlruns --artifacts-destination ./mlartifacts --dev`,
then point the client at it with `mlflow.set_tracking_uri("http://127.0.0.1:5000")`.

In [1]:
import numpy as np

# Read the feature and label arrays from their .npy files in one pass.
X, y = (np.load(path) for path in ('data/features.npy', 'data/labels.npy'))



In [2]:
from sklearn.model_selection import train_test_split

# Seed used for the split below (and reused by later cells).
RANDOM_SEED = 42
# Only the first SUBSET_SIZE rows are kept for these experiments.
SUBSET_SIZE = 1000

X_subset, y_subset = X[:SUBSET_SIZE], y[:SUBSET_SIZE]

# Hold out 20% of the subset for evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    X_subset,
    y_subset,
    test_size=0.2,
    random_state=RANDOM_SEED,
)

## Connect to MLflow



In [10]:
import mlflow

EXPERIMENT_NAME = 'fake-news-detector'

# The tracking URI must be configured BEFORE any experiment lookup; otherwise
# the lookup is resolved against MLflow's default tracking location instead of
# the server, and may not find the experiment at all.
mlflow.set_tracking_uri("http://127.0.0.1:5000")

# set_experiment() activates the experiment, creating it when it does not yet
# exist, and returns the Experiment object. Unlike get_experiment_by_name(),
# it never yields None, so `experiment.experiment_id` is safe in later cells.
experiment = mlflow.set_experiment(EXPERIMENT_NAME)


In [11]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score

# Run "proof-1": an SVC where only the random seed is set explicitly.
run_name = "proof-1"
model = SVC(random_state=RANDOM_SEED)

with mlflow.start_run(run_name=run_name, experiment_id=experiment.experiment_id) as run:
    # Train on the training split, then predict the held-out split.
    y_pred = model.fit(X_train, y_train).predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    precission = precision_score(y_test, y_pred, average="weighted")
    recall = recall_score(y_test, y_pred, average="weighted")
    print(f"Accuracy: {accuracy}")

    # Record the evaluation metrics on the run.
    for metric_name, metric_value in (
        ("accuracy", accuracy),
        ("precision", precission),
        ("recall", recall),
    ):
        mlflow.log_metric(metric_name, metric_value)

    # Record the settings that identify this model.
    for param_name, param_value in (
        ("random_seed", RANDOM_SEED),
        ("model", "SVC"),
    ):
        mlflow.log_param(param_name, param_value)

    # Store the fitted estimator as a run artifact.
    mlflow.sklearn.log_model(model, "model")



Accuracy: 0.84


2024/11/12 12:05:10 INFO mlflow.tracking._tracking_service.client: 🏃 View run proof-1 at: http://127.0.0.1:5000/#/experiments/548563287919072793/runs/bf1e9bff7eef433f8e33b12c4e3ed706.
2024/11/12 12:05:10 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:5000/#/experiments/548563287919072793.


In [12]:
from sklearn.tree import DecisionTreeClassifier
# Import every metric this cell uses so it does not depend on an earlier
# cell having imported precision_score / recall_score.
from sklearn.metrics import accuracy_score, precision_score, recall_score

run_name = "proof-2"
RANDOM_STATE = 42
MAX_DEPTH = 3

# Depth-limited decision tree with a fixed seed.
model = DecisionTreeClassifier(random_state=RANDOM_STATE, max_depth=MAX_DEPTH)


with mlflow.start_run(
    experiment_id=experiment.experiment_id,
    run_name=run_name,
) as run:

    model.fit(X_train, y_train)

    # Evaluate on the held-out split.
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    precission = precision_score(y_test, y_pred, average="weighted")
    recall = recall_score(y_test, y_pred, average="weighted")

    print(f"Accuracy: {accuracy}")

    # Log metrics
    mlflow.log_metric("accuracy", accuracy)
    mlflow.log_metric("precision", precission)
    mlflow.log_metric("recall", recall)

    # Log the model parameters. max_depth is logged as well: it is passed to
    # the estimator above, so the run cannot be reproduced without it.
    mlflow.log_param("random_state", RANDOM_STATE)
    mlflow.log_param("max_depth", MAX_DEPTH)
    mlflow.log_param("model", "Decision Tree Classifier")

    # log the model
    mlflow.sklearn.log_model(model, "model")



Accuracy: 0.77


2024/11/12 12:05:17 INFO mlflow.tracking._tracking_service.client: 🏃 View run proof-2 at: http://127.0.0.1:5000/#/experiments/548563287919072793/runs/881a56ef0f4c4e65b7b0ea4998fb02e0.
2024/11/12 12:05:17 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:5000/#/experiments/548563287919072793.


In [13]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Run "proof-3": k-nearest-neighbours classifier with k = N_NEIGHBORS.
run_name = "proof-3"
N_NEIGHBORS = 14
model = KNeighborsClassifier(n_neighbors=N_NEIGHBORS)

with mlflow.start_run(run_name=run_name, experiment_id=experiment.experiment_id) as run:
    # Train on the training split, then predict the held-out split.
    y_pred = model.fit(X_train, y_train).predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    precission = precision_score(y_test, y_pred, average="weighted")
    recall = recall_score(y_test, y_pred, average="weighted")
    print(f"Accuracy: {accuracy}")

    # Record the evaluation metrics on the run.
    for metric_name, metric_value in (
        ("accuracy", accuracy),
        ("precision", precission),
        ("recall", recall),
    ):
        mlflow.log_metric(metric_name, metric_value)

    # Record the settings that identify this model.
    for param_name, param_value in (
        ("N Neighbors", N_NEIGHBORS),
        ("model", "KNeighborsClassifier"),
    ):
        mlflow.log_param(param_name, param_value)

    # Store the fitted estimator as a run artifact.
    mlflow.sklearn.log_model(model, "model")



Accuracy: 0.835


2024/11/12 12:05:20 INFO mlflow.tracking._tracking_service.client: 🏃 View run proof-3 at: http://127.0.0.1:5000/#/experiments/548563287919072793/runs/2d81106074f44ea0b9cb2f20fa3f47db.
2024/11/12 12:05:20 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:5000/#/experiments/548563287919072793.


In [14]:
from sklearn.ensemble import RandomForestClassifier
# Import every metric this cell uses so it does not depend on an earlier
# cell having imported precision_score / recall_score.
from sklearn.metrics import accuracy_score, precision_score, recall_score

run_name = "proof-4"

MAX_DEPTH = 15
N_ESTIMATORS = 30
MAX_FEATURES = 1

# Seed the forest with the notebook-wide RANDOM_SEED: without a fixed
# random_state each execution builds different trees, so the logged metrics
# could not be reproduced from the logged parameters.
model = RandomForestClassifier(
    max_depth=MAX_DEPTH,
    n_estimators=N_ESTIMATORS,
    max_features=MAX_FEATURES,
    random_state=RANDOM_SEED,
)


with mlflow.start_run(
    experiment_id=experiment.experiment_id,
    run_name=run_name,
) as run:

    model.fit(X_train, y_train)

    # Evaluate on the held-out split.
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    precission = precision_score(y_test, y_pred, average="weighted")
    recall = recall_score(y_test, y_pred, average="weighted")

    print(f"Accuracy: {accuracy}")

    # Log metrics
    mlflow.log_metric("accuracy", accuracy)
    mlflow.log_metric("precision", precission)
    mlflow.log_metric("recall", recall)

    # Log the model parameters (including the seed, so the run is reproducible)
    mlflow.log_param("max_depth", MAX_DEPTH)
    mlflow.log_param("n_estimators", N_ESTIMATORS)
    mlflow.log_param("max_features", MAX_FEATURES)
    mlflow.log_param("random_state", RANDOM_SEED)
    mlflow.log_param("model", "RandomForestClassifier")

    # log the model
    mlflow.sklearn.log_model(model, "model")



Accuracy: 0.84


2024/11/12 12:05:25 INFO mlflow.tracking._tracking_service.client: 🏃 View run proof-4 at: http://127.0.0.1:5000/#/experiments/548563287919072793/runs/3424718721814a3d8517f07d3627a035.
2024/11/12 12:05:25 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:5000/#/experiments/548563287919072793.


In [15]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Run "proof-5": SVC with a linear kernel; COEF is passed as the C argument.
run_name = "proof-5"
COEF = 0.05
model = SVC(kernel="linear", C=COEF)

with mlflow.start_run(run_name=run_name, experiment_id=experiment.experiment_id) as run:
    # Train on the training split, then predict the held-out split.
    y_pred = model.fit(X_train, y_train).predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    precission = precision_score(y_test, y_pred, average="weighted")
    recall = recall_score(y_test, y_pred, average="weighted")
    print(f"Accuracy: {accuracy}")

    # Record the evaluation metrics on the run.
    for metric_name, metric_value in (
        ("accuracy", accuracy),
        ("precision", precission),
        ("recall", recall),
    ):
        mlflow.log_metric(metric_name, metric_value)

    # Record the settings that identify this model.
    for param_name, param_value in (
        ("model", "SVC Linear"),
        ("c", COEF),
    ):
        mlflow.log_param(param_name, param_value)

    # Store the fitted estimator as a run artifact.
    mlflow.sklearn.log_model(model, "model")



Accuracy: 0.87


2024/11/12 12:05:58 INFO mlflow.tracking._tracking_service.client: 🏃 View run proof-5 at: http://127.0.0.1:5000/#/experiments/548563287919072793/runs/cbb826aad85a4dbfb46d00bd1b3e71e2.
2024/11/12 12:05:58 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:5000/#/experiments/548563287919072793.
