# Importing libraries + loading data

In [None]:
import mlflow
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, f1_score
from sklearn.model_selection import StratifiedKFold, cross_val_score, train_test_split
from sklearn.pipeline import Pipeline

In [2]:
import warnings

warnings.filterwarnings("ignore")

In [3]:
print("Loading processed train data...")
df = pd.read_csv("../data/processed/train_processed.csv")
print("Train shape:", df.shape)

df_test_raw = pd.read_csv("../data/raw/test.csv")
print("Test raw shape:", df_test_raw.shape)
print("Shape:", df.shape)
print("Columns:", df.columns.tolist())

Loading processed train data...
Train shape: (7558, 235)
Test raw shape: (3263, 4)
Shape: (7558, 235)
Columns: ['id', 'target', 'clean_text', 'url_count', 'typo_count', 'hashtag_count', 'has_url', 'has_hashtag', 'has_typos', 'eda_char_count', 'eda_word_count', 'location_encoded', 'keyword_clean_accident', 'keyword_clean_aftershock', 'keyword_clean_airplane%20accident', 'keyword_clean_ambulance', 'keyword_clean_annihilated', 'keyword_clean_annihilation', 'keyword_clean_apocalypse', 'keyword_clean_armageddon', 'keyword_clean_army', 'keyword_clean_arson', 'keyword_clean_arsonist', 'keyword_clean_attack', 'keyword_clean_attacked', 'keyword_clean_avalanche', 'keyword_clean_battle', 'keyword_clean_bioterror', 'keyword_clean_bioterrorism', 'keyword_clean_blaze', 'keyword_clean_blazing', 'keyword_clean_bleeding', 'keyword_clean_blew%20up', 'keyword_clean_blight', 'keyword_clean_blizzard', 'keyword_clean_blood', 'keyword_clean_bloody', 'keyword_clean_blown%20up', 'keyword_clean_body%20bag', 'ke

In [4]:
X = df.drop(columns=["id", "target", "clean_text"])
text_col = df["clean_text"]
y = df["target"]

print("\nTarget distribution:")
print(y.value_counts(normalize=True))


Target distribution:
target
0    0.570654
1    0.429346
Name: proportion, dtype: float64


In [5]:
X_train, X_val, text_train, text_val, y_train, y_val = train_test_split(
    X, text_col, y, test_size=0.2, stratify=y, random_state=42
)

print("\nTrain size:", len(X_train), f"({len(X_train) / len(df):.1%})")
print("Val size:  ", len(X_val), f"({len(X_val) / len(df):.1%})")


Train size: 6046 (80.0%)
Val size:   1512 (20.0%)


In [6]:
text_features = ["clean_text"]
num_features = [col for col in X_train.columns if col not in text_features]

preprocessor = ColumnTransformer(
    transformers=[
        (
            "tfidf",
            TfidfVectorizer(
                max_features=8000, ngram_range=(1, 2), min_df=2, max_df=0.95, stop_words="english"
            ),
            "clean_text",
        ),
        ("passthrough", "passthrough", num_features),
    ],
    remainder="drop",
)

pipeline = Pipeline(
    [
        ("preprocessor", preprocessor),
        (
            "classifier",
            LogisticRegression(
                max_iter=1000, class_weight="balanced", random_state=42, solver="lbfgs"
            ),
        ),
    ]
)

In [7]:
text_train = text_train.fillna("")
text_val = text_val.fillna("")

text_train = text_train.astype(str)
text_val = text_val.astype(str)

print("NaN in text_train after fix:", text_train.isna().sum())
print("Empty strings in text_train:", (text_train == "").sum())

NaN in text_train after fix: 0
Empty strings in text_train: 2


In [8]:
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
cv_scores = cross_val_score(
    pipeline, X_train.assign(clean_text=text_train), y_train, cv=cv, scoring="f1"
)

print("\n5-fold CV F1 scores:", cv_scores)
print(f"Mean CV F1: {cv_scores.mean():.4f} ¬± {cv_scores.std():.4f}")


5-fold CV F1 scores: [0.79492188 0.78148148 0.80305927 0.79052133 0.77710843]
Mean CV F1: 0.7894 ¬± 0.0093


In [9]:
pipeline.fit(X_train.assign(clean_text=text_train), y_train)
y_pred_val = pipeline.predict(X_val.assign(clean_text=text_val))

print("\nValidation performance:")
print(classification_report(y_val, y_pred_val))
print("Accuracy:", accuracy_score(y_val, y_pred_val))
print("F1-score:", f1_score(y_val, y_pred_val))

print("\nConfusion Matrix:")
print(confusion_matrix(y_val, y_pred_val))


Validation performance:
              precision    recall  f1-score   support

           0       0.85      0.83      0.84       863
           1       0.78      0.81      0.79       649

    accuracy                           0.82      1512
   macro avg       0.82      0.82      0.82      1512
weighted avg       0.82      0.82      0.82      1512

Accuracy: 0.8201058201058201
F1-score: 0.7936267071320182

Confusion Matrix:
[[717 146]
 [126 523]]


In [10]:
tfidf_step = pipeline.named_steps["preprocessor"].named_transformers_["tfidf"]
feature_names = [
    f"tfidf_{i}" for i in range(tfidf_step.get_feature_names_out().shape[0])
] + num_features

coefs = pd.Series(pipeline.named_steps["classifier"].coef_[0], index=feature_names).sort_values(
    ascending=False
)

print("\nTop 20 most important features (by LogReg coef):")
print(coefs.head(20))

print("\nTop 20 least important (negative coef):")
print(coefs.tail(20))


Top 20 most important features (by LogReg coef):
location_encoded                    11.773482
tfidf_3342                           2.394596
tfidf_3854                           1.971874
tfidf_4753                           1.856466
keyword_clean_derailment             1.746473
keyword_clean_oil%20spill            1.736326
tfidf_7159                           1.727716
keyword_clean_nuclear%20disaster     1.724155
tfidf_5857                           1.708804
keyword_clean_wild%20fires           1.699712
keyword_clean_typhoon                1.691444
keyword_clean_debris                 1.668591
keyword_clean_suicide%20bombing      1.667518
tfidf_6690                           1.663368
keyword_clean_outbreak               1.653393
keyword_clean_mass%20murder          1.584129
tfidf_1208                           1.559466
tfidf_1756                           1.551488
keyword_clean_forest%20fires         1.531295
keyword_clean_wreckage               1.494667
dtype: float64

Top 20 least i

In [12]:
mlflow.set_tracking_uri("http://localhost:5000")
# mlflow.set_experiment("disaster_tweets_baseline")

with mlflow.start_run(run_name="tfidf_logreg_pipeline"):
    cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
    cv_scores = cross_val_score(
        pipeline, X_train.assign(clean_text=text_train), y_train, cv=cv, scoring="f1"
    )

    mlflow.log_metric("cv_f1_mean", cv_scores.mean())
    mlflow.log_metric("cv_f1_std", cv_scores.std())
    mlflow.log_param("max_features", 8000)
    mlflow.log_param("ngram_range", "(1,2)")
    mlflow.log_param("class_weight", "balanced")
    mlflow.log_param("solver", "lbfgs")

    pipeline.fit(X_train.assign(clean_text=text_train), y_train)
    y_pred_val = pipeline.predict(X_val.assign(clean_text=text_val))

    f1 = f1_score(y_val, y_pred_val)
    acc = accuracy_score(y_val, y_pred_val)

    mlflow.log_metric("val_f1", f1)
    mlflow.log_metric("val_accuracy", acc)

    mlflow.sklearn.log_model(pipeline, "model")

    print("\nValidation performance:")
    print(classification_report(y_val, y_pred_val))
    print("Accuracy:", acc)
    print("F1-score:", f1)

    print("\nConfusion Matrix:")
    print(confusion_matrix(y_val, y_pred_val))




Validation performance:
              precision    recall  f1-score   support

           0       0.85      0.83      0.84       863
           1       0.78      0.81      0.79       649

    accuracy                           0.82      1512
   macro avg       0.82      0.82      0.82      1512
weighted avg       0.82      0.82      0.82      1512

Accuracy: 0.8201058201058201
F1-score: 0.7936267071320182

Confusion Matrix:
[[717 146]
 [126 523]]
üèÉ View run tfidf_logreg_pipeline at: http://localhost:5000/#/experiments/0/runs/f5276517042640ba801a498b6e1210af
üß™ View experiment at: http://localhost:5000/#/experiments/0


In [13]:
mlflow.set_tracking_uri("http://localhost:5000")
mlflow.set_experiment("disaster_tweets_baseline")

param_combinations = [
    {"max_features": 8000, "ngram_range": (1, 2), "C": 1.0, "solver": "lbfgs", "penalty": "l2"},
    {"max_features": 10000, "ngram_range": (1, 2), "C": 0.5, "solver": "lbfgs", "penalty": "l2"},
    {
        "max_features": 12000,
        "ngram_range": (1, 3),
        "C": 2.0,
        "solver": "liblinear",
        "penalty": "l1",
    },
    {
        "max_features": 15000,
        "ngram_range": (1, 2),
        "C": 0.1,
        "solver": "liblinear",
        "penalty": "l1",
    },
    {"max_features": 8000, "ngram_range": (1, 1), "C": 5.0, "solver": "lbfgs", "penalty": "l2"},
]

for params in param_combinations:
    with mlflow.start_run(run_name=f"tfidf_logreg_{params['max_features']}_{params['C']}"):
        tfidf = TfidfVectorizer(
            max_features=params["max_features"],
            ngram_range=params["ngram_range"],
            min_df=2,
            max_df=0.95,
            stop_words="english",
        )

        preprocessor = ColumnTransformer(
            transformers=[
                ("tfidf", tfidf, "clean_text"),
                ("passthrough", "passthrough", num_features),
            ],
            remainder="drop",
        )

        pipeline = Pipeline(
            [
                ("preprocessor", preprocessor),
                (
                    "classifier",
                    LogisticRegression(
                        max_iter=2000,
                        C=params["C"],
                        solver=params["solver"],
                        penalty=params["penalty"],
                        class_weight="balanced",
                        random_state=42,
                    ),
                ),
            ]
        )

        cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
        cv_scores = cross_val_score(
            pipeline, X_train.assign(clean_text=text_train), y_train, cv=cv, scoring="f1"
        )

        mlflow.log_param("max_features", params["max_features"])
        mlflow.log_param("ngram_range", str(params["ngram_range"]))
        mlflow.log_param("C", params["C"])
        mlflow.log_param("solver", params["solver"])
        mlflow.log_param("penalty", params["penalty"])

        mlflow.log_metric("cv_f1_mean", cv_scores.mean())
        mlflow.log_metric("cv_f1_std", cv_scores.std())

        pipeline.fit(X_train.assign(clean_text=text_train), y_train)
        y_pred_val = pipeline.predict(X_val.assign(clean_text=text_val))

        f1 = f1_score(y_val, y_pred_val)
        acc = accuracy_score(y_val, y_pred_val)

        mlflow.log_metric("val_f1", f1)
        mlflow.log_metric("val_accuracy", acc)

        mlflow.sklearn.log_model(pipeline, "model")

        print(f"\nParams: {params}")
        print("CV F1 mean:", cv_scores.mean())
        print("Val F1:", f1)
        print("Val Accuracy:", acc)

2026/02/19 23:21:02 INFO mlflow.tracking.fluent: Experiment with name 'disaster_tweets_baseline' does not exist. Creating a new experiment.



Params: {'max_features': 8000, 'ngram_range': (1, 2), 'C': 1.0, 'solver': 'lbfgs', 'penalty': 'l2'}
CV F1 mean: 0.7894184781306403
Val F1: 0.7936267071320182
Val Accuracy: 0.8201058201058201
üèÉ View run tfidf_logreg_8000_1.0 at: http://localhost:5000/#/experiments/1/runs/bf73552e9ac546ed86345e8ef1fd93ae
üß™ View experiment at: http://localhost:5000/#/experiments/1





Params: {'max_features': 10000, 'ngram_range': (1, 2), 'C': 0.5, 'solver': 'lbfgs', 'penalty': 'l2'}
CV F1 mean: 0.7853717940630622
Val F1: 0.787556904400607
Val Accuracy: 0.8148148148148148
üèÉ View run tfidf_logreg_10000_0.5 at: http://localhost:5000/#/experiments/1/runs/a9812e8f4a7c49228ccd3214189f43bd
üß™ View experiment at: http://localhost:5000/#/experiments/1





Params: {'max_features': 12000, 'ngram_range': (1, 3), 'C': 2.0, 'solver': 'liblinear', 'penalty': 'l1'}
CV F1 mean: 0.7782948118355534
Val F1: 0.7834586466165413
Val Accuracy: 0.8095238095238095
üèÉ View run tfidf_logreg_12000_2.0 at: http://localhost:5000/#/experiments/1/runs/3a44c88bc3a24c1ba781764b634086ae
üß™ View experiment at: http://localhost:5000/#/experiments/1





Params: {'max_features': 15000, 'ngram_range': (1, 2), 'C': 0.1, 'solver': 'liblinear', 'penalty': 'l1'}
CV F1 mean: 0.73095235097427
Val F1: 0.7279466271312083
Val Accuracy: 0.7572751322751323
üèÉ View run tfidf_logreg_15000_0.1 at: http://localhost:5000/#/experiments/1/runs/15827398ecc644e68025391ab39a2be4
üß™ View experiment at: http://localhost:5000/#/experiments/1





Params: {'max_features': 8000, 'ngram_range': (1, 1), 'C': 5.0, 'solver': 'lbfgs', 'penalty': 'l2'}
CV F1 mean: 0.787864154419019
Val F1: 0.7945205479452054
Val Accuracy: 0.8214285714285714
üèÉ View run tfidf_logreg_8000_5.0 at: http://localhost:5000/#/experiments/1/runs/552ebc78e2e54a34a52870bb371e8523
üß™ View experiment at: http://localhost:5000/#/experiments/1


In [None]:
mlflow.set_tracking_uri("http://localhost:5000")
mlflow.set_experiment("disaster_tweets_baseline_v2")

param_combinations = [
    {
        "max_features": 10000,
        "ngram_range": (1, 2),
        "min_df": 2,
        "sublinear_tf": True,
        "norm": "l2",
        "C": 0.5,
        "solver": "lbfgs",
        "penalty": "l2",
    },
    {
        "max_features": 12000,
        "ngram_range": (1, 3),
        "min_df": 3,
        "sublinear_tf": True,
        "norm": "l2",
        "C": 1.0,
        "solver": "liblinear",
        "penalty": "l1",
    },
    {
        "max_features": 15000,
        "ngram_range": (1, 2),
        "min_df": 2,
        "sublinear_tf": False,
        "norm": "l1",
        "C": 2.0,
        "solver": "lbfgs",
        "penalty": "l2",
    },
    {
        "max_features": 8000,
        "ngram_range": (1, 2),
        "min_df": 5,
        "sublinear_tf": True,
        "norm": "l2",
        "C": 0.1,
        "solver": "lbfgs",
        "penalty": "l2",
    },
    {
        "max_features": 10000,
        "ngram_range": (1, 1),
        "min_df": 2,
        "sublinear_tf": True,
        "norm": "l2",
        "C": 0.01,
        "solver": "liblinear",
        "penalty": "l1",
    },
    {
        "max_features": 12000,
        "ngram_range": (1, 2),
        "min_df": 3,
        "sublinear_tf": True,
        "norm": "l2",
        "C": 1.0,
        "solver": "saga",
        "penalty": "elasticnet",
        "l1_ratio": 0.5,
    },
    {
        "max_features": 15000,
        "ngram_range": (1, 3),
        "min_df": 2,
        "sublinear_tf": True,
        "norm": "l2",
        "C": 2.0,
        "solver": "saga",
        "penalty": "elasticnet",
        "l1_ratio": 0.2,
    },
    {
        "max_features": 10000,
        "ngram_range": (1, 2),
        "min_df": 3,
        "sublinear_tf": False,
        "norm": "l1",
        "C": 0.5,
        "solver": "saga",
        "penalty": "elasticnet",
        "l1_ratio": 0.8,
    },
    {
        "max_features": 20000,
        "ngram_range": (1, 2),
        "min_df": 2,
        "sublinear_tf": True,
        "norm": "l2",
        "C": 5.0,
        "solver": "lbfgs",
        "penalty": "l2",
    },
    {
        "max_features": 5000,
        "ngram_range": (1, 2),
        "min_df": 5,
        "sublinear_tf": True,
        "norm": "l2",
        "C": 10.0,
        "solver": "liblinear",
        "penalty": "l1",
    },
]

for idx, params in enumerate(param_combinations, 1):
    run_name = (
        f"run_{idx:02d}_mf{params['max_features']}_C{params['C']}_ngram{params['ngram_range']}"
    )

    with mlflow.start_run(run_name=run_name):
        tfidf = TfidfVectorizer(
            max_features=params["max_features"],
            ngram_range=params["ngram_range"],
            min_df=params.get("min_df", 2),
            max_df=0.95,
            stop_words="english",
            sublinear_tf=params.get("sublinear_tf", False),
            norm=params.get("norm", "l2"),
        )

        preprocessor = ColumnTransformer(
            transformers=[
                ("tfidf", tfidf, "clean_text"),
                ("passthrough", "passthrough", num_features),
            ],
            remainder="drop",
        )

        classifier_params = {
            "max_iter": 2000,
            "C": params["C"],
            "class_weight": "balanced",
            "random_state": 42,
        }

        if "solver" in params:
            classifier_params["solver"] = params["solver"]
        if "penalty" in params:
            classifier_params["penalty"] = params["penalty"]
        if "l1_ratio" in params:
            classifier_params["l1_ratio"] = params["l1_ratio"]

        pipeline = Pipeline(
            [
                ("preprocessor", preprocessor),
                ("classifier", LogisticRegression(**classifier_params)),
            ]
        )

        cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
        cv_scores = cross_val_score(
            pipeline, X_train.assign(clean_text=text_train), y_train, cv=cv, scoring="f1", n_jobs=-1
        )

        mlflow.log_params(params)
        mlflow.log_metric("cv_f1_mean", cv_scores.mean())
        mlflow.log_metric("cv_f1_std", cv_scores.std())

        pipeline.fit(X_train.assign(clean_text=text_train), y_train)
        y_pred_val = pipeline.predict(X_val.assign(clean_text=text_val))

        f1 = f1_score(y_val, y_pred_val)
        acc = accuracy_score(y_val, y_pred_val)

        mlflow.log_metric("val_f1", f1)
        mlflow.log_metric("val_accuracy", acc)

        mlflow.sklearn.log_model(pipeline, "model")

        print(f"\n[{idx}/{len(param_combinations)}] Params: {params}")
        print(f"CV F1 mean: {cv_scores.mean():.4f} ¬± {cv_scores.std():.4f}")
        print(f"Val F1: {f1:.4f}")
        print(f"Val Accuracy: {acc:.4f}")

2026/02/19 23:29:24 INFO mlflow.tracking.fluent: Experiment with name 'disaster_tweets_baseline_v2' does not exist. Creating a new experiment.



[1/10] Params: {'max_features': 10000, 'ngram_range': (1, 2), 'min_df': 2, 'sublinear_tf': True, 'norm': 'l2', 'C': 0.5, 'solver': 'lbfgs', 'penalty': 'l2'}
CV F1 mean: 0.7860 ¬± 0.0102
Val F1: 0.7879
Val Accuracy: 0.8148
üèÉ View run run_01_mf10000_C0.5_ngram(1, 2) at: http://localhost:5000/#/experiments/2/runs/98022c5f62684e18aeaac4508e958664
üß™ View experiment at: http://localhost:5000/#/experiments/2





[2/10] Params: {'max_features': 12000, 'ngram_range': (1, 3), 'min_df': 3, 'sublinear_tf': True, 'norm': 'l2', 'C': 1.0, 'solver': 'liblinear', 'penalty': 'l1'}
CV F1 mean: 0.7828 ¬± 0.0100
Val F1: 0.7789
Val Accuracy: 0.8056
üèÉ View run run_02_mf12000_C1.0_ngram(1, 3) at: http://localhost:5000/#/experiments/2/runs/0753dd02add0418485ccbfdfdee0549f
üß™ View experiment at: http://localhost:5000/#/experiments/2





[3/10] Params: {'max_features': 15000, 'ngram_range': (1, 2), 'min_df': 2, 'sublinear_tf': False, 'norm': 'l1', 'C': 2.0, 'solver': 'lbfgs', 'penalty': 'l2'}
CV F1 mean: 0.7785 ¬± 0.0124
Val F1: 0.7754
Val Accuracy: 0.8016
üèÉ View run run_03_mf15000_C2.0_ngram(1, 2) at: http://localhost:5000/#/experiments/2/runs/431127c4b8bf429bb89e14f70cec6bcc
üß™ View experiment at: http://localhost:5000/#/experiments/2





[4/10] Params: {'max_features': 8000, 'ngram_range': (1, 2), 'min_df': 5, 'sublinear_tf': True, 'norm': 'l2', 'C': 0.1, 'solver': 'lbfgs', 'penalty': 'l2'}
CV F1 mean: 0.7420 ¬± 0.0171
Val F1: 0.7399
Val Accuracy: 0.7698
üèÉ View run run_04_mf8000_C0.1_ngram(1, 2) at: http://localhost:5000/#/experiments/2/runs/9135451f95074435ae80f90fb8e49ade
üß™ View experiment at: http://localhost:5000/#/experiments/2





[5/10] Params: {'max_features': 10000, 'ngram_range': (1, 1), 'min_df': 2, 'sublinear_tf': True, 'norm': 'l2', 'C': 0.01, 'solver': 'liblinear', 'penalty': 'l1'}
CV F1 mean: 0.6269 ¬± 0.0107
Val F1: 0.6169
Val Accuracy: 0.6230
üèÉ View run run_05_mf10000_C0.01_ngram(1, 1) at: http://localhost:5000/#/experiments/2/runs/6f511b016b404009b10f32c437cb7248
üß™ View experiment at: http://localhost:5000/#/experiments/2





[6/10] Params: {'max_features': 12000, 'ngram_range': (1, 2), 'min_df': 3, 'sublinear_tf': True, 'norm': 'l2', 'C': 1.0, 'solver': 'saga', 'penalty': 'elasticnet', 'l1_ratio': 0.5}
CV F1 mean: 0.7623 ¬± 0.0115
Val F1: 0.7625
Val Accuracy: 0.7890
üèÉ View run run_06_mf12000_C1.0_ngram(1, 2) at: http://localhost:5000/#/experiments/2/runs/3825bf6ffb5540a999186a0df7439662
üß™ View experiment at: http://localhost:5000/#/experiments/2





[7/10] Params: {'max_features': 15000, 'ngram_range': (1, 3), 'min_df': 2, 'sublinear_tf': True, 'norm': 'l2', 'C': 2.0, 'solver': 'saga', 'penalty': 'elasticnet', 'l1_ratio': 0.2}
CV F1 mean: 0.7656 ¬± 0.0135
Val F1: 0.7652
Val Accuracy: 0.7930
üèÉ View run run_07_mf15000_C2.0_ngram(1, 3) at: http://localhost:5000/#/experiments/2/runs/1f92fc43214447f18c0ed876ff6c950b
üß™ View experiment at: http://localhost:5000/#/experiments/2





[8/10] Params: {'max_features': 10000, 'ngram_range': (1, 2), 'min_df': 3, 'sublinear_tf': False, 'norm': 'l1', 'C': 0.5, 'solver': 'saga', 'penalty': 'elasticnet', 'l1_ratio': 0.8}
CV F1 mean: 0.7375 ¬± 0.0206
Val F1: 0.7382
Val Accuracy: 0.7646
üèÉ View run run_08_mf10000_C0.5_ngram(1, 2) at: http://localhost:5000/#/experiments/2/runs/ad4bf28d52b54557a40cf15910d6300a
üß™ View experiment at: http://localhost:5000/#/experiments/2





[9/10] Params: {'max_features': 20000, 'ngram_range': (1, 2), 'min_df': 2, 'sublinear_tf': True, 'norm': 'l2', 'C': 5.0, 'solver': 'lbfgs', 'penalty': 'l2'}
CV F1 mean: 0.7858 ¬± 0.0122
Val F1: 0.7920
Val Accuracy: 0.8208
üèÉ View run run_09_mf20000_C5.0_ngram(1, 2) at: http://localhost:5000/#/experiments/2/runs/fe508497f5e14611a8f5a5c096d398e9
üß™ View experiment at: http://localhost:5000/#/experiments/2





[10/10] Params: {'max_features': 5000, 'ngram_range': (1, 2), 'min_df': 5, 'sublinear_tf': True, 'norm': 'l2', 'C': 10.0, 'solver': 'liblinear', 'penalty': 'l1'}
CV F1 mean: 0.7542 ¬± 0.0139
Val F1: 0.7602
Val Accuracy: 0.7897
üèÉ View run run_10_mf5000_C10.0_ngram(1, 2) at: http://localhost:5000/#/experiments/2/runs/b60b0f8e5f884c73ace9292c21ad23d6
üß™ View experiment at: http://localhost:5000/#/experiments/2
