## Prepare data

In [40]:
import pandas as pd

# Read the train and test splits from CSV files located in the working directory.
data_train, data_test = (
    pd.read_csv(csv_path) for csv_path in ("data_train.csv", "data_test.csv")
)

# For each split, the 'text' column holds the model input and 'label' the target.
X_train, y_train = (data_train[column] for column in ('text', 'label'))
X_test, y_test = (data_test[column] for column in ('text', 'label'))

## Set up the environment variable for the Neptune API token

In [41]:
import os
from getpass import getpass
# Expose the Neptune API token through the NEPTUNE_API_TOKEN environment variable.
# Prompt only when the variable is missing or empty, so that a token configured
# outside the notebook is not overwritten and a non-interactive "Run All" does not
# block on input. getpass keeps the typed token out of the cell output.
if not os.environ.get("NEPTUNE_API_TOKEN"):
    os.environ["NEPTUNE_API_TOKEN"] = getpass("Enter your API key here: ")

## Prepare a loop for creating different models and logging results to Neptune

In [42]:
import neptune
import neptune.integrations.sklearn as npt_utils
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer

# Train one model per hyperparameter combination, each in its own Neptune run,
# so that the runs can be compared in the app.
n_estimators = [50, 100, 200]
min_samples_split = [2, 5]
# Fixed seed so that re-executing the notebook rebuilds the same forests.
RANDOM_STATE = 42

for estimators in n_estimators:
    for min_split in min_samples_split:
        run = neptune.init_run(
            name=f"RandomForest with {estimators} trees and {min_split} min samples split",
            tags=["RandomForest", "classification"],
            project="dagm.solska/ag-news-classification",
        )
        # try/finally guarantees the run is stopped even when training or
        # summary creation raises, so no run is left open after a failure.
        try:
            # Token counts -> TF-IDF weighting -> random forest classifier.
            text_clf = Pipeline(
                [
                    ('vect', CountVectorizer()),
                    ('tfidf', TfidfTransformer()),
                    (
                        'clf',
                        RandomForestClassifier(
                            n_estimators=estimators,
                            min_samples_split=min_split,
                            random_state=RANDOM_STATE,
                        ),
                    ),
                ],
            )
            text_clf.fit(X_train, y_train)

            # Store the summary built by the Neptune sklearn integration under
            # the "cls_summary" namespace of this run.
            run["cls_summary"] = npt_utils.create_classifier_summary(
                text_clf, X_train, X_test, y_train, y_test
            )
        finally:
            run.stop()

https://app.neptune.ai/dagm.solska/ag-news-classification/e/AG-2
Shutting down background jobs, please wait a moment...
Done!
Waiting for the remaining 70 operations to synchronize with Neptune. Do not kill this process.
Still waiting for the remaining 70 operations (0.00% done). Please wait.
Still waiting for the remaining 70 operations (0.00% done). Please wait.
All 70 operations synced, thanks for waiting!
Explore the metadata in the Neptune app:
https://app.neptune.ai/dagm.solska/ag-news-classification/e/AG-2/metadata
https://app.neptune.ai/dagm.solska/ag-news-classification/e/AG-3
Shutting down background jobs, please wait a moment...
Done!
Waiting for the remaining 70 operations to synchronize with Neptune. Do not kill this process.
Still waiting for the remaining 70 operations (0.00% done). Please wait.
Still waiting for the remaining 70 operations (0.00% done). Please wait.
All 70 operations synced, thanks for waiting!
Explore the metadata in the Neptune app:
https://app.neptun

Exception in thread NeptuneWebhooks:
Traceback (most recent call last):
  File "/Users/dagmarasolska/PycharmProjects/neptunetask/venv/lib/python3.9/site-packages/websocket/_socket.py", line 176, in send
    return _send()
  File "/Users/dagmarasolska/PycharmProjects/neptunetask/venv/lib/python3.9/site-packages/websocket/_socket.py", line 153, in _send
    return sock.send(data)
  File "/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/ssl.py", line 1173, in send
    return self._sslobj.write(data)
socket.timeout: The write operation timed out

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/threading.py", line 973, in _bootstrap_inner
    self.run()
  File "/Users/dagmarasolska/PycharmProjects/neptunetask/venv/lib/python3.9/site-packages/neptune/internal/threading/daemon.py",

Done!
Waiting for the remaining 70 operations to synchronize with Neptune. Do not kill this process.
Still waiting for the remaining 70 operations (0.00% done). Please wait.
Still waiting for the remaining 70 operations (0.00% done). Please wait.
Still waiting for the remaining 70 operations (0.00% done). Please wait.
Still waiting for the remaining 70 operations (0.00% done). Please wait.
Still waiting for the remaining 70 operations (0.00% done). Please wait.
Still waiting for the remaining 70 operations (0.00% done). Please wait.
Still waiting for the remaining 70 operations (0.00% done). Please wait.
Still waiting for the remaining 70 operations (0.00% done). Please wait.
Still waiting for the remaining 70 operations (0.00% done). Please wait.
Still waiting for the remaining 70 operations (0.00% done). Please wait.
All 70 operations synced, thanks for waiting!
Explore the metadata in the Neptune app:
https://app.neptune.ai/dagm.solska/ag-news-classification/e/AG-6/metadata
https://

## Log the notebook to the Neptune app

In [43]:
# Attach the notebook file to the project-level metadata under "initial_experiment".
project = neptune.init_project(project="dagm.solska/ag-news-classification")
# try/finally guarantees the project connection is stopped even if the upload raises.
try:
    # NOTE(review): the file is read from disk, so presumably the last *saved*
    # version of the notebook is what gets uploaded — save before running this cell.
    project["initial_experiment"].upload("ag_news_classification_rf.ipynb")
finally:
    project.stop()

https://app.neptune.ai/dagm.solska/ag-news-classification/
Shutting down background jobs, please wait a moment...
Done!
Waiting for the remaining 1 operations to synchronize with Neptune. Do not kill this process.
All 1 operations synced, thanks for waiting!
Explore the metadata in the Neptune app:
https://app.neptune.ai/dagm.solska/ag-news-classification/metadata
