# NLP demo

In [40]:
import mlflow.sklearn
import os
import numpy as np
import pandas as pd

# MLflow run that holds the artifacts used in this demo.
_id = "9181fa7456f5409485a1c31ff235c76a"
# Fetch the scikit-learn model logged under the run's "model" artifact path.
model = mlflow.sklearn.load_model(model_uri=f"runs:/{_id}/model")

In [41]:
# Two short example strings to run through the model.
texts = ["inbox file edit", "shop game"]
samples = np.asarray(texts)
# The vectorizer was logged in the same run, under the "vectorizer" artifact
# path (vectorizer/model.pkl), so it is loaded the same way as the model.
vectorizer = mlflow.sklearn.load_model(model_uri=f"runs:/{_id}/vectorizer")
# Convert the raw strings into the features passed to the model.
X = vectorizer.transform(samples)

In [42]:
# Make a prediction: classify the vectorized sample texts held in X
y_pred = model.predict(X)

In [43]:
# Display the predicted labels returned by model.predict
y_pred

array([0, 1], dtype=int64)

## Try the model on the full dataset

In [44]:
# Location of the cleaned dataset. The original hard-coded path is kept as the
# default; set the NLP_DEMO_CSV environment variable to run this notebook on a
# machine where the data lives somewhere else.
csvFile = os.environ.get("NLP_DEMO_CSV", "E:\\mladhd\\datasets\\text\\data_clean_large.csv")
# The file is semicolon-separated; rows containing any missing value are dropped.
df = pd.read_csv(csvFile, sep=';').dropna()

In [45]:
# Score every row of the dataset: vectorize the text column, classify it, and
# attach the predicted label as a "pred" column.
X = vectorizer.transform(df["text"])
y_pred = model.predict(X)
df = df.assign(pred=y_pred)

In [46]:
# Keep only the rows whose predicted label differs from the true label.
is_wrong = df["pred"].ne(df["class"])
wrong = df.loc[is_wrong]
wrong


Unnamed: 0,class,text,image,pred
452,0,Period 11 Posting Google & google.com/sea Ques...,focused_Astronomy_one_452.jpg,1
1804,0,Overflow * What is the result of an arithmetic...,focused_ComputerScience_one_1804.jpg,1
1806,0,YouTube CMSC 311 Lecture 3/28/23 © Unlisted Cl...,focused_ComputerScience_one_1806.jpg,1
2925,0,W George Washington - Wikipedia x @ enwikipedi...,focused_extra_one_2925.jpg,1
2963,0,Sum = 35 & an) (EE canes oi a AAA 33 oe D>>D...,focused_extra_one_2963.jpg,1
...,...,...,...,...
11680,1,1. Study related work to know the state-of-the...,distracted_youtube_partial_3074.jpg,0
11681,1,Share 13k While it is true that the TFG is dev...,distracted_youtube_partial_3075.jpg,0
11685,1,Share Milestone 1: Related work study Particip...,distracted_youtube_partial_3079.jpg,0
11689,1,Short description: Identifying and reviewing p...,distracted_youtube_partial_3083.jpg,0


In [47]:
# Show one randomly chosen misclassified row. DataFrame.sample raises a
# ValueError on an empty frame, so the "no wrong predictions" case is handled
# explicitly instead of crashing the cell.
if wrong.empty:
    print("No wrong predictions to show.")
else:
    rand_wrong = wrong.sample(1).iloc[0]
    print(f"Image: {rand_wrong.image} - Class: {rand_wrong['class']} - Predicted: {rand_wrong.pred}")
    print(f"Text: {rand_wrong.text}")

Image: distracted_twitch_partial_2363.jpg - Class: 1 - Predicted: 0
Text: lcome to the chat room Welcome to the chat room! Ba ¥ InTheThanos22: 1 more hr of clash then‘ Mr_Grumplestiltskin: !carrots JakesJungle: Carrots : mr_grumplestiltskin [139 Hrs] - 1,658 Mi sruna is: @Big_Daddy_Patti D: it's too soon for that (2 TaliBoy_Ryan: i took today off bungie it’s my one day off every 10 years and you do this @ a a_loaf_of_soup: no (QJ BY irchens: | need to get more roses today lol a colorless5917 4¢16,000 8 swift... #¢10,000 ey hotaru... @5,000 ? STREAM CHAT arch SSVIPERZ - $215 f TOP WEEKLY 0 SSVIPERZ - $215 20,534 <> | @ @Q-~, |e) 276 rT A GOC 72567 Ww es a w Subscribe Vv i) Log In >] oO @ Guest (2) Pion ED 2 Organic Chemistry IL Glossar Organic Chemistry | Glossary Math Word Problems Glossary Oxidation is the loss of electron(s) by a species, and reduction is the gain of electron(s). Oxidation and reduction occur simultaneously in redox reactions. In a balanced redox equation, the total 