## Init

In [None]:
import os
import firebase_admin
from firebase_admin import credentials, db
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
import pickle


In [None]:
# Service-account credentials used to authenticate against Firebase.
cred = credentials.Certificate("path/to/serviceAccountKey.json")

# Fail fast with a clear message when the database URL is missing; otherwise
# the problem only surfaces later, when the database is first accessed.
database_url = os.getenv('FIREBASE_DATABASE_URL')
if not database_url:
    raise RuntimeError(
        "FIREBASE_DATABASE_URL is not set; it must hold the Realtime Database URL."
    )

# Notebook cells get re-run: initialize_app() raises ValueError if the default
# app already exists, so only initialize when get_app() reports there is none.
try:
    firebase_admin.get_app()
except ValueError:
    firebase_admin.initialize_app(cred, {'databaseURL': database_url})

## Get data


In [None]:
# Handle to the 'spam_emails' node of the Realtime Database.
ref = db.reference(path='spam_emails')
# Download everything stored under that node in a single read.
emails_data = ref.get()

## Clean and set up data


In [None]:
# Build the training DataFrame (columns 'email' and 'label') from the records
# downloaded into `emails_data`.

# An empty node comes back as None (or an empty container); stop here with a
# clear error instead of failing later on `.items()` or an empty vocabulary.
if not emails_data:
    raise ValueError("No records found under 'spam_emails'; nothing to train on.")

# Firebase may return a list instead of a dict when the child keys are
# sequential integers, with None filling any gaps -- accept both shapes.
if isinstance(emails_data, dict):
    records = list(emails_data.values())
else:
    records = [record for record in emails_data if record is not None]

# A record missing 'text' or 'is_spam' still raises KeyError, as before.
emails = [record['text'] for record in records]
labels = [record['is_spam'] for record in records]

df = pd.DataFrame({'email': emails, 'label': labels})

In [None]:
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(df['email'])
y = df['label']

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


## Train


In [None]:
# Fit a logistic-regression classifier with default settings on the training
# split; fit() returns the estimator itself, so `model` is the trained model.
model = LogisticRegression().fit(X_train, y_train)

In [None]:
# Persist the trained classifier and the fitted vectorizer side by side so
# they can be loaded together later.
for artifact_path, artifact in (
    ('spam_detector_model.pkl', model),
    ('vectorizer.pkl', vectorizer),
):
    with open(artifact_path, 'wb') as artifact_file:
        pickle.dump(artifact, artifact_file)


In [None]:
# Mean accuracy of the trained classifier on the held-out test split.
accuracy = model.score(X=X_test, y=y_test)