<a href="https://colab.research.google.com/github/jianna4/Machine_learning/blob/main/untitled4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# ============================================================
# 1️⃣ Install & Import Dependencies
# ============================================================
#!pip install -q tensorflow==2.15.0 keras==2.15.0 pandas scikit-learn

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import LabelEncoder
import numpy as np

# Show which TensorFlow build this session is running.
print("TensorFlow version:", tf.__version__)

# ============================================================
# 2️⃣ Load Dataset (Provided in Colab or from freeCodeCamp)
# ============================================================

# Tab-separated SMS corpus, downloaded over HTTP each time this cell runs.
# The active URL points at the pycon-2016-tutorial GitHub repository; the
# commented-out line below is an alternative location kept for reference.
#url = "https://storage.googleapis.com/download.tensorflow.org/data/smsspamcollection/SMSSpamCollection"
url = "https://raw.githubusercontent.com/justmarkham/pycon-2016-tutorial/master/data/sms.tsv"

# Column names are supplied explicitly, so every line of the file is read as
# a data row (header=None) with a 'label' and a 'message' column.
data = pd.read_csv(
    url,
    sep='\t',
    header=None,
    names=['label', 'message'],
)
print("Dataset loaded successfully!")
print(data.head())

# ============================================================
# 3️⃣ Preprocess the Data
# ============================================================

# Encode the string labels as integers. LabelEncoder assigns codes in sorted
# class order, so 'ham' -> 0 and 'spam' -> 1.
label_encoder = LabelEncoder()
data['label'] = label_encoder.fit_transform(data['label'])

# Hold out 20% of the messages for testing. The split is stratified on the
# label so both partitions keep the same ham/spam proportion; without it a
# random draw can leave the test set with a different share of spam than the
# training set and skew the reported accuracy.
X_train, X_test, y_train, y_test = train_test_split(
    data['message'],
    data['label'],
    test_size=0.2,
    random_state=42,
    stratify=data['label'],
)

# Fit the TF-IDF vocabulary (top 5000 terms, English stop words removed) on
# the training messages only, then apply that same fitted vectorizer to the
# test messages. Both results are converted from sparse to dense arrays.
vectorizer = TfidfVectorizer(stop_words='english', max_features=5000)
X_train_tfidf = vectorizer.fit_transform(X_train).toarray()
X_test_tfidf = vectorizer.transform(X_test).toarray()

print("Training shape:", X_train_tfidf.shape)
print("Testing shape:", X_test_tfidf.shape)

# ============================================================
# 4️⃣ Build the Neural Network Model
# ============================================================

# Feed-forward binary classifier over the TF-IDF features: two ReLU hidden
# layers (128 then 64 units), each followed by 30% dropout, ending in a
# single sigmoid unit.
n_features = X_train_tfidf.shape[1]

network_layers = [layers.Input(shape=(n_features,))]
for width in (128, 64):
    network_layers.append(layers.Dense(width, activation='relu'))
    network_layers.append(layers.Dropout(0.3))
network_layers.append(layers.Dense(1, activation='sigmoid'))

model = keras.Sequential(network_layers)

# Adam optimizer with binary cross-entropy loss; accuracy is tracked as the
# reported metric.
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

model.summary()

# ============================================================
# 5️⃣ Train the Model
# ============================================================

# Watch the validation loss and stop once it has failed to improve for two
# consecutive epochs, restoring the weights from the best epoch. With a fixed
# epoch count the model keeps fitting the training set after validation loss
# has started to climb, and the final (overfitted) weights are what get
# evaluated and used for predictions.
early_stopping = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=2,
    restore_best_weights=True,
)

# 20% of the training data is set aside by Keras as the validation set that
# the callback above monitors.
history = model.fit(
    X_train_tfidf, y_train,
    epochs=5,
    batch_size=32,
    validation_split=0.2,
    callbacks=[early_stopping],
    verbose=1
)

# Evaluate on the held-out test set
loss, acc = model.evaluate(X_test_tfidf, y_test, verbose=0)
print(f"\n✅ Test Accuracy: {acc*100:.2f}%")

# ============================================================
# 6️⃣ Define predict_message() Function
# ============================================================

def predict_message(message: str) -> list:
    """Classify a single message as ham or spam.

    Uses the module-level ``vectorizer`` (the fitted TF-IDF vectorizer) and
    ``model`` (the trained Keras classifier); both must exist before this
    function is called.

    Args:
        message: Raw message text.

    Returns:
        A two-element list ``[probability, label]``. ``probability`` is the
        model's sigmoid output as a Python float, and ``label`` is ``"spam"``
        when that probability is greater than 0.5, otherwise ``"ham"``.
    """
    # The vectorizer works on a collection of documents, so the single
    # message is wrapped in a list; the result is densified like the
    # training features were.
    msg_tfidf = vectorizer.transform([message]).toarray()

    # verbose=0 stops Keras from printing a progress bar on every call, which
    # would otherwise be interleaved with whatever the caller prints.
    # The output is indexed [0][0] to pull out the single score.
    prediction = float(model.predict(msg_tfidf, verbose=0)[0][0])

    label = "spam" if prediction > 0.5 else "ham"

    return [prediction, label]

# ============================================================
# 7️⃣ Test predict_message()
# ============================================================

# Hand-written sample messages for a quick manual check of predict_message().
test_messages = [
    "Congratulations! You have won a $1,000 Walmart gift card. Go to http://bit.ly/123456 to claim now.",
    "Hey, are we still meeting for lunch today?",
    "Free entry in 2 a weekly competition to win FA Cup final tickets.",
]

# Echo each message first, then classify it and show the result, followed by
# a separator line.
for text in test_messages:
    print(f"Message: {text}")
    result = predict_message(text)
    print(f"Prediction: {result}", "----", sep="\n")

# ============================================================
# 🎯 You can now run the final test cell from freeCodeCamp
# ============================================================

print("All done! Now submit your notebook link on freeCodeCamp.")


TensorFlow version: 2.19.0
Dataset loaded successfully!
  label                                            message
0   ham  Go until jurong point, crazy.. Available only ...
1   ham                      Ok lar... Joking wif u oni...
2  spam  Free entry in 2 a wkly comp to win FA Cup fina...
3   ham  U dun say so early hor... U c already then say...
4   ham  Nah I don't think he goes to usf, he lives aro...
Training shape: (4457, 5000)
Testing shape: (1115, 5000)


Epoch 1/5
[1m112/112[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 22ms/step - accuracy: 0.8610 - loss: 0.4871 - val_accuracy: 0.9025 - val_loss: 0.1770
Epoch 2/5
[1m112/112[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9590 - loss: 0.1136 - val_accuracy: 0.9798 - val_loss: 0.0650
Epoch 3/5
[1m112/112[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9957 - loss: 0.0234 - val_accuracy: 0.9798 - val_loss: 0.0650
Epoch 4/5
[1m112/112[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9979 - loss: 0.0113 - val_accuracy: 0.9787 - val_loss: 0.0715
Epoch 5/5
[1m112/112[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9991 - loss: 0.0067 - val_accuracy: 0.9798 - val_loss: 0.0841

✅ Test Accuracy: 98.92%
Message: Congratulations! You have won a $1,000 Walmart gift card. Go to http://bit.ly/123456 to claim now.
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m