In [1]:
from google.colab import drive

# Mount Google Drive at /content/drive so files stored there can be read by path
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# Switch the working directory to the DataBank folder on the mounted Drive and
# list its contents; relative paths used in later cells resolve against it.
%cd "/content/drive/MyDrive/Aplicaciones Avanzadas/DataBank"
!ls

/content/drive/MyDrive/Aplicaciones Avanzadas/DataBank
Fake.csv		    fake_news_model.keras    scaler.pkl
fake_news_classifier.h5     label_encoder.pkl	     tfidf_vectorizer.pkl
fake_news_classifier.keras  noticia_prueba_fake.txt  True.csv
fake_news_dataset.csv	    noticia_prueba_real.txt


In [3]:
import tensorflow as tf
import pandas as pd
import numpy as np
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
import joblib


In [4]:
# Fetch the NLTK corpora used by this notebook (only strictly needed the first
# time): 'stopwords' for common-word filtering and 'wordnet' for the
# WordNet-based lemmatizer.
nltk.download('stopwords')
nltk.download('wordnet')
from nltk.stem import WordNetLemmatizer

# English stop words as a set for fast membership tests
stop_words = set(stopwords.words('english'))

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...


In [5]:
# Load the model, vectorizer, scaler, and label encoder.
# All four paths are relative, so they resolve against the current working
# directory at the time this cell runs.
# NOTE(review): joblib.load unpickles the file contents, which can execute
# arbitrary code — only load .pkl files that come from a trusted source.

loaded_vectorizer = joblib.load('tfidf_vectorizer.pkl')
loaded_scaler = joblib.load('scaler.pkl')
loaded_le = joblib.load('label_encoder.pkl')
loaded_model = tf.keras.models.load_model('fake_news_classifier.keras')

In [6]:
# Text preprocessing helpers

# Read the English stop-word list, downloading the corpus first if NLTK
# reports that it is not available locally.
try:
    english_stopwords = stopwords.words('english')
except LookupError:
    nltk.download('stopwords')
    english_stopwords = stopwords.words('english')
stop_words = set(english_stopwords)

def limpiar_texto(texto):
    """Normalize raw text and drop stop words.

    The text is lowercased, digit runs are deleted, every character that is
    neither a word character nor whitespace is deleted, surrounding
    whitespace is trimmed and internal whitespace runs are collapsed to a
    single space. Tokens found in the module-level ``stop_words`` set are then
    discarded.

    Args:
        texto: The input string.

    Returns:
        The remaining tokens joined by single spaces (``''`` if none remain).
    """
    normalizado = texto.lower()
    # Deletions are applied in this order: digits first, then punctuation.
    for patron in (r'\d+', r'[^\w\s]'):
        normalizado = re.sub(patron, '', normalizado)
    normalizado = re.sub(r'\s+', ' ', normalizado.strip())
    return ' '.join(
        token for token in normalizado.split() if token not in stop_words
    )

# Constructing WordNetLemmatizer() does not touch the WordNet corpus: NLTK
# loads it lazily on the first lemmatize() call, so wrapping only the
# constructor in try/except LookupError can never trigger the download.
# Lemmatize a probe word instead, so a missing corpus is detected (and
# fetched) here rather than in the middle of a later prediction.
lemmatizer = WordNetLemmatizer()
try:
    lemmatizer.lemmatize('probe')
except LookupError:
    nltk.download('wordnet')
    # Probe again so that a download that did not help fails loudly right here.
    lemmatizer.lemmatize('probe')

def lemmatizar_texto(texto):
    """Lemmatize every whitespace-separated token of ``texto``.

    Each token is passed to the module-level ``lemmatizer`` with its default
    arguments.

    Args:
        texto: The input string.

    Returns:
        The lemmatized tokens joined by single spaces.
    """
    return ' '.join(lemmatizer.lemmatize(token) for token in texto.split())

# Preprocess a new piece of text for the model
def preprocess_text_for_prediction(text):
    """Turn raw text into the scaled feature array fed to the model.

    The text is cleaned with ``limpiar_texto``, lemmatized with
    ``lemmatizar_texto``, vectorized as a one-document batch with
    ``loaded_vectorizer``, converted to a dense array and finally passed
    through ``loaded_scaler``.

    Args:
        text: The raw input string.

    Returns:
        The output of ``loaded_scaler.transform`` for the single document
        (presumably one row of features — confirm against the fitted scaler).
    """
    normalized = lemmatizar_texto(limpiar_texto(text))
    tfidf_matrix = loaded_vectorizer.transform([normalized])
    # The vectorizer output is densified before scaling, as in the original flow.
    dense_features = tfidf_matrix.toarray()
    return loaded_scaler.transform(dense_features)

In [13]:
# Prediction helper
def predict_from_text_file(file_path, threshold=0.5):
    """Classify the text stored in a UTF-8 file and print the result.

    The file content is preprocessed with ``preprocess_text_for_prediction``
    and scored by ``loaded_model``. The first value of the flattened model
    output is compared against ``threshold`` to obtain class 0 or 1, which
    ``loaded_le`` maps back to its string label.

    Args:
        file_path: Path of the text file to classify.
        threshold: Minimum score for which class 1 is predicted. Defaults to
            0.5, the cut-off that was previously hard-coded.

    Returns:
        The decoded label, or ``None`` if the file does not exist or any
        other error occurs while processing it (a message is printed in
        both cases instead of raising).
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            text = f.read()

        processed_text = preprocess_text_for_prediction(text)

        # Convert to built-in types so the comparison and the formatting below
        # do not depend on NumPy scalar methods.
        prediction_prob = float(loaded_model.predict(processed_text).flatten()[0])
        prediction = int(prediction_prob >= threshold)

        predicted_label = loaded_le.inverse_transform([prediction])[0]

        print(f"Archivo: {file_path}")
        # NOTE(review): the message assumes class 1 decodes to 'real' — confirm
        # against loaded_le.classes_.
        print(f"Probabilidad de ser Real: {prediction_prob:.4f}")
        print(f"Predicción: {'Verdadero' if predicted_label == 'real' else 'Falso'}")
        return predicted_label

    except FileNotFoundError:
        print(f"Error: Archivo no encontrado en {file_path}")
        return None
    except Exception as e:
        # Deliberate best-effort: report the problem and keep the notebook running.
        print(f"Error al procesar el archivo {file_path}: {e}")
        return None

In [14]:
# Section banner: title framed by two 42-character rules
print('-'*42, 'Archivos reales', '-'*42, sep='\n')
# Real news article from Sky News
predict_from_text_file('/content/real_example.txt')

------------------------------------------
Archivos reales
------------------------------------------
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
Archivo: /content/real_example.txt
Probabilidad de ser Real: 0.8693
Predicción: Verdadero


'real'

In [15]:
# Second example file; in the recorded run its score (0.5092) is only just
# above the 0.5 cut-off, so the 'real' label here is a borderline call.
predict_from_text_file('/content/real_example2.txt')

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
Archivo: /content/real_example2.txt
Probabilidad de ser Real: 0.5092
Predicción: Verdadero


'real'

In [16]:
# NOTE(review): in the recorded run this file scores 0.0000 and is labelled
# 'fake' even though its name suggests a real article — confirm the expected
# label and investigate if it is a misclassification.
predict_from_text_file('/content/article_real.txt')

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
Archivo: /content/article_real.txt
Probabilidad de ser Real: 0.0000
Predicción: Falso


'fake'