In [2]:
import string
import nltk
import numpy as np
import pandas as pd
from nltk.corpus import stopwords
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, SpatialDropout1D, Input
from tensorflow.keras.models import Model
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

# Download stopwords if not already downloaded
nltk.download('stopwords')
stop_words = set(stopwords.words('english'))

# Function to clean text
def clean_text(text):
    # Remove punctuation
    text = text.translate(str.maketrans('', '', string.punctuation))
    # Convert to lowercase
    text = text.lower()
    # Remove stopwords
    text = ' '.join([word for word in text.split() if word not in stop_words])
    return text

# Load the datasets
true_news = pd.read_csv('true_news.csv')
fake_news = pd.read_csv('fake_news.csv')

# Add a label column to distinguish between true and fake news
true_news['label'] = 'True'
fake_news['label'] = 'Fake'

# Combine the datasets
news_data = pd.concat([true_news, fake_news])

# Clean the titles
news_data['cleaned_title'] = news_data['title'].apply(clean_text)

# Tokenize the titles
tokenizer = Tokenizer(num_words=5000)
tokenizer.fit_on_texts(news_data['cleaned_title'])
X_title = tokenizer.texts_to_sequences(news_data['cleaned_title'])

# Pad the sequences
X_title = pad_sequences(X_title, maxlen=50)

# Encode the labels
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(news_data['label'])

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_title, y, test_size=0.2, random_state=42)

# Build the LSTM model
input_layer = Input(shape=(X_title.shape[1],))
embedding_layer = Embedding(input_dim=5000, output_dim=128, input_length=X_title.shape[1])(input_layer)
spatial_dropout_layer = SpatialDropout1D(0.2)(embedding_layer)
lstm_layer = LSTM(100, dropout=0.2, recurrent_dropout=0.2)(spatial_dropout_layer)
output_layer = Dense(1, activation='sigmoid')(lstm_layer)
model = Model(inputs=input_layer, outputs=output_layer)
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model
history = model.fit(X_train, y_train, epochs=5, batch_size=64, validation_split=0.1, verbose=1)

# Make predictions
y_pred = (model.predict(X_test) > 0.5).astype("int32")

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
roc_auc = roc_auc_score(y_test, y_pred)

accuracy, precision, recall, f1, roc_auc


[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\user\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!




Epoch 1/5


Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


(0.9535634743875279,
 0.9595959595959596,
 0.9434180138568129,
 0.9514382205659717,
 0.9532143832724925)

In [3]:
from tensorflow.keras.models import load_model

# Save the trained model

model.save('news_classification_model.h5')
# Function for inference
def predict_news_title(model, tokenizer, title):
    # Clean the title
    cleaned_title = clean_text(title)
    # Tokenize the title
    title_sequence = tokenizer.texts_to_sequences([cleaned_title])
    # Pad the sequence
    padded_title = pad_sequences(title_sequence, maxlen=50)
    # Predict
    prediction = model.predict(padded_title)
    # Convert prediction to label
    label = 'True' if prediction > 0.5 else 'Fake'
    return label

# Load the model
loaded_model = load_model('news_classification_model.h5')

# Example usage
example_title = "Breaking news: Major breakthrough in AI research"
predicted_label = predict_news_title(loaded_model, tokenizer, example_title)
print(f"The predicted label for the given news title is: {predicted_label}")


  saving_api.save_model(


The predicted label for the given news title is: Fake


In [6]:
import openai
import sounddevice as sd
import numpy as np
import wavio
import webrtcvad
import os
from openai import OpenAI
from colorama import Fore, Style
import json
from newsapi import NewsApiClient
import time
import joblib
import string
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import load_model
import pickle
import nltk
from nltk.corpus import stopwords
import asyncio
import websockets

nltk.download('stopwords')
stop_words = set(stopwords.words('english'))

# Initialize OpenAI API client
openai.api_key = os.environ["OPENAI_API_KEY"]

# Initialize OpenAI client
client = OpenAI()

# Create the assistant with both search and fake news detection tools
assistant = client.beta.assistants.create(
    instructions="You are a news bot. You can search for news articles based on keywords or provide the latest news headlines in a very short summary news digest. You also detect fake news using a pre-trained model.",
    model="gpt-4-turbo",
    tools=[
        {
            "type": "function",
            "function": {
                "name": "search_news",
                "description": "Search for news articles based on keywords",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "query": {
                            "type": "string",
                            "description": "Keywords to search for, e.g., 'technology', 'politics'"
                        },
                        "type": {
                            "type": "string",
                            "description": "Type of news to search for, either 'top headlines' or 'everything'"
                        }
                    },
                    "required": ["query", "type"]
                }
            }
        },
        {
            "type": "function",
            "function": {
                "name": "detect_fake_news",
                "description": "Detect fake news using a pre-trained model",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "text": {
                            "type": "string",
                            "description": "Text to detect fake news for, e.g., 'The article is fake'"
                        }
                    },
                    "required": ["text"]
                }
            }
        }
    ]
)

# Initialize NewsApiClient
newsapi = NewsApiClient(api_key='925c20de4a3543118751067d2e96e331')

def clean_text(text):
    # Replace newlines and other escape sequences with a space
    text = text.replace('\n', ' ').replace('\r', ' ').replace('\t', ' ')
    
    # Remove punctuation
    text = text.translate(str.maketrans('', '', string.punctuation))
    
    # Replace extra spaces with commas
    text = ','.join(text.split())
    
    return text

# Function to load the best model
def load_best_model(model_path='best_model.pkl'):
    try:
        # Load the model from the specified path
        model = joblib.load(model_path)
        print(f"Model loaded from {model_path}")
        return model
    except Exception as e:
        print(f"Error loading model: {e}")
        return None

# Load the fake news detection model
model = load_best_model()

def clean_title_text(text):
    # Remove punctuation
    text = text.translate(str.maketrans('', '', string.punctuation))
    # Convert to lowercase
    text = text.lower()
    # Remove stopwords
    text = ' '.join([word for word in text.split() if word not in stop_words])
    return text

# Function for inference
def detect_fake_news(title, model=model, tokenizer=tokenizer):
    # Clean the title
    cleaned_title = clean_title_text(title)
    # Tokenize the title
    title_sequence = tokenizer.texts_to_sequences([cleaned_title])
    # Pad the sequence
    padded_title = pad_sequences(title_sequence, maxlen=50)
    # Predict
    prediction = model.predict(padded_title)
    # Convert prediction to label
    label = 'True' if prediction > 0.5 else 'Fake'
    return label

def search_news(query, type):
    if type == "top headlines":
        headlines = newsapi.get_top_headlines(q=query, language='en', country="us")
        headline_titles = []
        if headlines['totalResults'] > 0:
            for headline in headlines['articles']:
                headline_titles.append(headline['title'])
            return clean_text(' '.join(headline_titles))
    articles = newsapi.get_everything(q=query, language='en')
    article_titles = []
    for article in articles['articles']:
        article_titles.append(article['title'])
    return clean_text(' '.join(article_titles))

# Function to record audio using WebRTC VAD
def record_audio_with_vad():
    """Record audio from microphone using Voice Activity Detection."""
    vad = webrtcvad.Vad(3)  # Aggressiveness mode can be 0 to 3. 3 is the most aggressive.
    frames = []
    silent_chunks_count = 0  # Counter for consecutive silent chunks

    with sd.InputStream(samplerate=48000, channels=2, dtype=np.int16) as stream:
        print("Start speaking...")
        while True:
            audio_chunk, _ = stream.read(int(48000 * 0.01))  # 10ms chunk
            if vad.is_speech(audio_chunk.tobytes(), 48000):
                print("Recording...")
                frames.append(audio_chunk)
                break

        while True:
            audio_chunk, _ = stream.read(int(48000 * 0.01))  # 10ms chunk
            frames.append(audio_chunk)
            if vad.is_speech(audio_chunk.tobytes(), 48000):
                silent_chunks_count = 0  # Reset the counter if speech is detected
            else:
                silent_chunks_count += 1

            if silent_chunks_count >= 230:  # 3 seconds of silence
                print("Speech ended.")
                break

    audio_data = np.concatenate(frames, axis=0)
    temp_filename = "temp_recording_with_vad.wav"
    wavio.write(temp_filename, audio_data, 48000)
    return temp_filename

def audio_to_text(audio_file_path, client):
    """Convert audio to text using Whisper ASR."""
    with open(audio_file_path, 'rb') as audio_file:
        transcript = client.audio.translations.create(
            model="whisper-1", 
            file=audio_file
        )
    return transcript.text

# Create a single thread for the session
thread = client.beta.threads.create()

MAX_RETRIES = 3
RETRY_DELAY = 2  # in seconds

async def handler(websocket, path):
    while True:
        message = await websocket.recv()
        print(f"Received message: {message}")

        if message == "CHAT":
            retries = 0
            while retries < MAX_RETRIES:
                try:
                    # Record audio from microphone using VAD
                    audio_file_path = record_audio_with_vad()

                    # Convert audio to text
                    user_message = audio_to_text(audio_file_path, client)
                    print(f"User said: {user_message}")

                    message = client.beta.threads.messages.create(
                        thread_id=thread.id,
                        role="user",
                        content=user_message
                    )

                    run = client.beta.threads.runs.create(
                        thread_id=thread.id,
                        assistant_id=assistant.id,
                    )

                    tool_outputs = []
                    if hasattr(run.required_action, 'submit_tool_outputs'):
                        for tool in run.required_action.submit_tool_outputs.tool_calls:
                            query_dict = json.loads(tool.function.arguments)

                            if tool.function.name == "search_news":
                                query_value = query_dict['query']
                                type_value = query_dict['type']
                                articles = search_news(query=query_value, type=type_value)
                                tool_outputs.append({
                                    "tool_call_id": tool.id,
                                    "output": json.dumps(articles)
                                })

                            if tool.function.name == "detect_fake_news":
                                text_value = query_dict['text']
                                label = detect_fake_news(text_value)
                                tool_outputs.append({
                                    "tool_call_id": tool.id,
                                    "output": json.dumps({"label": label})
                                })

                        if tool_outputs:
                            try:
                                run = client.beta.threads.runs.submit_tool_outputs_and_poll(
                                    thread_id=thread.id,
                                    run_id=run.id,
                                    tool_outputs=tool_outputs
                                )
                                print("Tool outputs submitted successfully.")
                            except Exception as e:
                                print("Failed to submit tool outputs:", e)
                        else:
                            print("No tool outputs to submit.")

                    if run.status == 'completed':
                        messages = client.beta.threads.messages.list(
                            thread_id=thread.id
                        )
                        print("Bot:", messages.data[0].content)
                        await websocket.send(messages.data[0].content)
                        break  # If successful, break out of the retry loop
                    else:
                        print("Run status:", run.status)

                except Exception as e:
                    print(f"Error: {e}")
                    retries += 1
                    if retries < MAX_RETRIES:
                        print(f"Retrying {retries}/{MAX_RETRIES} ...")
                        time.sleep(RETRY_DELAY)
                    else:
                        print("Max retries reached. Sending an error message.")
                        await websocket.send("Error processing request. Please try again later.")
        else:
            await websocket.send(f"Echo: {message}")

start_server = websockets.serve(
    handler, 
    "192.168.146.105", 
    80,
    ping_interval=30, 
    ping_timeout=120
)

asyncio.get_event_loop().run_until_complete(start_server)
asyncio.get_event_loop().run_forever()


[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\user\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Tool outputs submitted successfully.
Bot: [TextContentBlock(text=Text(annotations=[], value="Here are the latest news headlines from Germany:\n\n1. **European Elections Impact**: Far-right AfD politicians were attacked in two locations in Germany amidst the EU parliamentary elections, which also saw a varied voter age with 16-year-olds voting in Germany and Belgium.\n\n2. **Cybersecurity Efforts**: The Netherlands, France, and Germany led a significant operation against the largest ever seen botnet, which could have implications for future cybersecurity measures.\n\n3. **Climate and Technology**: Germany is gearing up to launch a European 'sovereign cloud' by 2025 with significant investment, reflecting an ongoing trend towards digital sovereignty and secure data environments.\n\n4. **Social Issues and Developments**:\n   - Germany is trialing a four-day work week to test its impact on work-life balance and productivity.\n   - Floods in southern Germany have tragically killed at least 