In [17]:
import json

import numpy as np
from nltk.stem import WordNetLemmatizer
import random
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Dropout
from tensorflow.keras.optimizers import SGD

# Load intents from JSON
def load_intents(filename):
    with open(filename, 'r') as file:
        intents = json.load(file)
    return intents['intents']


def preprocess_intents(intents):
    words = []
    classes = []
    documents = []
    for intent in intents:
        for pattern in intent['patterns']:
            # Tokenize each pattern and add words to the vocabulary
            w = nltk.word_tokenize(pattern)
            words.extend(w)
            # Add pattern words to words list if not already present
            documents.append((w, intent['tag']))
        # Add intent tag to classes list
        if intent['tag'] not in classes:
            classes.append(intent['tag'])
    # Lemmatize words and remove duplicates
    words = [WordNetLemmatizer().lemmatize(w.lower()) for w in words if w not in ['?', '.', '!']]
    words = sorted(list(set(words)))
    # Sort classes
    classes = sorted(list(set(classes)))
    return words, classes, documents


# Create training data
def create_training_data(words, classes, documents):
    training = []
    output_empty = [0] * len(classes)
    for doc in documents:
        bag = []
        pattern_words = doc[0]
        pattern_words = [WordNetLemmatizer().lemmatize(word.lower()) for word in pattern_words]
        for w in words:
            bag.append(1) if w in pattern_words else bag.append(0)

        output_row = list(output_empty)
        output_row[classes.index(doc[1])] = 1

        training.append([bag, output_row])

    random.shuffle(training)
    train_X = []
    train_Y = []
    for features, label in training:
        train_X.append(features)
        train_Y.append(label)
    return train_X, train_Y


# Modify the create_model function
def create_model(input_size, output_size):
    model = Sequential([
        Dense(128, input_shape=(input_size,), activation='relu'),
        Dropout(0.5),
        Dense(64, activation='relu'),
        Dropout(0.5),
        Dense(output_size, activation='softmax')
    ])
    # Use the new version of SGD optimizer
    sgd = tf.keras.optimizers.SGD(learning_rate=0.01, momentum=0.9, nesterov=True)
    model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])
    return model


# Main function
def main():
    intents = load_intents('intents2.json')
    words, classes, documents = preprocess_intents(intents)
    train_X, train_Y = create_training_data(words, classes, documents)
    input_size = len(train_X[0])
    output_size = len(train_Y[0])

    model = create_model(input_size, output_size)
    model.fit(np.array(train_X), np.array(train_Y), epochs=200, batch_size=5, verbose=1)

if __name__ == "__main__":
    main()




Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

In [48]:
#use case 1 - get headlines
import requests

def get_headlines():
    
    api_url = "https://newsapi.org/v2/top-headlines"
    
    
    api_key = '05a64f048db44f29b5d7d30402cac91a'
    
    #parameters for the API request
    parameters = {
        "apiKey": api_key,
        "country": "us",  
        "pageSize": 5    
    }
    
    try:
        # Sending a get request
        response = requests.get(api_url, params=parameters)
        
        # Checking if the request was successful (status code 200)
        if response.status_code == 200:
            # Parsing the JSON response
            data = response.json()
            
            # Extracting the headlines from the response
            headlines = [article['title'] for article in data['articles']]
            
            # Printing the headlines
            for headline in headlines:
                print(headline)
                
        else:
            print("Failed to fetch headlines. Status code:", response.status_code)
    
    except Exception as e:
        print("An error occurred:", e)

#Example usage:
#get_headlines()


In [49]:
#use case 2 - search news by topic
import requests
import re
"""
def extract_topic(user_input):
    # Define regular expression patterns to match phrases like "Find news about [topic]"
    patterns = [
        r"Find news about (.*)$",
        r"Show me articles on (.*)$"
    ]
    
    # Iterate over patterns and attempt to match them with user input
    for pattern in patterns:
        match = re.search(pattern, user_input)
        if match:
            # Extract the topic from the matched pattern
            topic = match.group(1)
            print("Extracted topic:", topic)
            return topic
    
    # If no pattern matches, return None
    return None
"""

def search_news_by_topic(topic):
   
    api_key = '05a64f048db44f29b5d7d30402cac91a'
    url = f'https://newsapi.org/v2/everything?q={topic}&apiKey={api_key}'
    
    try:
        response = requests.get(url)
        data = response.json()
        
        # Extracting relevant information from the response, such as article titles, descriptions, sources, etc.
        articles = data['articles']
        
        # Printing the  information
        for article in articles[:5]:
            title = article['title']
            description = article['description']
            source = article['source']['name']
            print(f"Title: {title}\nDescription: {description}\nSource: {source}\n")
    
    except Exception as e:
        print("An error occurred:", str(e))

#Example usage:
#search_news_by_topic("technology")


In [None]:
import difflib
import re
import json

def load_intents(file_path):
    with open(file_path, 'r') as file:
        intents_data = json.load(file)
    return intents_data['intents']
"""
def detect_intent(user_input, intents, similarity_threshold=0.7):
    for intent in intents:
        for pattern in intent['patterns']:
            similarity_score = difflib.SequenceMatcher(None, pattern.lower(), user_input.lower()).ratio()
            if similarity_score >= similarity_threshold:
                return intent['tag']
    return None

def detect_intent(user_input, intents):
    intent, topic = None, None
    for intent_data in intents:
        for pattern in intent_data['patterns']:
            regex_pattern = re.compile(pattern, re.IGNORECASE)
            match = regex_pattern.match(user_input)
            if match:
                # Extract the topic from the user input
                topic = user_input.replace(match.group(0), '').strip()
                intent = intent_data['tag']
                print("Detected intent:", intent)
                print("Extracted topic:", topic)
                return intent, topic
    
    # If no intent is detected, return (None, None)
    print("Intent not recognized.")
    return intent, topic
"""
import difflib
import re

def detect_intent(user_input, intents, similarity_threshold=0.55):
    intent, topic = None, None
    for intent_data in intents:
        for pattern in intent_data['patterns']:
            regex_pattern = re.compile(pattern, re.IGNORECASE)
            match = regex_pattern.match(user_input)
            if match:
                # Extract the topic from the user input
                topic = user_input.replace(match.group(0), '').strip()
                intent = intent_data['tag']
                print("Detected intent:", intent)
                print("Extracted topic:", topic)
                return intent, topic
    
    # If no exact match is found, try similarity scoring
    max_similarity_score = 0
    for intent_data in intents:
        for pattern in intent_data['patterns']:
            similarity_score = difflib.SequenceMatcher(None, pattern.lower(), user_input.lower()).ratio()
            if similarity_score >= similarity_threshold and similarity_score > max_similarity_score:
                # Extract the topic from the user input
                topic = user_input.replace(pattern, '').strip()
                intent = intent_data['tag']
                max_similarity_score = similarity_score
    if intent:
        print("Detected intent (similarity score):", intent)
        print("Extracted topic (similarity score):", topic)
        return intent, topic
    
    # If no intent is detected, return (None, None)
    if not intent and not topic:
        print("Intent not recognized.")
    return intent, topic
"""
def main():
    intents = load_intents('/Users/mohitsingh/classeswinter24/ai/jupyter/intents2.json')
    print("Welcome to the News Chatbot!")
    while True:
        user_input = input("You: ")
        if user_input.lower() == "0":
            print("Goodbye!")
            break
        intent = detect_intent(user_input, intents)
        if intent:
            print("Detected intent:", intent)
            if intent == 'get_headlines':
                get_headlines()  # Call the function for the 'get_headlines' intent
            elif intent == "search_by_topic":
                # Extract topic from user input
                topic = extract_topic(user_input)
                response = search_news_by_topic(topic)
            else:
                print("Intent not recognized.")
        else:
            print("Intent not recognized.")
"""
def main():
    intents = load_intents('intents2.json')
    print("Welcome to the News Chatbot!")
    while True:
        user_input = input("You: ")
        if user_input.lower() == "0":
            print("Goodbye!")
            break
        intent, topic = detect_intent(user_input, intents)
        if intent:
            print("Detected intent:", intent)
            if intent == 'get_headlines':
                get_headlines()  # Call the function for the 'get_headlines' intent
            elif intent == "search_by_topic":
                response = search_news_by_topic(topic)
            else:
                print("Intent not recognized.")
        else:
            print("Intent not recognized.")

if __name__ == "__main__":
    main()


Welcome to the News Chatbot!


You:  find news about tesla


Detected intent: search_by_topic
Extracted topic: tesla
Detected intent: search_by_topic
Title: Tesla will unveil a robotaxi on April 8, according to Musk
Description: Tesla is introducing a robotaxi on August 8, Elon Musk has announced on X a few hours after Reuters published a report that the automaker is scrapping its plans to produce a low-cost EV. Reuters also said that Musk's directive was to "go all in" on robotaxis …
Source: Yahoo Entertainment

Title: Tesla sees EV deliveries drop year-over-year for the first time since 2020
Description: Tesla
 has revealed how many vehicles it delivered in the first three months of 2024 and the figures dropped significantly from both the previous quarter and the same period in 2023
. The company handed over
 386,810 EVs during the period.
That's down 20 p…
Source: Yahoo Entertainment

Title: Tesla Is Going All In on Robotaxis—Buckle Up
Description: Autonomous vehicle tech has proven tricky for even the most sophisticated tech developers. But 