# Step #1 Imports and Authentication

In [60]:
# A tutorial for this file is available at www.relataly.com
# Tested with Python 3.9.13, 1.2.0, Pandas 1.3.4, OpenAI 0.27.3, Tweepy 4.13.0, Requests 2.26.0, 

import logging
import pandas as pd
import openai
import tweepy
import csv
import requests
import random

# # Set API Keys and Authentification
# logging.info('Setting NewsAPI API Key')
# NEWSAPI_API_KEY = <your API key> # replace with own API key

# logging.info('Setting OpenAI API Key')
# openai.api_key = <your API key> # replace with own API key

# logging.info('Setting Twitter API Key')
# auth=tweepy.OAuthHandler(<your API key>,
#                          <your API secret>)
# auth.set_access_token(<your access token>,
#                       <your access secret>)
# twitter_api=tweepy.API(auth)

CSV_NEWS_NAME = 'news_log.csv'
CSV_FACTS_NAME = 'facts_log.csv'

In [61]:
from azure.identity import AzureCliCredential
from azure.keyvault.secrets import SecretClient
import tweepy
from azure.storage.blob import BlobServiceClient
import io
import pandas as pd
import openai


# Set up the Azure Key Vault client and retrieve the Blob Storage account credentials
keyvault_name = 'keyvaultforbot'
client = SecretClient(f"https://{keyvault_name}.vault.azure.net/", AzureCliCredential())
NEWSAPI_API_KEY = client.get_secret('newsapi-api-key').value
#### Twitter Auth
def twitter_auth():
    auth=tweepy.OAuthHandler(client.get_secret('twitter-api-key').value,
                            client.get_secret('twitter-api-secret').value)
    auth.set_access_token(client.get_secret('twitter-access-token').value,
                        client.get_secret('twitter-access-secret').value)
    twitter_api=tweepy.API(auth)
    logging.info('Twitter API ready')
    return twitter_api

##### OpenAI API Key
openai.api_key = client.get_secret('openai-api-key').value

# Step #2 Function for News API

In [62]:
#### NewsAPI
def fetch_news(number=10):
    # Fetch tech news from NewsAPI
    url = f"https://newsapi.org/v2/top-headlines?country=us&category=technology&apiKey={NEWSAPI_API_KEY}"
    response = requests.get(url).json()
    news_items = response["articles"]
    df = pd.DataFrame(news_items)
    df = df[["title", "description", "url"]].dropna()
    return df.head(number)

# Step #3 OpenAI Functions for News Relevance and Tweet Generation

In [63]:
#### OpenAI Engine
def openai_request(instructions, task, sample = [], model_engine='gpt-3.5-turbo'):
    prompt = [{"role": "system", "content": instructions }, 
              {"role": "user", "content": task }]
    prompt = sample + prompt
    completion = openai.ChatCompletion.create(model=model_engine, messages=prompt, temperature=0.2, max_tokens=400)
    return completion.choices[0].message.content


#### Define OpenAI Prompt for news Relevance
def select_relevant_news_prompt(news_articles, topics, n):    
    instructions = f'Your task is to examine a list of News and return a list of boolean values that indicate which of the News are in scope of a list of topics. \
    Return a list of True or False values that indicate the relevance of the News.'
    task =  f"{news_articles} /n {topics}?" 
    sample = [
        {"role": "user", "content": f"[new AI model available from Nvidia, We Exploded the AMD Ryzen 7, Release of b2 Game, XGBoost 3.0 improvices Decision Forest Algorithms, New Zelda Game Now Available, Ukraine Uses a New Weapon] /n {topics}?"},
        {"role": "assistant", "content": "[True, False, False, True, False, False]"},
        {"role": "user", "content": f"[Giga Giraff found in Sounth Africa, We Exploded the AMD Ryzen 7, Release of b2 Game, Donald Trump to make a come back, New Zelda Game Now Available, Ukraine Uses a New Weapon] /n {topics}?"}, 
        {"role": "assistant", "content": "[False, False, False, False, False, False]"}]
    return instructions, task, sample


#### Define OpenAI Prompt for news Relevance
def check_previous_posts_prompt(title, old_posts):    
    instructions = f'Your objective is to compare a news title with a list of previous news and determine whether it covers a similar topic that was already covered by a previous title. \
        Rate the overlap on a scale between 1 and 10 with 1 beeing a full overlap and 10 representing an unrelated topic. "'
    task =  f"'{title}.' Previous News: {old_posts}."
    sample = [
        {"role": "user", "content": "'Nvidia launches new AI model.' Previous News: [new AI model available from Nvidia, We Exploded the AMD Ryzen 7 7800X3D, The Lara Croft Collection For Switch Has Been Rated By The ESRB]."},
        {"role": "assistant", "content": "1"},
        {"role": "user", "content": "'Big Explosion of an AMD Ryzen 7.' Previous News: [Improving Mental Wellbeing Through Physical Activity, The Lara Croft Collection For Switch Has Been Rated By The ESRB]."},
        {"role": "assistant", "content": "10"},
        {"role": "user", "content": "'new AI model available from Google.' Previous News : [new AI model available from Nvidia, The Lara Croft Collection For Switch Has Been Rated By The ESRB]."},
        {"role": "assistant", "content": "9"},
        {"role": "user", "content": "'What Really Made Geoffrey Hinton Into an AI Doomer - WIRED.' Previous News : [Why AI's 'godfather' Geoffrey Hinton quit Google, new AI model available from Nvidia, The Lara Croft Collection For Switch Has Been Rated By The ESRB]."},
        {"role": "assistant", "content": "4"}]
    return instructions, task, sample


#### Define OpenAI Prompt for News Tweet
def create_tweet_prompt(title, description, tiny_url):
    instructions = f'You are a twitter user that creates tweets with a maximum length of 280 characters.'
    task = f"Create an informative tweet on twitter based on the following news title and description. \
        The tweet must use a maximum of 280 characters. \
        Include the {tiny_url}. But do not include any other urls.\
        Title: {title}. \
        Description: {description}. \
        Use hashtags to reach a wider audience. \
        Do not include any emojis in the tweet"
    return instructions, task


#### Define OpenAI Prompt for news Relevance
def previous_post_check(title, old_posts):
    instructions, task, sample = check_previous_posts_prompt(title, old_posts)
    response = openai_request(instructions, task, sample)
    return eval(response)


#### Define OpenAI Prompt for News Tweet
def create_fact_tweet_prompt(old_terms):
    instructions = f'You are a twitter user that creates tweets with a length below 280 characters.'
    task = f"Choose a technical term from the field of AI, machine learning or data science. Then create a twitter tweet that describes the term. Just return a python dictionary with the term and the tweet. "
    # if old terms not empty
    if old_terms != []:
        avoid_terms =f'Avoid the following terms, because you have previously tweetet about them: {old_terms}'
        task = task + avoid_terms
    sample = [
        {"role": "user", "content": f"Choose a technical term from the field of AI, machine learning or data science. Then create a twitter tweet that describes the term. Just return a python dictionary with the term and the tweet."},
        {"role": "assistant", "content": "{'GradientDescent': '#GradientDescent is a popular optimization algorithm used to minimize the error of a model by adjusting its parameters. \
         It works by iteratively calculating the gradient of the error with respect to the parameters and updating them accordingly. #ML'}"}]
    return instructions, task, sample

# Load previous information from a csv file
def get_history_from_csv(csv_name):
    try:
        # try loading the csv file
        df = pd.read_csv(csv_name)
    except:
        # create the csv file
        df = pd.DataFrame(columns=['title'])
        df.to_csv(csv_name, index=False)
    return df



# Step #4 Functions for Tweeting

In [64]:
def check_tweet_length(tweet):
    return False if len(tweet) > 280 else True


# Create the fact tweet
def create_fact_tweet(chance_for_tweet = 0.5):
    df_old_facts = get_history_from_csv(CSV_FACTS_NAME)

    if random.random() < chance_for_tweet:
        # create a fact tweet
        instructions, tasks, sample = create_fact_tweet_prompt(list(df_old_facts.tail(10)['title']))
        tweet = openai_request(instructions, tasks, sample)
        tweet_text = list(eval(tweet).values())[0]

        # tweet creation
        print(f'Creating fact tweet: {tweet_text}')
        
        # check tweet length and post tweet
        if check_tweet_length(tweet):
            twitter_auth().update_status(tweet_text)
            term = list(eval(tweet).keys())[0]
            # save the fact in the csv file
            with open(f'{CSV_FACTS_NAME}', 'a', newline='') as file:
                            writer = csv.writer(file)
                            writer.writerow([term])
        else: 
            print('error tweet too long')
    else:
        print('No fact tweet created')
    

def create_news_tweet(title, description, url):
    # create tiny url
    tiny_url = create_tiny_url(url)

    # define prompt for tweet creation
    instructions, task = create_tweet_prompt(title, description, tiny_url)
    tweet_text = openai_request(instructions, task)

    print(f'Creating new tweet: {tweet_text}')
    # check tweet length and post tweet
    if check_tweet_length(tweet_text):
            status = twitter_auth().update_status(tweet_text)
            print(f"Tweeted: {title}")
            with open(f'{CSV_NEWS_NAME}', 'a', newline='') as file:
                writer = csv.writer(file)
                writer.writerow([title])
    else: 
        status = 'error tweet too long'
    return status


def create_tiny_url(url):
    response = requests.get(f'http://tinyurl.com/api-create.php?url={url}')
    return response.text

# Step #5 Bringing it All Together - Running the Bot

In [65]:
#### Main Bot
def main_bot():
    # Read the old CSV data
    # try opening the csv file and creeate it if it does not exist
    df_old_news = get_history_from_csv(CSV_NEWS_NAME)
    df_old_news = df_old_news.tail(16)
    # Fetch news data
    df = fetch_news(12)
    
    # Check the Relevance of the News and Filter those not relevant
    relevant_topics ="[AI, Machine Learning, Data Science, OpenAI, Artificial Intelligence, Data Mining, Data Analytics]"
    instructions, task, sample = select_relevant_news_prompt(list(df['title']), relevant_topics, len(list(df['title'])))
    relevance = openai_request(instructions, task, sample)
    relevance_list = eval(relevance)

    s = 0
    df = df[relevance_list]
    if len(df) > 0:
        for index, row in df.iterrows():
            if s == 1:
                break
            logging.info('info:' + row['title'])
            title = row['title']
            title = title.replace("'", "")
            description = row['description']
            url = row['url']            
                                             
            if (title not in df_old_news.title.values):
                doublicate_check = previous_post_check(title, list(df_old_news.tail(10)['title']))
                if doublicate_check > 3:
                    # Create a tweet
                    response = create_news_tweet(title, description, url)
            
                else: 
                    print(f"Already tweeted: {title}")
            else: 
                print("No news articles found")
                create_fact_tweet(chance_for_tweet=0.5)

main_bot()

No news articles found
No fact tweet created
No news articles found
Creating fact tweet: Looking for a way to cluster your data? Try #KMeans! It is an unsupervised learning algorithm that groups similar data points together based on their distance to a centroid. #MachineLearning #DataScience
Creating tweet: {'KMeans': 'Looking for a way to cluster your data? Try #KMeans! It is an unsupervised learning algorithm that groups similar data points together based on their distance to a centroid. #MachineLearning #DataScience'}
