In [1]:
import pandas as pd 
import numpy as np
import openai

In [2]:
api_key = open('API_key.txt', 'r').readline().strip()
openai.api_key = api_key

# User inputs

In [3]:
# question to ask media
request = "Чем занимается Путин"
dates = ['2022-08-01', '2022-12-31'] # list of min and max dates. If only one date - assumed to be min date.
sources = [] # list of sources. Must be exact match to the source name in the dataset 
stance = ['moder'] # list of stances. Must be exact match to the content type in the dataset ['inet propaganda', 'altern', 'tv', 'moder', 'voenkor']

model_name = "gpt-3.5-turbo" # model for summarization
# better model can accomodate more news
# however, the effect of more news in context should be tested
if model_name == "gpt-3.5-turbo": #4K (~10 news)
    price_1K = 0.0015 # price per 1000 characters
elif model_name == "gpt-3.5-turbo-16k": #16K (~40 news)
    price_1K = 0.003
elif model_name == "gpt-4": #8K (~20 news)
    price_1K = 0.03
elif model_name == "gpt-4-32k": #32K (~80 news)
    price_1K = 0.06

### load pickle df & filter

In [4]:
def get_filtered_df(request, dates=None, sources=None, stance=None):
    # load dataset
    df_filtered = pd.read_pickle('../TG news channels/filtered/df_war_ukr_filtered_ada_emb.pkl')
    full_len = df_filtered.shape[0]
    # filter by date, source, stance
    if dates:
        if len(dates) == 1:
            df_filtered = df_filtered[df_filtered['date'] >= dates[0]]
        elif len(dates) == 2:
            df_filtered = df_filtered[(df_filtered['date'] >= dates[0]) & (df_filtered['date'] <= dates[1])]
    if sources:
        df_filtered = df_filtered[df_filtered['source'].isin(sources)]
    if stance:
        df_filtered = df_filtered[df_filtered['stance'].isin(stance)]

    print(f"Number of messages in the filtered dataset: {df_filtered.shape[0]}, {df_filtered.shape[0]/full_len*100:.2f}% of the full dataset")

    return df_filtered

## find top relevant news

In [5]:
# embed request
def get_embedding(text, model="text-embedding-ada-002"):
   text = text.replace("\n", " ")
   return openai.Embedding.create(input = [text], model=model)['data'][0]['embedding']

In [6]:
# OPTION 1: find top N news closest to request (cosine similarity)

# optional parameter request_emb
def get_top_openai(df, request=None, request_emb=None, model="text-embedding-ada-002", top_n=10):
    if request_emb is None and request is None:
        print('Error: no request')
        return
    if request_emb is None:
        request_emb = get_embedding(request)
    df['cos_sim'] = df['emb'].apply(lambda x: np.dot(x, request_emb)/(np.linalg.norm(x)*np.linalg.norm(request_emb)))
    top_sim_news = df.sort_values(by='cos_sim', ascending=False).head(top_n)
    news4request = '\n'.join(top_sim_news.cleaned_message.tolist())
    return news4request

## Ask OpenAI

In [7]:
# simple function - suceptible to openai disconnection

def ask_openai(request, news4request, model_name = "gpt-3.5-turbo"):
    response = openai.ChatCompletion.create(
        model = model_name,
        messages=[
            {
            "role": "system",
            "content": "You are given few short texts in Russian. Based on this texts answer the following question:\n{request}. \nОтвечай на русском."
            },
            {
            "role": "user",
            "content": news4request
            }
        ],
        temperature=1,
        max_tokens=512,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0
        )
    return response

In [8]:
df_filtered = get_filtered_df(request, dates=dates, sources=sources, stance=stance)
news4request = get_top_openai(df_filtered, request=request, model="text-embedding-ada-002", top_n=10)

reply = ask_openai(request, news4request, model_name = model_name)

Number of messages in the filtered dataset: 3518, 3.17% of the full dataset


In [20]:
df_ = df_filtered[:10]
# "https://t.me/"+str(channel_name)+"/"+str(msg_id)
# link = "https://t.me/"+str(df_.channel_name)+"/"+str(df_.msg_id)
df_['link'] = df_.apply(lambda x: "https://t.me/"+str(x.channel_name)+"/"+str(x.msg_id), axis=1)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_['link'] = df_.apply(lambda x: "https://t.me/"+str(x.channel_name)+"/"+str(x.msg_id), axis=1)


In [21]:
df_

Unnamed: 0,channel_id,channel_name,msg_id,message,cleaned_message,date,views,number_replies,number_forwards,contains_media,media_type,has_url,url,domain,document_type,emb,stance,cos_sim,link
19920,1038402501,kommersant,45459,"Гости, подарки и нарядная елка не помогают отв...","Гости, подарки и нарядная елка не помогают отв...",2022-12-31,36982,37,100,False,,False,,,,"[-0.022998705506324768, -0.010879949666559696,...",moder,0.808453,https://t.me/kommersant/45459
19921,1038402501,kommersant,45419,Главные заявления пресс-секретаря президента Р...,Главные заявления пресс-секретаря президента Р...,2022-12-30,32615,45,37,True,MessageMediaPhoto,False,,,,"[-0.017266251146793365, -0.002344595966860652,...",moder,0.845058,https://t.me/kommersant/45419
19922,1038402501,kommersant,45407,"Кадры задержания прибывшего в Россию мужчины, ...","Кадры задержания прибывшего в Россию мужчины, ...",2022-12-30,33275,43,15,True,MessageMediaDocument,False,,,video/mp4,"[-0.008091169409453869, -0.024526357650756836,...",moder,0.816336,https://t.me/kommersant/45407
19923,1038402501,kommersant,45402,"🇧🇾Минобороны Белоруссии не исключает, что паде...","Минобороны Белоруссии не исключает, что падени...",2022-12-30,41943,29,24,False,,False,,,,"[-0.024069178849458694, -0.009593642316758633,...",moder,0.806556,https://t.me/kommersant/45402
19924,1038402501,kommersant,45384,◾️Президент РФ Владимир Путин освободил участн...,◾Президент РФ Владимир Путин освободил участни...,2022-12-29,35776,58,10,True,MessageMediaPhoto,False,,,,"[-0.00641512218862772, -0.009843776933848858, ...",moder,0.834342,https://t.me/kommersant/45384
19925,1038402501,kommersant,45376,🇧🇾🇺🇦Посла Украины вызвали в МИД Белоруссии пос...,Посла Украины вызвали в МИД Белоруссии после п...,2022-12-29,48051,45,43,False,,False,,,,"[-0.011971070431172848, -0.006854776293039322,...",moder,0.7917,https://t.me/kommersant/45376
19926,1038402501,kommersant,45365,❗️На территории Белоруссии упала украинская ра...,❗На территории Белоруссии упала украинская рак...,2022-12-29,36831,72,152,False,,False,,,,"[-0.012420917861163616, -0.0008996535325422883...",moder,0.775438,https://t.me/kommersant/45365
19927,1038402501,kommersant,45359,Глава Главного управления разведки Министерств...,Глава Главного управления разведки Министерств...,2022-12-29,41200,52,65,False,,False,,,,"[-0.012306863442063332, -0.007884803228080273,...",moder,0.812769,https://t.me/kommersant/45359
19928,1038402501,kommersant,45296,"Кадры с места ликвидации двух боевиков, которы...","Кадры с места ликвидации двух боевиков, которы...",2022-12-28,37309,8,24,True,MessageMediaDocument,False,,,video/mp4,"[-0.024571457877755165, -0.0015501478919759393...",moder,0.791579,https://t.me/kommersant/45296
19929,1038402501,kommersant,45295,"Двух боевиков, готовивших теракт в Чегеме по з...","Двух боевиков, готовивших теракт в Чегеме по з...",2022-12-28,50184,24,29,False,,False,,,,"[-0.020512806251645088, -0.008688247762620449,...",moder,0.800489,https://t.me/kommersant/45295


In [20]:
reply_text = reply.choices[0]['message']['content']
n_tokens_used = reply.usage.total_tokens
reply_cost = n_tokens_used / 1000 * price_1K
print(f"Cost - ${round(reply_cost, 3)}")
print(f"Request: {request}; \nFilters: dates: {dates}; sources: {sources}; stance: {stance}")
print()
# print wrapped text
print(reply_text)

Cost - $0.003
Request: Чем занимается Путин; 
Filters: dates: ['2022-08-01', '2022-12-31']; sources: []; stance: ['moder']

Based on the given texts, it can be inferred that Vladimir Putin is involved in various activities related to international relations, domestic agriculture, and discussions on the situation in Ukraine. He has been in contact with leaders from China, France, Germany, and also communicated with Dmitry Medvedev. Putin's statements suggest that he is concerned about the Ukrainian crisis and emphasizes the need for a balanced approach. He also discusses issues related to the Zaporizhzhia Nuclear Power Plant and the Ukrainian grain exports. Furthermore, Putin expresses his views on the actions of Western countries and highlights the importance of investigating terrorist attacks on Russian infrastructure. Overall, Putin appears to be actively engaged in diplomatic discussions and decision-making processes, particularly regarding Ukraine and Russia's relationships with ot

In [21]:
# add reply & inputs to csv-file
import csv

# if file does not exist write header
if not os.path.isfile('openai_chatbot.csv'):
    with open('openai_chatbot.csv', 'a', newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(['request', 'dates', 'sources', 'stance', 'reply_text', 'reply_cost'])

with open('openai_chatbot.csv', 'a', newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow([request, dates, sources, stance, reply_text, reply_cost])

# TG bot

In [23]:
from telegram import Update
from telegram.ext import ApplicationBuilder, CommandHandler, ContextTypes


async def hello(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
    await update.message.reply_text(f'Hello {update.effective_user.first_name}')


tg_token = open('tg_api.txt', 'r').readline().strip()

app = ApplicationBuilder().token(tg_token).build()

app.add_handler(CommandHandler("hello", hello))

app.run_polling()

RuntimeError: Cannot close a running event loop

In [25]:
from telegram.ext import Updater, CommandHandler

In [26]:
def do_analysis(input):
    
    return f"result = {input}"

In [27]:
updater = Updater(token=tg_token, use_context=True) 

def start(update, context):
    update.message.reply_text("Hi! I'm your analysis bot.")

def analyze(update, context):
    input = context.args[0] 
    result = do_analysis(input)
    update.message.reply_text(result)

updater.dispatcher.add_handler(CommandHandler('start', start))
updater.dispatcher.add_handler(CommandHandler('analyze', analyze))

TypeError: Updater.__init__() got an unexpected keyword argument 'token'

In [None]:
updater.start_polling()