In [1]:
from searchtweets import load_credentials, ResultStream, gen_rule_payload, collect_results
import pandas as pd
import time
from datetime import datetime
import regex as re

In [2]:
# Load the API credentials stored under the "search_tweets_api" key of the
# local YAML file; they are unpacked into the ResultStream further down.
premium_search_args = load_credentials(
    filename="twitter_keys_fullarchive.yaml",
    yaml_key="search_tweets_api",
    env_overwrite=False,
)

Grabbing bearer token from OAUTH


# 1) Define search parameters

In [1]:
# Query string handed to gen_rule_payload below; here a single hashtag.
# Examples of combining several terms:
#   "OR":  "ligeløn OR løn"
#   "AND": "Mette AND Mink"
# NOTE(review): confirm that the search API accepts an explicit AND; its
# operator guide describes whitespace between terms as the AND operator.
search_word = '#kv21'

# 2) Define the search period

In [4]:
# Build the request payload for `search_word`: 500 results per call,
# restricted to the date window given below.
rule = gen_rule_payload(
    search_word,
    results_per_call=500,
    from_date='2021-11-16',  # start of the search window
    to_date='2021-11-17',    # end of the search window
)
print(rule)

{"query":"#kv21","maxResults":500,"toDate":"202111170000","fromDate":"202111160000"}


In [5]:
# Combine the payload with the loaded credentials; max_results is set to
# 10000 for this stream.
rs = ResultStream(
    rule_payload=rule,
    max_results=10000,
    **premium_search_args
)

print(rs)

ResultStream: 
	{
    "username": null,
    "endpoint": "https:\/\/api.twitter.com\/1.1\/tweets\/search\/fullarchive\/OperateTwitter.json",
    "rule_payload": {
        "query": "#kv21",
        "maxResults": 500,
        "toDate": "202111170000",
        "fromDate": "202111160000"
    },
    "tweetify": true,
    "max_results": 10000
}


In [6]:
# Consume the whole stream and keep everything it yields in a list.
# NOTE(review): presumably this is the cell that issues the API requests — confirm.
tweets = list(rs.stream())

In [7]:
# One row per collected tweet; the trailing expression lists the available
# fields (columns).
df = pd.DataFrame(tweets)
df.columns

Index(['created_at', 'id', 'id_str', 'text', 'source', 'truncated',
       'in_reply_to_status_id', 'in_reply_to_status_id_str',
       'in_reply_to_user_id', 'in_reply_to_user_id_str',
       'in_reply_to_screen_name', 'user', 'geo', 'coordinates', 'place',
       'contributors', 'is_quote_status', 'quote_count', 'reply_count',
       'retweet_count', 'favorite_count', 'entities', 'favorited', 'retweeted',
       'filter_level', 'lang', 'matching_rules', 'extended_tweet',
       'possibly_sensitive', 'display_text_range', 'retweeted_status',
       'extended_entities', 'quoted_status_id', 'quoted_status_id_str',
       'quoted_status', 'quoted_status_permalink'],
      dtype='object')

In [8]:
# Derive flat helper columns from the raw tweet fields.

# True when the tweet carries a `retweeted_status` payload.
df['retweet'] = df['retweeted_status'].notna()

# Date part (YYYY-MM-DD) of the `created_at` timestamp string.
df['dato'] = df['created_at'].apply(
    lambda stamp: datetime.strptime(stamp, '%a %b %d %H:%M:%S +0000 %Y').strftime('%Y-%m-%d')
)

# Selected fields from the nested `user` mapping.
df['bruger'] = df['user'].map(lambda user: user['name'])
df['bruger_beskrivelse'] = df['user'].map(lambda user: user['description'])
df['twitter_handle'] = df['user'].map(lambda user: user['screen_name'])

In [9]:
def text(a, b):
    """Return the text to use for one tweet.

    Parameters
    ----------
    a : fallback text, returned when `b` is missing.
    b : mapping with a 'full_text' entry, or a null value (None / NaN).

    Returns
    -------
    `a` when `b` is null, otherwise `b['full_text']`.
    """
    return a if pd.isnull(b) else b['full_text']

In [11]:
# Use the untruncated text when the row has an `extended_tweet` payload,
# otherwise fall back to the `text` column. The helper defined in this
# notebook is named `text`; calling `tekst` raises NameError on a fresh kernel.
df['tweet_text'] = df.apply(lambda x: text(x['text'], x['extended_tweet']), axis=1)

## Add sentiment

In [13]:
# NOTE(review): this import sits mid-notebook; consider moving it to the
# import cell at the top so all dependencies are visible in one place.
from sentida import Sentida
# Scorer instance used by the sentiment cell further down.
SV = Sentida()

In [14]:
def fjern_https(t):
    """Replace every whitespace-separated token starting with ``https://`` by ``.``.

    The input is split on whitespace and the tokens are re-joined with single
    spaces, so line breaks and repeated spaces are collapsed as a side effect.
    Tokens that do not start with ``https://`` are kept unchanged.
    """
    cleaned = [re.sub(r'^https:\/\/.*[\r\n]*', '.', token) for token in t.split()]
    return ' '.join(cleaned)

In [15]:
# Cleaned copy of each tweet text with links replaced; this is the column
# that gets scored for sentiment.
df['tweet_text_ren'] = df['tweet_text'].apply(fjern_https)

In [16]:
# Score every cleaned tweet with Sentida (output='mean', normal=False).
df['sentiment'] = df['tweet_text_ren'].apply(
    lambda tweet: SV.sentida(text=tweet, output='mean', normal=False)
)

In [17]:
def sentiment_kat(s):
    """Map a numeric sentiment score to a Danish category label.

    Bands:
        s < -3             -> 'Meget Negativ'
        -3   <= s < -1.5   -> 'Negativ'
        -1.5 <= s < -0.5   -> 'Lidt Negativ'
        -0.5 <= s <= 0.5   -> 'Neutral'
        0.5  <  s <= 1.5   -> 'Lidt Positiv'
        1.5  <  s <= 3     -> 'Positiv'
        s > 3              -> 'Meget Positiv'

    Parameters
    ----------
    s : numeric sentiment score, or a missing value (None / NaN).

    Returns
    -------
    The category label as a string, or None when the score is missing.
    Previously a NaN score matched no band and the function failed with
    UnboundLocalError.
    """
    if s is None:
        return None
    # Ordered guard clauses: each test only needs the upper bound because all
    # lower bands have already been ruled out.
    if s < -3:
        return 'Meget Negativ'
    if s < -1.5:
        return 'Negativ'
    if s < -0.5:
        return 'Lidt Negativ'
    if s <= 0.5:
        return 'Neutral'
    if s <= 1.5:
        return 'Lidt Positiv'
    if s <= 3:
        return 'Positiv'
    if s > 3:
        return 'Meget Positiv'
    # Only NaN reaches this point: every comparison with NaN is False.
    return None

In [18]:
# Translate each numeric score into its category label.
df['sentiment_kategori'] = df['sentiment'].apply(sentiment_kat)

In [19]:
# Preview the first two rows, including the derived columns.
# NOTE(review): the rendered output shows real user names and profile
# descriptions; consider clearing it before sharing the notebook.
df.head(2)

Unnamed: 0,created_at,id,id_str,text,source,truncated,in_reply_to_status_id,in_reply_to_status_id_str,in_reply_to_user_id,in_reply_to_user_id_str,...,quoted_status_permalink,retweet,dato,bruger,bruger_beskrivelse,twitter_handle,tweet_text,tweet_text_ren,sentiment,sentiment_kategori
0,Tue Nov 16 23:59:44 +0000 2021,1460759524789342218,1460759524789342218,Opsummering af dækningen af #kv21:\nBorgmester...,"<a href=""https://mobile.twitter.com"" rel=""nofo...",False,,,,,...,,False,2021-11-16,Frederik Kjøller Larsen,PhD Fellow at @polscicph @uni_copenhagen • ret...,fkjoellerlarsen,Opsummering af dækningen af #kv21:\nBorgmester...,Opsummering af dækningen af #kv21: Borgmester ...,0.333333,Neutral
1,Tue Nov 16 23:59:41 +0000 2021,1460759508288954377,1460759508288954377,Eva Kjer Hansen virker arrogant og forsmået so...,"<a href=""https://mobile.twitter.com"" rel=""nofo...",True,,,,,...,,False,2021-11-16,Sandra C. Trebbien,Stud.Cand.soc Development & IR - BA.Scient.adm...,SCTrebbien,Eva Kjer Hansen virker arrogant og forsmået so...,Eva Kjer Hansen virker arrogant og forsmået so...,-0.077778,Neutral
