### Inicjalizacja tokenów 

In [1]:
import configparser
import os
import re

import pandas as pd
import tweepy
from textblob import TextBlob


def get_all_access_configs():
    """Return the values stored in the ``[twitter]`` section of ``config.ini``.

    The file name is relative, so it is resolved against the current working
    directory.

    Returns:
        A view over the values of the ``twitter`` section.

    Raises:
        KeyError: If the parsed configuration has no ``twitter`` section.
    """
    parser = configparser.RawConfigParser()
    parser.read('config.ini')
    return parser['twitter'].values()

# Only the bearer token is used to build the client. It is taken as the fifth
# (last) value of the config section; the first four values are discarded.
twitter_credentials = get_all_access_configs()
_, _, _, _, bearer_token = twitter_credentials

client = tweepy.Client(bearer_token=bearer_token)

### Funkcje pomocnicze do pobierania danych

In [19]:
# Accounts whose timelines are downloaded, in the order the results are stored.
users = [
    'elonmusk',
    'joebiden',
    'BarackObama',
    'twitter',
    'theellenshow',
    'youtube',
    'cristiano',
    'justinbieber',
    'cnnbrk',
    'billgates',
    'narendramodi',
]

def fetch_tweets_from_user(user, client):
    """Download the tweets of a single account, skipping retweets and replies.

    Args:
        user: Username passed to ``client.get_user`` to resolve the account id.
        client: Object exposing ``get_user`` and ``get_users_tweets``
            (a ``tweepy.Client`` in this notebook).

    Returns:
        list: Every tweet yielded by the paginator, requested with the fields
        listed in ``tweet_fields``.

    Raises:
        ValueError: If the user lookup returns no data for ``user``.
    """
    user_response = client.get_user(username=user)
    # A lookup that resolves nothing carries no ``data``; fail with a clear
    # message instead of an AttributeError on ``None.id`` further down.
    if user_response is None or getattr(user_response, 'data', None) is None:
        raise ValueError(f"Twitter user {user!r} could not be resolved")

    tweet_fields = [
        'author_id',
        'created_at',
        'public_metrics',
        'text',
        'source',
        'possibly_sensitive',
        'lang',
        'entities',
        'context_annotations',
    ]

    paginator = tweepy.Paginator(
        method=client.get_users_tweets,
        id=str(user_response.data.id),
        exclude=['retweets', 'replies'],
        tweet_fields=tweet_fields)

    return list(paginator.flatten())

def get_all_tweets(users, client):
    """Fetch the tweets of every user in ``users``.

    Args:
        users: Iterable of usernames.
        client: Client forwarded to ``fetch_tweets_from_user``.

    Returns:
        list: One list of tweets per user, in the same order as ``users``.
    """
    tweets_per_user = []
    for user_name in users:
        tweets_per_user.append(fetch_tweets_from_user(user_name, client))
    return tweets_per_user


# One list of tweets per entry of ``users``; later cells index into it by position.
all_tweets = get_all_tweets(users=users, client=client)

In [20]:
def is_elon_musk_tweet(tweet):
    """Tell whether ``tweet`` was authored by the hard-coded Elon Musk account id.

    Returns:
        list: A single-element list holding the boolean flag, so the result
        can be merged into a data row with ``list.extend``.
    """
    elon_musk_id = 44196397
    authored_by_musk = tweet.author_id == elon_musk_id
    return [authored_by_musk]

def clean_tweet(tweet):
    """Remove links from a tweet's text.

    Every run of non-whitespace characters that starts with ``http`` is
    deleted; the whitespace around it is left untouched.

    Args:
        tweet: The tweet text (a string).

    Returns:
        str: The text without links.
    """
    url_pattern = re.compile(r"http\S+")
    return url_pattern.sub("", tweet)

def get_tweet_sentiment(tweet):
    """Compute the TextBlob sentiment polarity of a tweet's link-free text.

    Returns:
        list: A single-element list with the polarity value, so the result
        can be merged into a data row with ``list.extend``.
    """
    text_without_links = clean_tweet(tweet.text)
    polarity = TextBlob(text_without_links).sentiment.polarity
    return [polarity]

def get_basic_tweet_data(tweet):
    """Collect the basic per-tweet values used as the first data columns.

    Returns:
        list: ``[is_elon_musk, created_at, text_without_links, source,
        possibly_sensitive, lang]`` in exactly this order.
    """
    elon_musk_id = 44196397
    is_elon_musk = tweet.author_id == elon_musk_id
    return [
        is_elon_musk,
        tweet.created_at,
        clean_tweet(tweet.text),
        tweet.source,
        tweet.possibly_sensitive,
        tweet.lang,
    ]

def get_public_metrics(tweet, metric_names=('retweet_count', 'reply_count', 'like_count', 'quote_count')):
    """Extract a fixed, ordered set of public metrics from a tweet.

    The metrics are looked up by name rather than taken from
    ``public_metrics.values()``, so the result always lines up with the
    'retweet count', 'reply count', 'like count', 'quote count' columns no
    matter how the mapping is ordered or which additional metrics it carries.

    Args:
        tweet: Object exposing a ``public_metrics`` mapping.
        metric_names: Keys to extract, in output order.

    Returns:
        list: One value per entry of ``metric_names``; ``None`` for a metric
        that is missing from the mapping.
    """
    metrics = tweet.public_metrics or {}
    return [metrics.get(metric_name) for metric_name in metric_names]

def connect_generated_tweets_data(tweet, functions):
    """Build one flat data row by concatenating the output of every extractor.

    Args:
        tweet: The tweet handed to each extractor.
        functions: Callables that take the tweet and return an iterable of
            values.

    Returns:
        list: All extracted values, in extractor order.
    """
    return [value for extractor in functions for value in extractor(tweet)]

def make_pd_data_frame(tweets, columns, tweet_data_extract_functions, user, prefix_for_file = 'basic'):
    """Turn a user's tweets into a DataFrame and save it as CSV.

    Args:
        tweets: Iterable of tweets belonging to ``user``.
        columns: Column names; must match the length of each generated row.
        tweet_data_extract_functions: Extractors applied to every tweet via
            ``connect_generated_tweets_data``.
        user: Username, used in the output file name.
        prefix_for_file: Leading part of the output file name.

    Returns:
        pandas.DataFrame: The frame that was written to
        ``user_data/{prefix_for_file}_{user}_tweets.csv``.
    """
    data_rows = [
        connect_generated_tweets_data(tweet, tweet_data_extract_functions)
        for tweet in tweets
    ]
    df = pd.DataFrame(data=data_rows, columns=columns)

    output_path = f'user_data/{prefix_for_file}_{user}_tweets.csv'
    # ``to_csv`` does not create missing folders, so make sure the target
    # directory exists before writing (e.g. on a fresh checkout).
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    df.to_csv(output_path, index=False)

    return df

def get_all_data_frames_and_save_them(all_tweets, users, columns, tweet_data_extract_functions, prefix_for_file = 'basic'):
    """Build and save one DataFrame per user.

    ``all_tweets`` is indexed by the position of each user in ``users``, so
    both sequences have to be aligned.

    Returns:
        list: The DataFrames produced by ``make_pd_data_frame``, in the order
        of ``users``.
    """
    return [
        make_pd_data_frame(
            all_tweets[position],
            columns,
            tweet_data_extract_functions,
            user_name,
            prefix_for_file,
        )
        for position, user_name in enumerate(users)
    ]


### Ekstrakcja podstawowych informacji  

In [21]:
# Column names must follow the order in which the extract functions below emit
# their values: six basic fields, one sentiment value, then the public metrics
# (assumed to arrive as retweet, reply, like, quote counts).
columns = [
    'is Elon Musk', 'created at', 'text', 'source', 'possibly sensitive', 'lang',
    'sentiment',
    'retweet count', 'reply count', 'like count', 'quote count',
]
tweet_data_extract_functions = [
    get_basic_tweet_data,
    get_tweet_sentiment,
    get_public_metrics,
]

data_frames = get_all_data_frames_and_save_them(
    all_tweets=all_tweets,
    users=users,
    columns=columns,
    tweet_data_extract_functions=tweet_data_extract_functions,
)
# Preview the frame of the first user in ``users``.
data_frames[0].head(3200)

Unnamed: 0,is Elon Musk,created at,text,source,possibly sensitive,lang,sentiment,retweet count,reply count,like count,quote count
0,True,2022-11-15 23:18:02+00:00,Punting relaunch of Blue Verified to November ...,Twitter for iPhone,False,en,0.166667,13731,22246,197911,3051
1,True,2022-11-15 20:43:17+00:00,Important to admit when I’m wrong &amp; firing...,Twitter for iPhone,False,en,-0.050000,18784,17424,418936,3311
2,True,2022-11-15 20:40:54+00:00,Welcoming back Ligma &amp; Johnson!,Twitter for iPhone,False,en,0.000000,45143,29823,695063,11680
3,True,2022-11-15 09:35:35+00:00,"By the People,\nFor the People",Twitter for iPhone,False,en,0.000000,18773,20023,230112,2207
4,True,2022-11-15 09:19:19+00:00,Twitter is All the News,Twitter for iPhone,False,en,0.000000,30119,28046,409425,3294
...,...,...,...,...,...,...,...,...,...,...,...
702,True,2022-03-25 07:34:52+00:00,Free speech is essential to a functioning demo...,Twitter for iPhone,False,en,0.200000,42445,35991,183089,8935
703,True,2022-03-25 01:15:13+00:00,,Twitter for iPhone,False,zxx,0.000000,4544,2660,57458,332
704,True,2022-03-25 01:06:48+00:00,"Old, but good",Twitter for iPhone,False,en,0.400000,12984,4961,194030,773
705,True,2022-03-24 23:55:26+00:00,Most people think the fruit was named after th...,Twitter for iPhone,False,en,0.625000,3318,5774,55372,395


### Wycinanie kontekstu tweeta

In [22]:
def get_value_from_dict(tweet, name='annotations'):
    """Summarise the first entry stored under ``name`` in a tweet's entities.

    Args:
        tweet: Object exposing an ``entities`` mapping (or ``None``).
        name: Key of the entity list to read.

    Returns:
        list: A single-element list, so the result can be merged into a data
        row with ``list.extend``. The element is
        ``[probability, normalized_text, type]`` of the first entry, or
        ``None`` when the tweet has no entities, nothing is stored under
        ``name``, or the stored list is empty.
    """
    entities = tweet.entities
    if not entities:
        return [None]

    entries = entities.get(name)
    # An empty list is treated like a missing key instead of raising
    # IndexError on ``entries[0]``.
    if not entries:
        return [None]

    first_entry = entries[0]
    return [[first_entry['probability'], first_entry['normalized_text'], first_entry['type']]]

def get_context_annotations_domain_value(tweet, context_annotations_number=0, key='domain'):
    """Read the ``name`` of one part of a tweet's context annotation.

    Args:
        tweet: Object exposing a ``context_annotations`` sequence.
        context_annotations_number: Position of the annotation to read.
        key: Which part of the annotation to read (``'domain'`` by default;
            the notebook also uses ``'entity'``).

    Returns:
        The ``name`` stored under ``key`` of the selected annotation, or
        ``None`` when the tweet has no annotation at that position.
    """
    annotations = tweet.context_annotations
    if context_annotations_number >= len(annotations):
        return None

    selected_annotation = annotations[context_annotations_number]
    return selected_annotation[key]['name']

def create_columns_for_annotations_data(number_of_instances=6):
    """Build the column names of the annotations data frame.

    Args:
        number_of_instances: How many domain/entity column pairs to generate.

    Returns:
        list: ``'is Elon Musk'``, then a domain and an entity column for each
        instance number, then ``'annotations'``.
    """
    context_columns = [
        column
        for i in range(number_of_instances)
        for column in (f'context annotations domain {i}', f'context annotations entity {i}')
    ]
    return ['is Elon Musk'] + context_columns + ['annotations']
    
def get_all_annotations_data(tweet, number_of_instances=6):
    """Collect domain and entity names of a tweet's first context annotations.

    Args:
        tweet: The tweet handed to ``get_context_annotations_domain_value``.
        number_of_instances: How many annotation positions to read.

    Returns:
        list: For every position, the domain name followed by the entity name
        (``None`` where the tweet has no annotation at that position).
    """
    return [
        get_context_annotations_domain_value(tweet, position, part)
        for position in range(number_of_instances)
        for part in ('domain', 'entity')
    ]

# The default column layout (six domain/entity pairs) matches the default
# number of instances read by ``get_all_annotations_data``.
columns = create_columns_for_annotations_data()
tweet_data_extract_functions = [
    is_elon_musk_tweet,
    get_all_annotations_data,
    get_value_from_dict,
]

data_frame_annotation = get_all_data_frames_and_save_them(
    all_tweets=all_tweets,
    users=users,
    columns=columns,
    tweet_data_extract_functions=tweet_data_extract_functions,
    prefix_for_file='annotations',
)
# Preview the annotations frame of the first user in ``users``.
data_frame_annotation[0].head(800)

Unnamed: 0,is Elon Musk,context annotations domain 0,context annotations entity 0,context annotations domain 1,context annotations entity 1,context annotations domain 2,context annotations entity 2,context annotations domain 3,context annotations entity 3,context annotations domain 4,context annotations entity 4,context annotations domain 5,context annotations entity 5,annotations
0,True,Business Taxonomy,Financial Services Business,Business Taxonomy,Technology Business,Person,Elon Musk,Interests and Hobbies Vertical,Business & finance,Interests and Hobbies Category,Leadership,Unified Twitter Taxonomy,Elon Musk,"[0.8925, Blue Verified, Other]"
1,True,Business Taxonomy,Financial Services Business,Business Taxonomy,Technology Business,Person,Elon Musk,Interests and Hobbies Vertical,Business & finance,Interests and Hobbies Category,Leadership,Unified Twitter Taxonomy,Elon Musk,
2,True,Business Taxonomy,Financial Services Business,Business Taxonomy,Technology Business,Person,Elon Musk,Interests and Hobbies Vertical,Business & finance,Interests and Hobbies Category,Leadership,Unified Twitter Taxonomy,Elon Musk,"[0.7064, Ligma, Person]"
3,True,Business Taxonomy,Financial Services Business,Business Taxonomy,Technology Business,Person,Elon Musk,Interests and Hobbies Vertical,Business & finance,Interests and Hobbies Category,Leadership,Unified Twitter Taxonomy,Elon Musk,
4,True,Business Taxonomy,Financial Services Business,Business Taxonomy,Technology Business,News Vertical,News,News Vertical,Business & finance news,Unified Twitter Taxonomy,Business & finance,Unified Twitter Taxonomy,Tech news,"[0.9813, Twitter, Other]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
702,True,Person,Elon Musk,Entities [Entity Service],Services,Brand,Twitter,Interests and Hobbies Vertical,Business & finance,Interests and Hobbies Category,Leadership,Entities [Entity Service],Technology,"[0.9843, Twitter, Other]"
703,True,Person,Elon Musk,Interests and Hobbies Vertical,Business & finance,Interests and Hobbies Category,Leadership,Entities [Entity Service],Technology,Unified Twitter Taxonomy,Elon Musk,Unified Twitter Taxonomy,Tech personalities,
704,True,Person,Elon Musk,Interests and Hobbies Vertical,Business & finance,Interests and Hobbies Category,Leadership,Entities [Entity Service],Technology,Unified Twitter Taxonomy,Elon Musk,Unified Twitter Taxonomy,Tech personalities,
705,True,Person,Elon Musk,Interests and Hobbies Vertical,Business & finance,Interests and Hobbies Category,Leadership,Entities [Entity Service],Technology,Unified Twitter Taxonomy,Elon Musk,Unified Twitter Taxonomy,Tech personalities,
