### Inicjalizacja tokenów 

In [1]:
import configparser
import os
import re

import pandas as pd
import tweepy
from textblob import TextBlob


def get_all_access_configs():
    """Read the ``[twitter]`` section of ``config.ini`` and return its values.

    The file is looked up relative to the current working directory.

    Returns:
        A view over the values of the ``twitter`` section, in the order the
        parser yields them.

    Raises:
        KeyError: if no ``twitter`` section was loaded (``read`` itself does
            not fail when the file is absent).
    """
    parser = configparser.RawConfigParser()
    parser.read('config.ini')
    twitter_section = parser['twitter']
    return twitter_section.values()

# Only the last config value is kept. The positional unpacking requires the
# [twitter] section to yield exactly five values, with the bearer token last.
_, _, _, _, bearer_token = get_all_access_configs()

# Client object handed to the fetch helpers defined further down.
client = tweepy.Client(bearer_token)

### Funkcje pomocnicze do pobierania danych

In [61]:
# Usernames whose tweets are downloaded. The order of this list is also the
# order of the per-user tweet lists and data frames built from it.
users = ['elonmusk', 'JeffBezos', 'BarackObama', 'SpeakerPelosi', 'KSI', 'saylor', 'SHAQ', 'benshapiro']

def fetch_tweets_from_user(user, client):
    """Download the tweets of one account, excluding retweets and replies.

    The username is resolved through ``client.get_user``; the resulting id is
    then passed to a ``tweepy.Paginator`` over ``client.get_users_tweets``.

    Args:
        user: Username passed to ``client.get_user(username=...)``.
        client: Object providing ``get_user`` and ``get_users_tweets``
            (a ``tweepy.Client`` in this notebook).

    Returns:
        list: Every tweet yielded by the paginator's ``flatten()``.

    Raises:
        ValueError: If the lookup response carries no user data, i.e. the
            username could not be resolved.
    """
    tweet_fields = [
        'author_id',
        'created_at',
        'public_metrics',
        'text',
        'source',
        'possibly_sensitive',
        'lang',
        'entities',
        'context_annotations',
    ]

    user_response = client.get_user(username=user)
    # Fail with a readable message instead of an AttributeError on None when
    # the lookup returned no data for this username.
    if user_response.data is None:
        raise ValueError(f'Twitter user {user!r} could not be resolved')
    user_id = str(user_response.data.id)

    paginator = tweepy.Paginator(
        method=client.get_users_tweets,
        id=user_id,
        exclude=['retweets', 'replies'],
        tweet_fields=tweet_fields)

    return list(paginator.flatten())

def get_all_tweets(users, client):
    """Fetch the tweets of every user, one list per user.

    Args:
        users: Iterable of usernames.
        client: Client object forwarded to ``fetch_tweets_from_user``.

    Returns:
        list: One list of tweets per entry of ``users``, in the same order.
    """
    tweets_per_user = []
    for username in users:
        tweets_per_user.append(fetch_tweets_from_user(username, client))
    return tweets_per_user


# Download the tweets of every listed user once; the extraction cells below
# reuse this list (all_tweets[i] belongs to users[i]).
all_tweets = get_all_tweets(users, client)

In [79]:
def is_elon_musk_tweet(tweet):
    """Tell whether the tweet's ``author_id`` equals the hard-coded id below.

    Returns:
        list: A one-element list holding the boolean, so the result can be
        appended to a data row with ``extend``.
    """
    elon_musk_id = 44196397
    authored_by_musk = tweet.author_id == elon_musk_id
    return [authored_by_musk]

def clean_tweet(tweet):
    """Remove every ``http...`` token from the given text.

    Args:
        tweet: The tweet text as a string.

    Returns:
        str: The text with each run of non-whitespace characters starting at
        ``http`` deleted; surrounding whitespace is left untouched.
    """
    link_pattern = re.compile(r"http\S+")
    return link_pattern.sub("", tweet)

def get_tweet_sentiment(tweet):
    """Compute the TextBlob sentiment polarity of the cleaned tweet text.

    Returns:
        list: A one-element list with the polarity, ready to be appended to a
        data row with ``extend``.
    """
    cleaned_text = clean_tweet(tweet.text)
    polarity = TextBlob(cleaned_text).sentiment.polarity
    return [polarity]

def get_basic_tweet_data(tweet):
    """Collect the basic per-tweet values used by the "basic" data frame.

    Returns:
        list: Six values in this order: author-matches-hard-coded-id flag,
        ``created_at``, cleaned text, ``source``, ``possibly_sensitive``,
        ``lang``.
    """
    elon_musk_id = 44196397
    written_by_musk = tweet.author_id == elon_musk_id
    created_at = tweet.created_at
    cleaned_text = clean_tweet(tweet.text)
    return [
        written_by_musk,
        created_at,
        cleaned_text,
        tweet.source,
        tweet.possibly_sensitive,
        tweet.lang,
    ]

def get_public_metrics(tweet, metric_names=('retweet_count', 'reply_count', 'like_count', 'quote_count')):
    """Return selected engagement counters of a tweet in a fixed order.

    The counters are looked up by name rather than taken from
    ``public_metrics.values()``, so the result no longer depends on the key
    order of the payload, and extra metrics in the payload cannot make the
    row wider than the column list it is paired with.

    Args:
        tweet: Object whose ``public_metrics`` attribute is a mapping from
            metric name to count.
        metric_names: Metric keys to extract, in output order. The default
            matches the 'retweet count', 'reply count', 'like count' and
            'quote count' columns used by this notebook.

    Returns:
        list: One value per entry of ``metric_names``; ``None`` for a metric
        missing from the mapping.
    """
    metrics = tweet.public_metrics
    return [metrics.get(metric_name) for metric_name in metric_names]

def connect_generated_tweets_data(tweet, functions):
    """Build one flat data row by applying every extract function to a tweet.

    Args:
        tweet: The object passed to each function.
        functions: Iterable of callables; each must return an iterable of
            values for ``tweet``.

    Returns:
        list: The values of all functions concatenated in call order.
    """
    return [value for function in functions for value in function(tweet)]

def make_pd_data_frame(tweets, columns, tweet_data_extract_functions, user, prefix_for_file = 'basic'):
    """Build a data frame from one user's tweets and save it as CSV.

    Args:
        tweets: Iterable of tweets; each becomes one row.
        columns: Column names; must match the number and order of values the
            extract functions produce per tweet.
        tweet_data_extract_functions: Callables applied to every tweet via
            ``connect_generated_tweets_data``.
        user: Username, used in the output file name.
        prefix_for_file: File name prefix distinguishing the kind of export.

    Returns:
        pandas.DataFrame: The frame that was written to
        ``user_data/<prefix_for_file>_<user>_tweets.csv``.
    """
    data_rows = [
        connect_generated_tweets_data(tweet, tweet_data_extract_functions)
        for tweet in tweets
    ]
    df = pd.DataFrame(data=data_rows, columns=columns)

    output_path = f'user_data/{prefix_for_file}_{user}_tweets.csv'
    # to_csv does not create missing directories, so make sure the target
    # folder exists before writing (e.g. on a fresh checkout).
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    df.to_csv(output_path, index=False)

    return df

def get_all_data_frames_and_save_them(all_tweets, users, columns, tweet_data_extract_functions, prefix_for_file = 'basic'):
    """Create and save one data frame per user.

    Args:
        all_tweets: Sequence of tweet lists; ``all_tweets[i]`` must hold the
            tweets of ``users[i]``.
        users: Usernames, used for the output file names.
        columns: Column names shared by every frame.
        tweet_data_extract_functions: Callables producing the row values.
        prefix_for_file: File name prefix forwarded to ``make_pd_data_frame``.

    Returns:
        list: The data frames, in the order of ``users``.
    """
    return [
        make_pd_data_frame(
            all_tweets[position],
            columns,
            tweet_data_extract_functions,
            username,
            prefix_for_file,
        )
        for position, username in enumerate(users)
    ]


### Ekstrakcja podstawowych informacji  

In [80]:
# Column names of the "basic" data frames. Their order has to match the order
# in which the extract functions below emit values: the first six come from
# get_basic_tweet_data, 'sentiment' from get_tweet_sentiment and the remaining
# four are filled from get_public_metrics.
columns=[
    'is Elon musk',
    'created at',
    'text',
    'source',
    'possibly sensitive',
    'lang',
    'sentiment',
    'retweet count',
    'reply count',
    'like count',
    'quote count']
# Applied to every tweet in this order; their results are concatenated into one row.
tweet_data_extract_functions = [get_basic_tweet_data, get_tweet_sentiment, get_public_metrics]

# Builds one frame per user and writes each to user_data/basic_<user>_tweets.csv.
data_frames = get_all_data_frames_and_save_them(all_tweets, users, columns, tweet_data_extract_functions)
# Preview of the frame for the first entry of `users`.
data_frames[0].head(10)

Unnamed: 0,is Elon musk,created at,text,source,possibly sensitive,lang,sentiment,retweet count,reply count,like count,quote count
0,True,2022-10-30 11:48:04+00:00,But Management 201 is such a tantalizing carrot …,Twitter for iPhone,False,en,0.0,2270,2984,76023,170
1,True,2022-10-30 11:42:56+00:00,So demanding to allow a mere 30 days to learn ...,Twitter for iPhone,False,en,0.25,3181,2910,83549,136
2,True,2022-10-30 11:25:19+00:00,Just received this email from Twitter. This is...,Twitter for iPhone,False,en,0.1,19499,20849,248300,2956
3,True,2022-10-29 17:36:31+00:00,#SoBrave,Twitter for iPhone,False,qht,0.0,5347,5131,125344,309
4,True,2022-10-29 17:35:45+00:00,"Finally, the truth that carbs are amazing can ...",Twitter for iPhone,False,en,0.375,47529,39183,596677,6514
5,True,2022-10-29 17:33:16+00:00,Fresh baked bread &amp; pastries are some of t...,Twitter for iPhone,False,en,0.55,28990,27034,453441,4317
6,True,2022-10-29 00:14:33+00:00,"To be super clear, we have not yet made any ch...",Twitter for iPhone,False,en,0.216667,21119,41154,231585,3574
7,True,2022-10-28 21:33:03+00:00,Ligma Johnson had it coming 🍆 💦,Twitter for iPhone,False,en,0.0,60751,35294,631705,12520
8,True,2022-10-28 21:16:42+00:00,Comedy is now legal on Twitter,Twitter for iPhone,False,en,0.2,246982,87685,2236851,37529
9,True,2022-10-28 18:18:55+00:00,Twitter will be forming a content moderation c...,Twitter for iPhone,False,en,-0.065625,61751,50869,599303,13719


### Wycinanie kontekstu tweeta

In [84]:
def get_value_from_dict(tweet, name='annotations'):
    """Extract the first entry stored under ``name`` in ``tweet.entities``.

    Args:
        tweet: Object whose ``entities`` attribute is a mapping or ``None``.
        name: Key inside ``entities`` whose list of entries is inspected.

    Returns:
        list: A one-element list so the value fills a single column when the
        row is assembled with ``extend``. The element is
        ``[probability, normalized_text, type]`` of the first entry, or
        ``None`` when the tweet has no entities, the key is absent, or the
        list under the key is empty.
    """
    entities = tweet.entities
    if not entities:
        return [None]

    entries = entities.get(name)
    # An empty list must be treated like a missing key; indexing [0] on it
    # would raise IndexError.
    if not entries:
        return [None]

    first_entry = entries[0]
    return [[first_entry['probability'], first_entry['normalized_text'], first_entry['type']]]

def get_context_annotations_domain_value(tweet, context_annotations_number=0, key='domain'):
    """Return the ``name`` stored under ``key`` of one context annotation.

    Args:
        tweet: Object whose ``context_annotations`` attribute is a sequence
            of mappings.
        context_annotations_number: Index of the annotation to read.
        key: Sub-mapping to read the ``name`` from (``'domain'`` by default;
            ``'entity'`` is used elsewhere in this notebook).

    Returns:
        The ``name`` value, or ``None`` when the tweet has no annotation at
        the requested index.
    """
    annotations = tweet.context_annotations
    if context_annotations_number >= len(annotations):
        return None
    return annotations[context_annotations_number][key]['name']

def create_columns_for_annotations_data(number_of_instances=6):
    """Generate the column names of the annotations data frame.

    Args:
        number_of_instances: How many domain/entity column pairs to create.

    Returns:
        list: ``'is Elon Musk'``, then a ``domain``/``entity`` column pair
        for each index below ``number_of_instances``, then ``'annotations'``.
    """
    pair_columns = [
        column_name
        for i in range(number_of_instances)
        for column_name in (f'context annotations domain {i}', f'context annotations entity {i}')
    ]
    return ['is Elon Musk', *pair_columns, 'annotations']
    
def get_all_annotations_data(tweet, number_of_instances=6):
    """Collect domain and entity names of the first context annotations.

    Args:
        tweet: Tweet passed on to ``get_context_annotations_domain_value``.
        number_of_instances: Number of annotation positions to read.

    Returns:
        list: For each position, the domain name followed by the entity name
        (``None`` where the tweet has no annotation at that position), giving
        ``2 * number_of_instances`` values.
    """
    return [
        get_context_annotations_domain_value(tweet, position, key)
        for position in range(number_of_instances)
        for key in ('domain', 'entity')
    ]

# NOTE: `columns` and `tweet_data_extract_functions` are rebound here and
# replace the values defined for the basic extraction above.
columns = create_columns_for_annotations_data()
# Order matches the generated columns: author flag first, then the
# domain/entity pairs, then the single 'annotations' value.
tweet_data_extract_functions = [is_elon_musk_tweet, get_all_annotations_data, get_value_from_dict]

# Builds one frame per user and writes each to user_data/annotations_<user>_tweets.csv.
data_frame_annotation = get_all_data_frames_and_save_them(all_tweets, users, columns, tweet_data_extract_functions, 'annotations')
# Preview of the frame for the first entry of `users`.
data_frame_annotation[0].head(10)

Unnamed: 0,is Elon Musk,context annotations domain 0,context annotations entity 0,context annotations domain 1,context annotations entity 1,context annotations domain 2,context annotations entity 2,context annotations domain 3,context annotations entity 3,context annotations domain 4,context annotations entity 4,context annotations domain 5,context annotations entity 5,annotations
0,True,Business Taxonomy,Financial Services Business,Business Taxonomy,Technology Business,Person,Elon Musk,Interests and Hobbies Vertical,Business & finance,Interests and Hobbies Category,Leadership,Unified Twitter Taxonomy,Elon Musk,
1,True,Business Taxonomy,Financial Services Business,Business Taxonomy,Technology Business,Person,Elon Musk,Interests and Hobbies Vertical,Business & finance,Interests and Hobbies Category,Leadership,Unified Twitter Taxonomy,Elon Musk,
2,True,Business Taxonomy,Financial Services Business,Business Taxonomy,Technology Business,News Vertical,News,News Vertical,Business & finance news,Unified Twitter Taxonomy,Business & finance,Unified Twitter Taxonomy,Tech news,
3,True,Business Taxonomy,Financial Services Business,Business Taxonomy,Technology Business,Person,Elon Musk,Interests and Hobbies Vertical,Business & finance,Interests and Hobbies Category,Leadership,Unified Twitter Taxonomy,Elon Musk,
4,True,Business Taxonomy,Financial Services Business,Business Taxonomy,Technology Business,Person,Elon Musk,Interests and Hobbies Vertical,Business & finance,Interests and Hobbies Category,Leadership,Unified Twitter Taxonomy,Elon Musk,
5,True,Business Taxonomy,Financial Services Business,Business Taxonomy,Technology Business,Person,Elon Musk,Interests and Hobbies Vertical,Business & finance,Interests and Hobbies Category,Leadership,Unified Twitter Taxonomy,Elon Musk,
6,True,Business Taxonomy,Financial Services Business,Business Taxonomy,Technology Business,News Vertical,News,News Vertical,Business & finance news,Unified Twitter Taxonomy,Business & finance,Unified Twitter Taxonomy,Tech news,"[0.9803, Twitter, Other]"
7,True,Business Taxonomy,Financial Services Business,Business Taxonomy,Technology Business,Person,Elon Musk,Interests and Hobbies Vertical,Business & finance,Interests and Hobbies Category,Leadership,Unified Twitter Taxonomy,Elon Musk,"[0.9745, Ligma Johnson, Person]"
8,True,Business Taxonomy,Financial Services Business,Business Taxonomy,Technology Business,News Vertical,News,News Vertical,Business & finance news,Unified Twitter Taxonomy,Business & finance,Unified Twitter Taxonomy,Tech news,"[0.971, Twitter, Other]"
9,True,Business Taxonomy,Financial Services Business,Business Taxonomy,Technology Business,News Vertical,News,News Vertical,Business & finance news,Unified Twitter Taxonomy,Business & finance,Unified Twitter Taxonomy,Tech news,"[0.9778, Twitter, Other]"
