# Data preparation

In [1]:
!pip install emoji
import nltk
nltk.download('punkt')

Collecting emoji
[?25l  Downloading https://files.pythonhosted.org/packages/40/8d/521be7f0091fe0f2ae690cc044faf43e3445e0ff33c574eae752dd7e39fa/emoji-0.5.4.tar.gz (43kB)
[K     |███████▌                        | 10kB 15.1MB/s eta 0:00:01[K     |███████████████                 | 20kB 7.4MB/s eta 0:00:01[K     |██████████████████████▋         | 30kB 10.3MB/s eta 0:00:01[K     |██████████████████████████████▏ | 40kB 6.3MB/s eta 0:00:01[K     |████████████████████████████████| 51kB 6.4MB/s 
[?25hBuilding wheels for collected packages: emoji
  Building wheel for emoji (setup.py) ... [?25l[?25hdone
  Created wheel for emoji: filename=emoji-0.5.4-cp36-none-any.whl size=42175 sha256=a91abc038599530ca559da01ee4b5c4c3e97f15da857e26ffba7eb5c927f9e81
  Stored in directory: /root/.cache/pip/wheels/2a/a9/0a/4f8e8cce8074232aba240caca3fade315bb49fac68808d1a9c
Successfully built emoji
Installing collected packages: emoji
Successfully installed emoji-0.5.4
[nltk_data] Downloading package pu

True

In [1]:
from nltk.tokenize import word_tokenize
import matplotlib.pyplot as plt
from collections import Counter
from textblob import TextBlob
from bs4 import BeautifulSoup
import matplotlib.cm as cm
from string import punctuation
from string import digits
import urllib.request
import networkx as nx
import pandas as pd
import itertools
import emoji
import math
import time
import json
import sys
import re
import os

# TODO: remove this when NetworkX is fixed
from warnings import simplefilter
import matplotlib.cbook
simplefilter("ignore", matplotlib.cbook.mplDeprecation)

# Supress scientific notation
pd.options.display.float_format = '{:.2f}'.format

In [2]:
# Import tweets
tweets = pd.read_csv('https://raw.githubusercontent.com/cpenalozag/twitter_network/master/tweets/statuses.csv')
tweets["id"] = pd.to_numeric(tweets["id"])
tweets["tweet_id"] = pd.to_numeric(tweets["tweet_id"])
print(len(tweets))
tweets.head()

203176


Unnamed: 0,id,tweet_id,created_at,text,favorite_count,retweet_count,phone,sensitive,hashtags,no_hashtags,mentions,no_mentions,no_urls,no_media
0,113127283,1172524340002390000,9/13/19 14:56,@enriqueserrano3 @Restrebooks @ELTIEMPO @enriq...,1,0,Twitter Web App,False,-,0,"[238261607, 3252651209, 9633802, 1073577478566...",6,0,1
1,113127283,1172523885654350000,9/13/19 14:54,@enriqueserrano3 @Restrebooks @ELTIEMPO @enriq...,2,0,Twitter Web App,False,-,0,"[238261607, 3252651209, 9633802, 1073577478566...",5,0,1
2,113127283,1172523505537170000,9/13/19 14:52,@enriqueserrano3 @Restrebooks @ELTIEMPO @enriq...,1,0,Twitter Web App,False,-,0,"[238261607, 3252651209, 9633802, 1073577478566...",6,0,1
3,113127283,1172522933601880000,9/13/19 14:50,@enriqueserrano3 @Restrebooks @ELTIEMPO @enriq...,0,0,Twitter Web App,False,-,0,"[238261607, 3252651209, 9633802, 1073577478566...",6,0,1
4,113127283,1172522449184960000,9/13/19 14:48,@enriqueserrano3 @Restrebooks @ELTIEMPO @enriq...,0,0,Twitter Web App,False,-,0,"[238261607, 3252651209, 9633802, 1073577478566...",4,0,1


In [3]:
# Import spanish stop word dictionary
url_sw = 'https://raw.githubusercontent.com/cpenalozag/twitter_network/master/utils/stopwords-es.json'
response_sw = urllib.request.urlopen(url_sw)
data_sw = response_sw.read()

stop_words = set(json.loads(data_sw))

# Import emoji meanings
emoji_translations = pd.read_csv('https://raw.githubusercontent.com/cpenalozag/twitter_network/master/utils/emojis_translated.csv')

# Get spanish meaning of an emoji
def emoji_meaning(emoji):
    meaning = emoji_translations.loc[emoji_translations['emoji'] == emoji]['translation']
    return meaning.values[0] if not meaning.empty else ''

# Transformations to remove digits and punctuation
remove_digits = str.maketrans('', '', digits)
remove_punctuation = str.maketrans('', '', punctuation)

In [4]:
from utils.ipython_exit import exit

""" Processes text data in tweets
    
    text: text of tweet
    word_counts: word count dictionary
    
    returns processed text, length of content, polarity, subjectivity
"""
def process_text(text, word_counts, user_id):
    # Remove links
    text = ' '.join(re.sub("(\w+:\/\/\S+)", " ", text).split())
    
    # Remove file names
    
        
    # Remove mentions
    text = ' '.join(re.sub("(@[A-Za-z0-9^\w]+)", " ", text.replace('@ ','@').replace('# ','#')).split())
        
    # Replace hashtags with words
    if text.count('#')>0:
        text = ' '.join(re.findall('[A-Z][^A-Z]*', text.replace('#', ' ')))
            
    #Remove HTML tags
    text = BeautifulSoup(text).get_text()
    
    # Save content length (exluding links and mentions)
    length = len(text)
        
    # Remove punctuation symbols
    text = ' '.join(re.sub("[\.\,\¡\¿\!\?\:\;\-\=\*\(\)\[\]\"\'\“\_\+\”\%\/\‘\’]", " ", text).split())
    text = text.translate(remove_digits).translate(remove_punctuation)
        
    # Lower case to avoid case sensitive problems
    text = text.lower()
        
    # Replace emojis with names 
    text = emoji.demojize(text)
        
    # Add space between emojis and other characters
    ind = -2
    for c in range(text.count(':')):
        ind = text.find(':',ind+2)
        if c%2==0:
            newLetter = ' :'
        else:
            newLetter = ': '
        text ="".join((text[:ind],newLetter,text[ind+1:]))
            
    # Replace emoji names with spanish meaning
    result = []
    parts = text.split(' ')
    for part in parts:
        if part:
            if part[0]==':':
                em = handle_emoji_tone(part)
                em = emoji_meaning(em)
                if em:
                    result.append(em)
            else:
                if part not in stop_words:
                    if part not in word_counts:
                        word_counts[part] = 1
                    else:
                        word_counts[part] += 1
                result.append(part)
        
    text = ' '.join(result)
        
    # Filter using NLTK library append it to a string
    word_tokens = word_tokenize(text)
    result = [w for w in word_tokens if not w in stop_words]
    text = ' '.join(result)
    
    # Check if text contains at least a word
    if len(text)>10:
        analysis = TextBlob(text)
        try:
            # Sentiment analysis
            eng = analysis.translate(to='en')
            sentiment = eng.sentiment     
            polarity = sentiment.polarity
            subjectivity = sentiment.subjectivity
            # Sleep to comply with google translate's rate limit per 100 seconds
            time.sleep(.39)
        except Exception as e:
            # Daily rate limit reached, pause execution to restart with new IP
            if str(e)=='HTTP Error 429: Too Many Requests':
                print('Error')
                print('Last user:', user_id)
                exit()
            # If there is a problem with translation assign neutral sentiment and subjectivity
            else:
                polarity = 0.0
                subjectivity = 0.0
    
    else:
        # Neutral polarity and subjectivity if text does not contain a large enough message
        polarity = 0.0
        subjectivity = 0.0
    

    return text, length, polarity, subjectivity

tones = ['_light_skin_tone','_medium-light_skin_tone','_medium_skin_tone', 
             '_medium-dark_skin_tone','_dark_skin_tone']

# Method that removes the tone from emojis
def handle_emoji_tone(emoji):  
    for t in tones:
        if t in emoji:
            tone = t
            return emoji.replace(tone,'')
    return emoji


In [5]:
# Import user data
user_info=pd.read_csv('https://raw.githubusercontent.com/cpenalozag/twitter_network/master/network-data/user_info.csv')
user_info["id"] = pd.to_numeric(user_info["id"])
user_ids = list(user_info['id'])
user_info.head()

Unnamed: 0,id,katz,betweenness,authority,screen_name,followers,friends,verified,created_at,listed,tipo
0,426146744.0,1.58,0.0,0.0,SteevenOrozco,655637,476,False,2011,842,opinion
1,162926902.0,3.46,0.01,0.0,VivaAirCol,210761,30037,False,2010,475,empresa
2,167435593.0,4.89,0.0,0.0,Dimayor,1314384,147,True,2010,1308,competencia
3,269039712.0,1.38,0.0,0.0,cecorrehumor,156975,392,False,2011,183,entretenimiento
4,62561348.0,2.05,0.0,0.0,ReddsColombia,25319,2517,True,2009,62,empresa


## Tweet data processing

In [None]:
# List to hold part of the dataset
text_analysis = []
text_analysis.append(['id', 'tweet_id','engagement',
            'effective_length', 'polarity', 'subjectivity'])

# List to store average engagement
avg_eng = []
avg_eng.append(['id', 'average_engagement', 'common_words', 'common_ht_words',
               'common_hts'])

''' Text analysis '''

last_uid = 968649338
found_start = False

for user_id in user_ids:
    if not found_start and user_id != last_uid:
        continue
    else:
        found_start = True
    
    # Total engagement
    eng = 0
    tweet_count = 0
    
    # Frequent word dictionary
    freq_words = {}

    # Frequent hashtag dictionary
    freq_hashtags = {}

    # Frequent words in hashtags dictionary
    freq_ht_words = {}
    
    user_tweets = tweets.loc[tweets['id'] == user_id]
    for index, row in user_tweets.iterrows():
      
        # Add current engagement
        currEng = int(row['favorite_count'])+int(row['retweet_count'])
        eng += currEng
        tweet_count +=1
        
        # Get hashtags
        ht = row['hashtags'].split(';') if row['hashtags'] != '-' else []
        
        # Update hashtag count
        for hashtag in ht:
            if hashtag not in freq_hashtags:
                freq_hashtags[hashtag] = 1
            else:
                freq_hashtags[hashtag] += 1
        
        # Separate hashtags by capitalization
        ht_words = [re.findall('[a-zA-Z][^A-Z]*', w) for w in ht]
        
        # Create a list with all the words in the hashtags
        hts = []
        for h in ht_words:
            hts = hts + h
        
        # Remove digits and lower caps for every hashtag word
        hts = [item.translate(remove_digits).lower() for item in hts]
        
        # Update hashtag word frequencies
        for word in hts:
            if word not in stop_words:
                if word not in freq_ht_words:
                    freq_ht_words[word] = 1
                else:
                    freq_ht_words[word] += 1
        
        text, length, polarity, subjectivity = process_text(row['text'], freq_words, user_id)
        
        # Update text in  data frame
        text_analysis.append([user_id, row['tweet_id'], currEng, length, polarity, subjectivity])
    
    print(tweet_count, 'tweets analysed for user', user_id)
    if tweet_count > 0:
        average_engagement = eng / tweet_count
    else:
        average_engagement = 0
    c_words = Counter(freq_words)
    c_ht_words = Counter(freq_ht_words)
    c_hashtags = Counter(freq_hashtags)
    avg_eng.append([user_id, average_engagement, c_words.most_common(3),
                   c_ht_words.most_common(3), c_hashtags.most_common(3)])

142 tweets analysed for user 968649338.0
68 tweets analysed for user 37462487.0
0 tweets analysed for user 8.23e+17
0 tweets analysed for user 7.74e+17
49 tweets analysed for user 152010594.0
18 tweets analysed for user 107116858.0
33 tweets analysed for user 153180494.0
139 tweets analysed for user 62959847.0
165 tweets analysed for user 309295164.0
93 tweets analysed for user 143490635.0
184 tweets analysed for user 135953646.0
289 tweets analysed for user 157859662.0
34 tweets analysed for user 127925615.0
0 tweets analysed for user 8.67e+17
181 tweets analysed for user 86201320.0
85 tweets analysed for user 570274533.0
134 tweets analysed for user 177980964.0
65 tweets analysed for user 327670033.0
128 tweets analysed for user 56724999.0
115 tweets analysed for user 177641139.0
122 tweets analysed for user 55379094.0
114 tweets analysed for user 184828053.0
8 tweets analysed for user 2175213942.0
90 tweets analysed for user 1112004948.0
0 tweets analysed for user 9.94e+17
156 tweet

In [30]:
# Used to remove data about last user being processed when execution stopped due to rate limit
print(len(text_analysis), len(avg_eng))
print(avg_eng[-1])
last_id = 968649338.0
text_analysis = [l for l in text_analysis if not last_id in l]
avg_eng = [l for l in avg_eng if not last_id in l]
print(len(text_analysis), len(avg_eng))

13945 135
[438407794.0, 11.04225352112676, [('ética', 39), ('red', 37), ('tuitdebate', 20)], [('tica', 37), ('red', 37), ('gabo', 8)], [('ÉticaEnRed', 37), ('desinformación', 8), ('periodismo', 6)]]
13945 135


In [31]:
# Save tweet analysis in a dataframe
analysis_results = pd.DataFrame(text_analysis[1:])
analysis_results.columns = text_analysis[0]
analysis_results["id"] = pd.to_numeric(analysis_results["id"])
analysis_results.head()

Unnamed: 0,id,tweet_id,engagement,effective_length,polarity,subjectivity
0,1932444973.0,1107835986975820000,1,0,0.0,0.0
1,1932444973.0,1107784445648660000,10,24,0.0,0.0
2,1932444973.0,1107742545700880000,1,0,0.0,0.0
3,1932444973.0,1107730108499020000,0,0,0.0,0.0
4,1932444973.0,1107700037168050000,5,15,0.0,0.0


In [32]:
# Save user engagement in a dataframe
user_engagement = pd.DataFrame(avg_eng[1:])
user_engagement.columns = avg_eng[0]
user_engagement["id"] = pd.to_numeric(user_engagement["id"])
user_engagement.head()

Unnamed: 0,id,average_engagement,common_words,common_ht_words,common_hts
0,1932444973.0,23.9,"[(muevo, 13), (representante, 9), (senador, 7)]","[(muevo, 13), (feliz, 5), (glifosato, 3)]","[(YoMeMuevo, 13), (ElGlifosatoMata, 2), (Movil..."
1,390845923.0,172.92,"[(viajando, 36), (colombia, 36), (amigo, 23)]","[(colombia, 31), (mascotas, 6), (posudas, 6)]","[(colombia, 17), (Colombia, 14), (SpamDeMascot..."
2,156456941.0,194.2,"[(ve, 46), (páramo, 9), (salva, 6)]","[(páramo, 7), (salva, 6), (chinayears, 2)]","[(SalvaMiPáramo, 6), (China70years, 2), (DiaMu..."
3,38022021.0,4.54,"[(summit, 59), (colombia, 55), (digital, 47)]","[(summit, 59), (colombia, 53), (inteligencia, ...","[(CIOSummit2019, 59), (Colombia40, 51), (Intel..."
4,86470793.0,1045.9,"[(premios, 5), (lunes, 4), (miaw, 4)]","[(premios, 6), (lunes, 2), (miaw, 2)]","[(MTVLAROSLAMAFE, 11), (PremiosMTVMIAW, 3), (P..."


In [33]:
# Merge tweet analysis and user engagement in results data frame
analysis_results = pd.merge(analysis_results, user_engagement, on='id')
print(len(analysis_results))
analysis_results.head()

13936


Unnamed: 0,id,tweet_id,engagement,effective_length,polarity,subjectivity,average_engagement,common_words,common_ht_words,common_hts
0,1932444973.0,1107835986975820000,1,0,0.0,0.0,23.9,"[(muevo, 13), (representante, 9), (senador, 7)]","[(muevo, 13), (feliz, 5), (glifosato, 3)]","[(YoMeMuevo, 13), (ElGlifosatoMata, 2), (Movil..."
1,1932444973.0,1107784445648660000,10,24,0.0,0.0,23.9,"[(muevo, 13), (representante, 9), (senador, 7)]","[(muevo, 13), (feliz, 5), (glifosato, 3)]","[(YoMeMuevo, 13), (ElGlifosatoMata, 2), (Movil..."
2,1932444973.0,1107742545700880000,1,0,0.0,0.0,23.9,"[(muevo, 13), (representante, 9), (senador, 7)]","[(muevo, 13), (feliz, 5), (glifosato, 3)]","[(YoMeMuevo, 13), (ElGlifosatoMata, 2), (Movil..."
3,1932444973.0,1107730108499020000,0,0,0.0,0.0,23.9,"[(muevo, 13), (representante, 9), (senador, 7)]","[(muevo, 13), (feliz, 5), (glifosato, 3)]","[(YoMeMuevo, 13), (ElGlifosatoMata, 2), (Movil..."
4,1932444973.0,1107700037168050000,5,15,0.0,0.0,23.9,"[(muevo, 13), (representante, 9), (senador, 7)]","[(muevo, 13), (feliz, 5), (glifosato, 3)]","[(YoMeMuevo, 13), (ElGlifosatoMata, 2), (Movil..."


In [34]:
# Write results to file
file_name = 'analysis_results.csv'
if not os.path.exists(file_name):
    analysis_results.to_csv(file_name, index=False)
else:
    analysis_results.to_csv(file_name, mode='a', header=False, index=False)

## Merging gathered data to create final dataset


In [15]:
# Import sentiment analysis results
sentiment_results = pd.read_csv('https://raw.githubusercontent.com/cpenalozag/twitter_network/master/dataset/analysis_results.csv')
sentiment_results["id"] = pd.to_numeric(sentiment_results["id"])
sentiment_results["tweet_id"] = pd.to_numeric(sentiment_results["tweet_id"])
print(len(sentiment_results))
sentiment_results.head()

193959


Unnamed: 0,id,tweet_id,engagement,effective_length,polarity,subjectivity,average_engagement,common_words,common_ht_words,common_hts
0,269039712.0,1.17269e+18,40,171,0.0,0.0,18.087379,"[('gracias', 33), ('abrazo', 17), ('amigo', 14)]","[('feliz', 6), ('martes', 3), ('tbt', 2)]","[('FelizMartes', 3), ('felizcumpleañosbogotá',..."
1,269039712.0,1.17221e+18,5,91,0.0,0.0,18.087379,"[('gracias', 33), ('abrazo', 17), ('amigo', 14)]","[('feliz', 6), ('martes', 3), ('tbt', 2)]","[('FelizMartes', 3), ('felizcumpleañosbogotá',..."
2,269039712.0,1.1722e+18,65,28,0.0,0.0,18.087379,"[('gracias', 33), ('abrazo', 17), ('amigo', 14)]","[('feliz', 6), ('martes', 3), ('tbt', 2)]","[('FelizMartes', 3), ('felizcumpleañosbogotá',..."
3,269039712.0,1.17216e+18,16,189,0.2,0.25,18.087379,"[('gracias', 33), ('abrazo', 17), ('amigo', 14)]","[('feliz', 6), ('martes', 3), ('tbt', 2)]","[('FelizMartes', 3), ('felizcumpleañosbogotá',..."
4,269039712.0,1.17216e+18,10,51,-0.1,0.5,18.087379,"[('gracias', 33), ('abrazo', 17), ('amigo', 14)]","[('feliz', 6), ('martes', 3), ('tbt', 2)]","[('FelizMartes', 3), ('felizcumpleañosbogotá',..."


In [16]:
user_info.head()

Unnamed: 0,id,katz,betweenness,authority,screen_name,followers,friends,verified,created_at,listed,tipo
0,426146744.0,1.580982,0.000668,2.9e-05,SteevenOrozco,655637,476,False,2011,842,opinion
1,162926902.0,3.461433,0.00576,0.000456,VivaAirCol,210761,30037,False,2010,475,empresa
2,167435593.0,4.891715,0.003585,0.000369,Dimayor,1314384,147,True,2010,1308,competencia
3,269039712.0,1.37557,0.000193,2.3e-05,cecorrehumor,156975,392,False,2011,183,entretenimiento
4,62561348.0,2.050953,0.000549,0.000141,ReddsColombia,25319,2517,True,2009,62,empresa


In [17]:
# Remove unnecessary columns from tweets dataset
tweets = tweets.drop(['text','phone','hashtags','mentions'], axis=1)
tweets.head()

Unnamed: 0,id,tweet_id,created_at,favorite_count,retweet_count,sensitive,no_hashtags,no_mentions,no_urls,no_media
0,113127283.0,1.17252e+18,9/13/19 14:56,1,0,False,0,6,0,1
1,113127283.0,1.17252e+18,9/13/19 14:54,2,0,False,0,5,0,1
2,113127283.0,1.17252e+18,9/13/19 14:52,1,0,False,0,6,0,1
3,113127283.0,1.17252e+18,9/13/19 14:50,0,0,False,0,6,0,1
4,113127283.0,1.17252e+18,9/13/19 14:48,0,0,False,0,4,0,1


In [18]:
# Merge tweet data with sentiment analysis data
processed_data = pd.merge(sentiment_results, tweets, on='tweet_id')
print(len(processed_data))
processed_data.head()

6567365


Unnamed: 0,id_x,tweet_id,engagement,effective_length,polarity,subjectivity,average_engagement,common_words,common_ht_words,common_hts,id_y,created_at,favorite_count,retweet_count,sensitive,no_hashtags,no_mentions,no_urls,no_media
0,269039712.0,1.17269e+18,40,171,0.0,0.0,18.087379,"[('gracias', 33), ('abrazo', 17), ('amigo', 14)]","[('feliz', 6), ('martes', 3), ('tbt', 2)]","[('FelizMartes', 3), ('felizcumpleañosbogotá',...",8.21785e+17,9/14/19 1:59,735,35,False,0,0,1,0
1,269039712.0,1.17269e+18,40,171,0.0,0.0,18.087379,"[('gracias', 33), ('abrazo', 17), ('amigo', 14)]","[('feliz', 6), ('martes', 3), ('tbt', 2)]","[('FelizMartes', 3), ('felizcumpleañosbogotá',...",374557400.0,9/14/19 2:09,4,3,False,1,0,0,1
2,269039712.0,1.17269e+18,40,171,0.0,0.0,18.087379,"[('gracias', 33), ('abrazo', 17), ('amigo', 14)]","[('feliz', 6), ('martes', 3), ('tbt', 2)]","[('FelizMartes', 3), ('felizcumpleañosbogotá',...",422295200.0,9/14/19 1:39,16,1,False,0,0,0,1
3,269039712.0,1.17269e+18,40,171,0.0,0.0,18.087379,"[('gracias', 33), ('abrazo', 17), ('amigo', 14)]","[('feliz', 6), ('martes', 3), ('tbt', 2)]","[('FelizMartes', 3), ('felizcumpleañosbogotá',...",295876800.0,9/14/19 1:36,42,3,False,2,0,0,1
4,269039712.0,1.17269e+18,40,171,0.0,0.0,18.087379,"[('gracias', 33), ('abrazo', 17), ('amigo', 14)]","[('feliz', 6), ('martes', 3), ('tbt', 2)]","[('FelizMartes', 3), ('felizcumpleañosbogotá',...",295876800.0,9/14/19 1:35,64,6,False,1,2,0,1


In [0]:
# Check last merge with lenghts to avoid duplicates

In [0]:
# Merge user and tweet data
processed_data = pd.merge(sentiment_results, analysis_results, on='id')
processed_data.head()

Unnamed: 0,id,katz,betweenness,authority,screen_name,followers,friends,verified,created_at,listed,tipo,average_engagement,common_words,common_ht_words,common_hts,tweet_id,engagement,effective_length,polarity,subjectivity
0,426146744.0,1.580982,0.000668,2.9e-05,SteevenOrozco,655637,476,False,2011,842,opinion,457.25,"[(creyentes, 4), (bavaria, 4), (rt, 3)]","[(creyentes, 4), (bavaria, 3), (efecty, 1)]","[(Creyentes, 4), (MeUnoBavaria, 3), (EfectyElG...",1.14285e+18,42,125,0.6,0.8
1,426146744.0,1.580982,0.000668,2.9e-05,SteevenOrozco,655637,476,False,2011,842,opinion,457.25,"[(creyentes, 4), (bavaria, 4), (rt, 3)]","[(creyentes, 4), (bavaria, 3), (efecty, 1)]","[(Creyentes, 4), (MeUnoBavaria, 3), (EfectyElG...",1.14257e+18,1441,62,0.0,0.0
2,426146744.0,1.580982,0.000668,2.9e-05,SteevenOrozco,655637,476,False,2011,842,opinion,457.25,"[(creyentes, 4), (bavaria, 4), (rt, 3)]","[(creyentes, 4), (bavaria, 3), (efecty, 1)]","[(Creyentes, 4), (MeUnoBavaria, 3), (EfectyElG...",1.14149e+18,611,20,0.2,0.4
3,426146744.0,1.580982,0.000668,2.9e-05,SteevenOrozco,655637,476,False,2011,842,opinion,457.25,"[(creyentes, 4), (bavaria, 4), (rt, 3)]","[(creyentes, 4), (bavaria, 3), (efecty, 1)]","[(Creyentes, 4), (MeUnoBavaria, 3), (EfectyElG...",1.14142e+18,16,184,0.366667,0.5
4,426146744.0,1.580982,0.000668,2.9e-05,SteevenOrozco,655637,476,False,2011,842,opinion,457.25,"[(creyentes, 4), (bavaria, 4), (rt, 3)]","[(creyentes, 4), (bavaria, 3), (efecty, 1)]","[(Creyentes, 4), (MeUnoBavaria, 3), (EfectyElG...",1.14113e+18,180,261,-0.6,0.9
5,426146744.0,1.580982,0.000668,2.9e-05,SteevenOrozco,655637,476,False,2011,842,opinion,457.25,"[(creyentes, 4), (bavaria, 4), (rt, 3)]","[(creyentes, 4), (bavaria, 3), (efecty, 1)]","[(Creyentes, 4), (MeUnoBavaria, 3), (EfectyElG...",1.14e+18,39,45,0.3,0.2
6,426146744.0,1.580982,0.000668,2.9e-05,SteevenOrozco,655637,476,False,2011,842,opinion,457.25,"[(creyentes, 4), (bavaria, 4), (rt, 3)]","[(creyentes, 4), (bavaria, 3), (efecty, 1)]","[(Creyentes, 4), (MeUnoBavaria, 3), (EfectyElG...",1.13898e+18,16,126,0.416667,0.616667
7,426146744.0,1.580982,0.000668,2.9e-05,SteevenOrozco,655637,476,False,2011,842,opinion,457.25,"[(creyentes, 4), (bavaria, 4), (rt, 3)]","[(creyentes, 4), (bavaria, 3), (efecty, 1)]","[(Creyentes, 4), (MeUnoBavaria, 3), (EfectyElG...",1.13811e+18,61,89,0.0,0.0
8,426146744.0,1.580982,0.000668,2.9e-05,SteevenOrozco,655637,476,False,2011,842,opinion,457.25,"[(creyentes, 4), (bavaria, 4), (rt, 3)]","[(creyentes, 4), (bavaria, 3), (efecty, 1)]","[(Creyentes, 4), (MeUnoBavaria, 3), (EfectyElG...",1.13569e+18,20,183,-0.145833,0.875
9,426146744.0,1.580982,0.000668,2.9e-05,SteevenOrozco,655637,476,False,2011,842,opinion,457.25,"[(creyentes, 4), (bavaria, 4), (rt, 3)]","[(creyentes, 4), (bavaria, 3), (efecty, 1)]","[(Creyentes, 4), (MeUnoBavaria, 3), (EfectyElG...",1.13091e+18,14,148,0.383333,0.6


In [0]:
# Write processed data to file
processed_data.to_csv('processed_data.csv', index=False)