In [8]:
import os
import sys
import pandas as pd
import pmdarima as pm
import matplotlib.pyplot as plt
from nltk.stem import WordNetLemmatizer, LancasterStemmer, PorterStemmer
from nltk.corpus import stopwords
from nltk.tokenize import TweetTokenizer
from sklearn.feature_extraction.text import TfidfVectorizer

In [9]:
sys.path.append(r'D:\OneDrive\Programming\documents_python\NUS Courses\DBA5106\Course-DBA5106\group_project\gp3')
from utils.logger import logger
from utils.config import RAW_DATA_DIR, PROCESSED_DATA_DIR, FIGURE_DIR
from utils.data_porter import save_to_csv, read_from_csv

# Data Aggregation

In [10]:
user_tweets = read_from_csv('user_tweets_final.csv', PROCESSED_DATA_DIR)

In [11]:
user_tweets

Unnamed: 0,follower_count,following_count,account_creation_date,tweet_num,tweet_text,state_back
0,379,383,2020-01-31,45,"RT @Amelia12610461: #February5, The Cuban cycl...",1
1,224,253,2020-03-24,6,@DiazCanelB Celia Sánchez occupies a significa...,1
2,535,1144,2019-01-28,9048,@soniagupta504 Don`t worry everithing will be ...,1
3,827,740,2020-01-25,165,RT @DiazCanelB: State terrorism openly declare...,1
4,43,304,2019-02-22,1,RT @DiazCanelB: Patient ill with Covid-19: pre...,1
...,...,...,...,...,...,...
44013,4274,3998,2015-09-15,845,RT @avucic: Deeply saddened by the terrible ne...,1
44014,2169,2362,2016-02-22,1372,RT @MarionSpekker: @huskorkut \nGood morning \...,1
44015,970,1097,2015-08-04,96,"RT @avucic: With @PHammondMP, PM Djukanovic, @...",1
44016,664,973,2016-11-09,8,"RT @avucic: @sebastiankurz Thank you, dear fri...",1


In [12]:
user_tweets.rename(columns={'tweet_num': 'tweets_count', 'follower_count':'followers_count','tweet_text':'tweets_content'}, inplace=True)
user_tweets = user_tweets[['followers_count', 'following_count', 'tweets_count', 'tweets_content', 'state_back']]
user_tweets

Unnamed: 0,followers_count,following_count,tweets_count,tweets_content,state_back
0,379,383,45,"RT @Amelia12610461: #February5, The Cuban cycl...",1
1,224,253,6,@DiazCanelB Celia Sánchez occupies a significa...,1
2,535,1144,9048,@soniagupta504 Don`t worry everithing will be ...,1
3,827,740,165,RT @DiazCanelB: State terrorism openly declare...,1
4,43,304,1,RT @DiazCanelB: Patient ill with Covid-19: pre...,1
...,...,...,...,...,...
44013,4274,3998,845,RT @avucic: Deeply saddened by the terrible ne...,1
44014,2169,2362,1372,RT @MarionSpekker: @huskorkut \nGood morning \...,1
44015,970,1097,96,"RT @avucic: With @PHammondMP, PM Djukanovic, @...",1
44016,664,973,8,"RT @avucic: @sebastiankurz Thank you, dear fri...",1


In [13]:
user_tweets_nega = read_from_csv('user_tweets_final_nega.csv', PROCESSED_DATA_DIR)

In [14]:
user_tweets_nega.rename(columns={'statuses_count': 'tweets_count'}, inplace=True)
user_tweets_nega = user_tweets_nega[['followers_count', 'following_count', 'tweets_count', 'tweets_content', 'state_back']]
user_tweets_nega

Unnamed: 0,followers_count,following_count,tweets_count,tweets_content,state_back
0,4155.0,4395.0,12823.0,Helping out @ Corner Stone tonight @HoptonHou...,0
1,438.0,757.0,515.0,@iheartfez6 hey girl . are you home?we lost yo...,0
2,4295.0,4728.0,16282.0,@DevourerofBooks YES!!! I met her after the bo...,0
3,559.0,15.0,5991.0,@Biiiancaaaa What if that is what she said. @...,0
4,877.0,415.0,11320.0,@ROXPromotions I'll see you in the finals with...,0
...,...,...,...,...,...
74817,5.0,16.0,10.0,just checked into the @rogersmithhotel -- che...,0
74818,43.0,38.0,343.0,"swap meet was fun today, summer time! even tho...",0
74819,4.0,84.0,51.0,@Audio_Rush oh wells. i can complain all i wan...,0
74820,0.0,14.0,2.0,I ate too many carrots Trying to knockout thi...,0


In [15]:
user_tweets_nega = user_tweets_nega.loc[user_tweets_nega['tweets_count'] <= 500]

In [16]:
user_tweets_all = pd.concat([user_tweets, user_tweets_nega])

In [18]:
user_tweets_all

Unnamed: 0,followers_count,following_count,tweets_count,tweets_content,state_back
0,379.0,383.0,45.0,"RT @Amelia12610461: #February5, The Cuban cycl...",1
1,224.0,253.0,6.0,@DiazCanelB Celia Sánchez occupies a significa...,1
2,535.0,1144.0,9048.0,@soniagupta504 Don`t worry everithing will be ...,1
3,827.0,740.0,165.0,RT @DiazCanelB: State terrorism openly declare...,1
4,43.0,304.0,1.0,RT @DiazCanelB: Patient ill with Covid-19: pre...,1
...,...,...,...,...,...
74817,5.0,16.0,10.0,just checked into the @rogersmithhotel -- che...,0
74818,43.0,38.0,343.0,"swap meet was fun today, summer time! even tho...",0
74819,4.0,84.0,51.0,@Audio_Rush oh wells. i can complain all i wan...,0
74820,0.0,14.0,2.0,I ate too many carrots Trying to knockout thi...,0


In [19]:
save_to_csv(user_tweets_all, 'user_tweets_final_all.csv', PROCESSED_DATA_DIR)

In [27]:
# Save the partial data to csv file for data exploration analysis
save_to_csv(pd.concat([user_tweets[:500], user_tweets_nega[:500]]), 'user_tweets_final_all_1000.csv', PROCESSED_DATA_DIR)
save_to_csv(pd.concat([user_tweets[:5000], user_tweets_nega[:5000]]), 'user_tweets_final_all_10000.csv', PROCESSED_DATA_DIR)

# Data Vectorization

In [11]:
user_tweets_all['tweets_content'] = user_tweets_all.apply(lambda row: row['tweets_content'].lower(), axis=1)
user_tweets_all

Unnamed: 0,followers_count,following_count,tweets_count,tweets_content,state_back
0,379.0,383.0,45.0,"rt @amelia12610461: #february5, the cuban cycl...",1
1,224.0,253.0,6.0,@diazcanelb celia sánchez occupies a significa...,1
2,535.0,1144.0,9048.0,@soniagupta504 don`t worry everithing will be ...,1
3,827.0,740.0,165.0,rt @diazcanelb: state terrorism openly declare...,1
4,43.0,304.0,1.0,rt @diazcanelb: patient ill with covid-19: pre...,1
...,...,...,...,...,...
74817,5.0,16.0,10.0,just checked into the @rogersmithhotel -- che...,0
74818,43.0,38.0,343.0,"swap meet was fun today, summer time! even tho...",0
74819,4.0,84.0,51.0,@audio_rush oh wells. i can complain all i wan...,0
74820,0.0,14.0,2.0,i ate too many carrots trying to knockout thi...,0


In [12]:
tweet_tokenizer = TweetTokenizer()
user_tweets_all['tweet_tokenized'] = user_tweets_all.apply(lambda row: tweet_tokenizer.tokenize(row['tweets_content']), axis=1)

In [13]:
def filter_alpha(word_tokens):
    return [w for w in word_tokens if w.isalpha()]
user_tweets_all['tweet_tokenized'] = user_tweets_all.apply(lambda row: filter_alpha(row['tweet_tokenized']), axis=1)

In [14]:
porter = PorterStemmer()
def stemming(word_tokens):
    return [porter.stem(w) for w in word_tokens]
user_tweets_all['tweet_tokenized_stem'] = user_tweets_all.apply(lambda row: stemming(row['tweet_tokenized']), axis=1)

In [15]:
def lemmatize(word_tokens):
    return [lemmatizer.lemmatize(w) for w in word_tokens]
lemmatizer = WordNetLemmatizer()
user_tweets_all['tweet_tokenized_lemma'] = user_tweets_all.apply(lambda row: lemmatize(row['tweet_tokenized_stem']), axis=1)

In [16]:
def filer_stopword(word_tokens):
    return [w for w in word_tokens if not w in stop_words]
stop_words = set(stopwords.words('english'))
user_tweets_all['tweet_tokenized_filtered'] = user_tweets_all.apply(lambda row: filer_stopword(row['tweet_tokenized_lemma']), axis=1)

In [17]:
user_tweets_all['tweet_clean'] = user_tweets_all.apply(lambda row: ' '.join(row['tweet_tokenized_filtered']), axis=1)

In [18]:
user_tweets_all

Unnamed: 0,followers_count,following_count,tweets_count,tweets_content,state_back,tweet_tokenized,tweet_tokenized_stem,tweet_tokenized_lemma,tweet_tokenized_filtered,tweet_clean
0,379.0,383.0,45.0,"rt @amelia12610461: #february5, the cuban cycl...",1,"[rt, the, cuban, cyclist, arlenis, sierra, won...","[rt, the, cuban, cyclist, arleni, sierra, won,...","[rt, the, cuban, cyclist, arleni, sierra, won,...","[rt, cuban, cyclist, arleni, sierra, first, pl...",rt cuban cyclist arleni sierra first place fir...
1,224.0,253.0,6.0,@diazcanelb celia sánchez occupies a significa...,1,"[celia, sánchez, occupies, a, significant, pla...","[celia, sánchez, occupi, a, signific, place, i...","[celia, sánchez, occupi, a, signific, place, i...","[celia, sánchez, occupi, signific, place, hist...",celia sánchez occupi signific place histori cu...
2,535.0,1144.0,9048.0,@soniagupta504 don`t worry everithing will be ...,1,"[don, t, worry, everithing, will, be, ok, vsco...","[don, t, worri, everith, will, be, ok, vscode,...","[don, t, worri, everith, will, be, ok, vscode,...","[worri, everith, ok, vscode, best, editor, ani...",worri everith ok vscode best editor ani backen...
3,827.0,740.0,165.0,rt @diazcanelb: state terrorism openly declare...,1,"[rt, state, terrorism, openly, declared, again...","[rt, state, terror, openli, declar, against, v...","[rt, state, terror, openli, declar, against, v...","[rt, state, terror, openli, declar, venezuela,...",rt state terror openli declar venezuela defeat...
4,43.0,304.0,1.0,rt @diazcanelb: patient ill with covid-19: pre...,1,"[rt, patient, ill, with, covid, precautions, m...","[rt, patient, ill, with, covid, precaut, must,...","[rt, patient, ill, with, covid, precaut, must,...","[rt, patient, ill, covid, precaut, must, heigh...",rt patient ill covid precaut must heighten peo...
...,...,...,...,...,...,...,...,...,...,...
74817,5.0,16.0,10.0,just checked into the @rogersmithhotel -- che...,0,"[just, checked, into, the, checkin, but, hopef...","[just, check, into, the, checkin, but, hope, t...","[just, check, into, the, checkin, but, hope, t...","[check, checkin, hope, get, room, earli, need,...",check checkin hope get room earli need sleep m...
74818,43.0,38.0,343.0,"swap meet was fun today, summer time! even tho...",0,"[swap, meet, was, fun, today, summer, time, ev...","[swap, meet, wa, fun, today, summer, time, eve...","[swap, meet, wa, fun, today, summer, time, eve...","[swap, meet, wa, fun, today, summer, time, eve...",swap meet wa fun today summer time even though...
74819,4.0,84.0,51.0,@audio_rush oh wells. i can complain all i wan...,0,"[oh, wells, i, can, complain, all, i, want, no...","[oh, well, i, can, complain, all, i, want, now...","[oh, well, i, can, complain, all, i, want, now...","[oh, well, complain, want, aboutt, school, wan...",oh well complain want aboutt school want alrea...
74820,0.0,14.0,2.0,i ate too many carrots trying to knockout thi...,0,"[i, ate, too, many, carrots, trying, to, knock...","[i, ate, too, mani, carrot, tri, to, knockout,...","[i, ate, too, mani, carrot, tri, to, knockout,...","[ate, mani, carrot, tri, knockout, thi, last, ...",ate mani carrot tri knockout thi last paper fa...


In [19]:
vectorizer = TfidfVectorizer(max_df=0.3, min_df=40, max_features=400)
vectors = vectorizer.fit_transform(user_tweets_all['tweet_clean'])

In [20]:
feature_names = vectorizer.get_feature_names()

In [21]:
len(feature_names)

400

In [22]:
dense = vectors.todense()

In [23]:
denselist = dense.tolist()

In [24]:
df_tfidf = pd.DataFrame(denselist, columns=feature_names)

In [25]:
user_tweets_final = pd.merge(left=user_tweets_all, right=df_tfidf, left_index=True, right_index=True)

In [26]:
user_tweets_final

Unnamed: 0,followers_count,following_count,tweets_count,tweets_content,state_back,tweet_tokenized,tweet_tokenized_stem,tweet_tokenized_lemma,tweet_tokenized_filtered,tweet_clean,...,wow,write,wrong,ya,yay,ye,yeah,year,yesterday,yet
0,379.0,383.0,45.0,"rt @amelia12610461: #february5, the cuban cycl...",1,"[rt, the, cuban, cyclist, arlenis, sierra, won...","[rt, the, cuban, cyclist, arleni, sierra, won,...","[rt, the, cuban, cyclist, arleni, sierra, won,...","[rt, cuban, cyclist, arleni, sierra, first, pl...",rt cuban cyclist arleni sierra first place fir...,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.049154,0.000000,0.060629
1,224.0,253.0,6.0,@diazcanelb celia sánchez occupies a significa...,1,"[celia, sánchez, occupies, a, significant, pla...","[celia, sánchez, occupi, a, signific, place, i...","[celia, sánchez, occupi, a, signific, place, i...","[celia, sánchez, occupi, signific, place, hist...",celia sánchez occupi signific place histori cu...,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
2,535.0,1144.0,9048.0,@soniagupta504 don`t worry everithing will be ...,1,"[don, t, worry, everithing, will, be, ok, vsco...","[don, t, worri, everith, will, be, ok, vscode,...","[don, t, worri, everith, will, be, ok, vscode,...","[worri, everith, ok, vscode, best, editor, ani...",worri everith ok vscode best editor ani backen...,...,0.006021,0.047380,0.002811,0.000541,0.002049,0.008471,0.002790,0.043911,0.012778,0.011926
3,827.0,740.0,165.0,rt @diazcanelb: state terrorism openly declare...,1,"[rt, state, terrorism, openly, declared, again...","[rt, state, terror, openli, declar, against, v...","[rt, state, terror, openli, declar, against, v...","[rt, state, terror, openli, declar, venezuela,...",rt state terror openli declar venezuela defeat...,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.151733,0.025067,0.023394
4,43.0,304.0,1.0,rt @diazcanelb: patient ill with covid-19: pre...,1,"[rt, patient, ill, with, covid, precautions, m...","[rt, patient, ill, with, covid, precaut, must,...","[rt, patient, ill, with, covid, precaut, must,...","[rt, patient, ill, covid, precaut, must, heigh...",rt patient ill covid precaut must heighten peo...,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
74817,5.0,16.0,10.0,just checked into the @rogersmithhotel -- che...,0,"[just, checked, into, the, checkin, but, hopef...","[just, check, into, the, checkin, but, hope, t...","[just, check, into, the, checkin, but, hope, t...","[check, checkin, hope, get, room, earli, need,...",check checkin hope get room earli need sleep m...,...,0.000000,0.000000,0.000000,0.056265,0.053328,0.000000,0.096813,0.000000,0.055420,0.103444
74818,43.0,38.0,343.0,"swap meet was fun today, summer time! even tho...",0,"[swap, meet, was, fun, today, summer, time, ev...","[swap, meet, wa, fun, today, summer, time, eve...","[swap, meet, wa, fun, today, summer, time, eve...","[swap, meet, wa, fun, today, summer, time, eve...",swap meet wa fun today summer time even though...,...,0.102729,0.116877,0.000000,0.110673,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
74819,4.0,84.0,51.0,@audio_rush oh wells. i can complain all i wan...,0,"[oh, wells, i, can, complain, all, i, want, no...","[oh, well, i, can, complain, all, i, want, now...","[oh, well, i, can, complain, all, i, want, now...","[oh, well, complain, want, aboutt, school, wan...",oh well complain want aboutt school want alrea...,...,0.090782,0.000000,0.101721,0.097803,0.000000,0.000000,0.000000,0.000000,0.096335,0.089906
74820,0.0,14.0,2.0,i ate too many carrots trying to knockout thi...,0,"[i, ate, too, many, carrots, trying, to, knock...","[i, ate, too, mani, carrot, tri, to, knockout,...","[i, ate, too, mani, carrot, tri, to, knockout,...","[ate, mani, carrot, tri, knockout, thi, last, ...",ate mani carrot tri knockout thi last paper fa...,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000


In [27]:
user_tweets_final

Unnamed: 0,followers_count,following_count,tweets_count,tweets_content,state_back,tweet_tokenized,tweet_tokenized_stem,tweet_tokenized_lemma,tweet_tokenized_filtered,tweet_clean,...,wow,write,wrong,ya,yay,ye,yeah,year,yesterday,yet
0,379.0,383.0,45.0,"rt @amelia12610461: #february5, the cuban cycl...",1,"[rt, the, cuban, cyclist, arlenis, sierra, won...","[rt, the, cuban, cyclist, arleni, sierra, won,...","[rt, the, cuban, cyclist, arleni, sierra, won,...","[rt, cuban, cyclist, arleni, sierra, first, pl...",rt cuban cyclist arleni sierra first place fir...,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.049154,0.000000,0.060629
1,224.0,253.0,6.0,@diazcanelb celia sánchez occupies a significa...,1,"[celia, sánchez, occupies, a, significant, pla...","[celia, sánchez, occupi, a, signific, place, i...","[celia, sánchez, occupi, a, signific, place, i...","[celia, sánchez, occupi, signific, place, hist...",celia sánchez occupi signific place histori cu...,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
2,535.0,1144.0,9048.0,@soniagupta504 don`t worry everithing will be ...,1,"[don, t, worry, everithing, will, be, ok, vsco...","[don, t, worri, everith, will, be, ok, vscode,...","[don, t, worri, everith, will, be, ok, vscode,...","[worri, everith, ok, vscode, best, editor, ani...",worri everith ok vscode best editor ani backen...,...,0.006021,0.047380,0.002811,0.000541,0.002049,0.008471,0.002790,0.043911,0.012778,0.011926
3,827.0,740.0,165.0,rt @diazcanelb: state terrorism openly declare...,1,"[rt, state, terrorism, openly, declared, again...","[rt, state, terror, openli, declar, against, v...","[rt, state, terror, openli, declar, against, v...","[rt, state, terror, openli, declar, venezuela,...",rt state terror openli declar venezuela defeat...,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.151733,0.025067,0.023394
4,43.0,304.0,1.0,rt @diazcanelb: patient ill with covid-19: pre...,1,"[rt, patient, ill, with, covid, precautions, m...","[rt, patient, ill, with, covid, precaut, must,...","[rt, patient, ill, with, covid, precaut, must,...","[rt, patient, ill, covid, precaut, must, heigh...",rt patient ill covid precaut must heighten peo...,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
74817,5.0,16.0,10.0,just checked into the @rogersmithhotel -- che...,0,"[just, checked, into, the, checkin, but, hopef...","[just, check, into, the, checkin, but, hope, t...","[just, check, into, the, checkin, but, hope, t...","[check, checkin, hope, get, room, earli, need,...",check checkin hope get room earli need sleep m...,...,0.000000,0.000000,0.000000,0.056265,0.053328,0.000000,0.096813,0.000000,0.055420,0.103444
74818,43.0,38.0,343.0,"swap meet was fun today, summer time! even tho...",0,"[swap, meet, was, fun, today, summer, time, ev...","[swap, meet, wa, fun, today, summer, time, eve...","[swap, meet, wa, fun, today, summer, time, eve...","[swap, meet, wa, fun, today, summer, time, eve...",swap meet wa fun today summer time even though...,...,0.102729,0.116877,0.000000,0.110673,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
74819,4.0,84.0,51.0,@audio_rush oh wells. i can complain all i wan...,0,"[oh, wells, i, can, complain, all, i, want, no...","[oh, well, i, can, complain, all, i, want, now...","[oh, well, i, can, complain, all, i, want, now...","[oh, well, complain, want, aboutt, school, wan...",oh well complain want aboutt school want alrea...,...,0.090782,0.000000,0.101721,0.097803,0.000000,0.000000,0.000000,0.000000,0.096335,0.089906
74820,0.0,14.0,2.0,i ate too many carrots trying to knockout thi...,0,"[i, ate, too, many, carrots, trying, to, knock...","[i, ate, too, mani, carrot, tri, to, knockout,...","[i, ate, too, mani, carrot, tri, to, knockout,...","[ate, mani, carrot, tri, knockout, thi, last, ...",ate mani carrot tri knockout thi last paper fa...,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000


In [28]:
save_to_csv(user_tweets_final, 'user_tweets_final_numeric.csv', PROCESSED_DATA_DIR)

In [7]:
user_tweets_final = user_tweets_final.drop(columns=['tweets_content', 'tweet_tokenized','tweet_tokenized_stem','tweet_tokenized_lemma','tweet_tokenized_filtered', 'tweet_clean'])

In [8]:
user_tweets_final

Unnamed: 0,followers_count,following_count,tweets_count,state_back,abl,actual,ago,aleksandar,almost,alreadi,...,wow,write,wrong,ya,yay,ye,yeah,year,yesterday,yet
0,379.0,383.0,45.0,1,0.071711,0.000000,0.000000,0.0,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.049154,0.000000,0.060629
1,224.0,253.0,6.0,1,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
2,535.0,1144.0,9048.0,1,0.011755,0.005757,0.009084,0.0,0.011596,0.015488,...,0.006021,0.047380,0.002811,0.000541,0.002049,0.008471,0.002790,0.043911,0.012778,0.011926
3,827.0,740.0,165.0,1,0.055341,0.024641,0.053459,0.0,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.151733,0.025067,0.023394
4,43.0,304.0,1.0,1,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
106592,5.0,16.0,10.0,0,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.056265,0.053328,0.000000,0.096813,0.000000,0.055420,0.103444
106593,43.0,38.0,343.0,0,0.000000,0.000000,0.000000,0.0,0.113056,0.198191,...,0.102729,0.116877,0.000000,0.110673,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
106594,4.0,84.0,51.0,0,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,...,0.090782,0.000000,0.101721,0.097803,0.000000,0.000000,0.000000,0.000000,0.096335,0.089906
106595,0.0,14.0,2.0,0,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000


In [9]:
save_to_csv(user_tweets_final, 'user_tweets_only_numeric.csv', PROCESSED_DATA_DIR)