In [13]:
# Shell escape: list the contents of the parent directory.
!ls ..


[34mNotebooks[m[m              melborune_search_json  user_tracker.py
[34m__pycache__[m[m            searcher.py            utils.py
geelong_search_json    streamer.py
key_word_ids.json      twitter_credentials.py


In [14]:
import json
import pandas as pd
import numpy as np
import re

def make_df_from_tweets(tweets):
    """
    Flatten parsed tweet objects into a DataFrame with one row per tweet.

    Params
    -------------------------
    tweets : iterable of dict
        Parsed tweet JSON objects. Every tweet must provide the keys read
        below: 'text', 'user' (with 'screen_name', 'id' and 'location'),
        'id', 'created_at', 'source', 'favorite_count', 'retweet_count',
        'coordinates' and 'entities' (with 'hashtags').

    Returns
    -------------------------
    pd.DataFrame
        Columns, in this order: tweets, screen_name, user_id,
        user_location, id, len, date, source, likes, retweets, coor,
        hashtags. An empty input yields an empty frame with these columns.

    Raises
    -------------------------
    KeyError
        If a tweet lacks one of the keys listed above.
    """
    columns = [
        'tweets', 'screen_name', 'user_id', 'user_location', 'id', 'len',
        'date', 'source', 'likes', 'retweets', 'coor', 'hashtags',
    ]

    # Build plain Python records instead of wrapping each column in
    # np.array: nested values (the hashtag lists, the coordinates dict) are
    # ragged, and NumPy either refuses to build an array from them or turns
    # them into a 2-D array that cannot be stored as a single column.
    records = []
    for tweet in tweets:
        user = tweet['user']
        text = tweet['text']
        records.append({
            'tweets': text,
            'screen_name': user["screen_name"],
            'user_id': user['id'],
            'user_location': user['location'],
            'id': tweet['id'],
            'len': len(text),
            'date': tweet['created_at'],
            # 'source' arrives wrapped in an HTML anchor; strip the tags
            # and keep only the client name.
            'source': re.sub('<[^<]+?>', '', tweet['source']),
            'likes': tweet['favorite_count'],
            'retweets': tweet['retweet_count'],
            'coor': tweet["coordinates"],
            'hashtags': tweet['entities']['hashtags'],
        })

    # Passing `columns` fixes the column order and keeps the schema even
    # when there are no tweets at all.
    return pd.DataFrame(records, columns=columns)

def read_dat(file_path):
    """
    Read a file that stores one JSON document per line.

    Params
    -------------------------
    file_path : str or path-like
        Location of the file to read.

    Returns
    -------------------------
    list
        The parsed JSON value of every non-blank line, in file order.

    Raises
    -------------------------
    json.JSONDecodeError
        If a non-blank line is not valid JSON.
    """
    records = []
    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open(file_path, encoding='utf-8') as f:
        # Iterate the file lazily instead of loading it all via readlines().
        for line in f:
            line = line.strip()
            if not line:
                # Blank lines (e.g. a trailing newline at the end of the
                # file) carry no record and would make json.loads fail.
                continue
            records.append(json.loads(line))
    return records


    

# Load the raw search results (one JSON tweet per line) for each city.
# `read_dat` returns a fresh list on every call, so this cell runs on a clean
# kernel; previously `lines` was appended to without ever being created here.
lines = read_dat("../melborune_search_json")  # name matches the file on disk
geelong_dat = read_dat("../geelong_search_json")

melbourne_df = make_df_from_tweets(lines)
geelong_df = make_df_from_tweets(geelong_dat)

In [15]:
# Row/column counts: Geelong first, then Melbourne.
for frame in (geelong_df, melbourne_df):
    print(frame.shape)

(3391, 12)
(12794, 12)


In [23]:
from textblob import TextBlob

def is_negative_sentiment(text):
    """
    Flag text whose TextBlob polarity score is below zero.

    Params
    -------------------------
    text : str
        Text handed to TextBlob for scoring.

    Returns
    -------------------------
    int
        1 when the polarity is strictly negative, otherwise 0.
    """
    polarity = TextBlob(text).sentiment.polarity
    return 1 if polarity < 0 else 0

In [24]:
# Score every Geelong tweet: 1 = negative polarity, 0 = otherwise.
geelong_df['is_negative'] = geelong_df['tweets'].map(is_negative_sentiment)
geelong_df.head()

Unnamed: 0,tweets,screen_name,user_id,user_location,id,len,date,source,likes,retweets,coor,hashtags,is_negative
0,RT @RichardMarlesMP: Congratulations to the no...,billmithen,946700910,,1124169236362874880,139,Fri May 03 04:30:06 +0000 2019,Twitter for iPhone,0,6,,[],0
1,Pre poll is heaving #auspol #corangamite \n\nT...,thecattery,31690313,"Geelong, Victoria, Australia",1124165391213662208,140,Fri May 03 04:14:49 +0000 2019,Twitter for Android,0,0,,"[{'text': 'auspol', 'indices': [20, 27]}, {'te...",0
2,"RT @RichardMarlesMP: ""All we've had here when ...",OtwaysVic,4665184285,"South West, Victoria",1124162334757347328,140,Fri May 03 04:02:41 +0000 2019,Twitter Web Client,0,2,,[],0
3,Bloody hell @RichardMarlesMP is out in Belmont...,thecattery,31690313,"Geelong, Victoria, Australia",1124159694900060160,112,Fri May 03 03:52:11 +0000 2019,Twitter for Android,0,0,,"[{'text': 'Corio', 'indices': [61, 67]}, {'tex...",1
4,"RT @zacpower01: Australia is a secular, plural...",chomskyrose,1098537850159620096,"Perth, Western Australia",1124154346864693249,140,Fri May 03 03:30:56 +0000 2019,Twitter for iPhone,0,7,,[],0


In [25]:
# Score every Melbourne tweet: 1 = negative polarity, 0 = otherwise.
melbourne_df['is_negative'] = melbourne_df['tweets'].map(is_negative_sentiment)


In [27]:
# Share of Geelong tweets flagged as negative.
geelong_df['is_negative'].sum() / len(geelong_df)

0.18460631082276616

In [28]:
# Share of Melbourne tweets flagged as negative.
melbourne_df['is_negative'].sum() / len(melbourne_df)

0.19290292324527122

In [42]:
# Five most common posting clients for Melbourne tweets (ascending by count).
melbourne_df.groupby('source')['tweets'].count().sort_values().tail(5)

source
Twitter Web App        1166
Twitter for iPad       1454
Twitter Web Client     2319
Twitter for Android    3036
Twitter for iPhone     4241
Name: tweets, dtype: int64

In [43]:
# Five most common posting clients for Geelong tweets (ascending by count).
geelong_df.groupby('source')['tweets'].count().sort_values().tail(5)

source
Twitter Web App         256
Twitter for iPad        531
Twitter Web Client      681
Twitter for Android     801
Twitter for iPhone     1065
Name: tweets, dtype: int64