In [13]:
# Shell escape: list the parent directory, where the search dumps read below live.
!ls ..


[34mNotebooks[m[m              melborune_search_json  user_tracker.py
[34m__pycache__[m[m            searcher.py            utils.py
geelong_search_json    streamer.py
key_word_ids.json      twitter_credentials.py


In [3]:
import json
import pandas as pd
import numpy as np
import re

def make_df_from_tweets(tweets):
    """
    Flatten parsed tweet objects into a DataFrame with one row per tweet.

    Params
    -------------------------
    tweets : list of jsons
        Parsed tweet objects (dicts). Every tweet must provide the keys
        read below: 'text', 'id', 'created_at', 'source', 'favorite_count',
        'retweet_count', 'coordinates', 'user' (with 'screen_name', 'id'
        and 'location') and 'entities' (with 'hashtags').

    Returns
    -------------------------
    pd.Dataframe
        Columns, in order: tweets, screen_name, user_id, user_location, id,
        len, date, source, likes, retweets, coor, hashtags.

    Raises
    -------------------------
    KeyError
        If a tweet is missing one of the keys listed above.
    """
    # Materialise once so any iterable can be traversed for every column.
    tweets = list(tweets)

    columns = {
        'tweets': [tweet['text'] for tweet in tweets],
        'screen_name': [tweet['user']['screen_name'] for tweet in tweets],
        'user_id': [tweet['user']['id'] for tweet in tweets],
        'user_location': [tweet['user']['location'] for tweet in tweets],
        'id': [tweet['id'] for tweet in tweets],
        'len': [len(tweet['text']) for tweet in tweets],
        'date': [tweet['created_at'] for tweet in tweets],
        # 'source' arrives as an HTML anchor; strip the tags, keep the label.
        'source': [re.sub('<[^<]+?>', '', tweet['source'])
                   for tweet in tweets],
        'likes': [tweet['favorite_count'] for tweet in tweets],
        'retweets': [tweet['retweet_count'] for tweet in tweets],
        # The two nested columns are kept as object Series on purpose.
        # Wrapping per-tweet hashtag lists in np.array() fails on ragged
        # input (ValueError on recent NumPy) and yields a 2-D array when
        # every tweet happens to carry the same number of hashtags.
        'coor': pd.Series(
            [tweet['coordinates'] for tweet in tweets], dtype=object),
        'hashtags': pd.Series(
            [tweet['entities']['hashtags'] for tweet in tweets],
            dtype=object),
    }
    return pd.DataFrame(columns, columns=list(columns))

def read_dat(file_path):
    """
    Load a file holding one JSON document per line.

    Params
    -------------------------
    file_path : str or path-like
        Location of the file to read.

    Returns
    -------------------------
    list
        The parsed objects, in file order. Blank and whitespace-only
        lines are skipped.

    Raises
    -------------------------
    OSError
        If the file cannot be opened.
    json.JSONDecodeError
        If a non-blank line is not valid JSON.
    """
    records = []
    # Explicit UTF-8 so decoding does not depend on the platform default.
    with open(file_path, encoding="utf-8") as f:
        # Iterate the handle directly instead of loading every line up front.
        for line in f:
            line = line.strip()
            if not line:
                # A trailing newline or spacer line would make json.loads raise.
                continue
            records.append(json.loads(line))
    return records


    




# Parsed tweet records for each city's search dump.
# "melborune" is the spelling of the file on disk, so it is kept as-is.
melbourne_dat, geelong_dat, brisbane_dat, sydney_dat = [
    read_dat(search_dump)
    for search_dump in (
        "../melborune_search_json",
        "../geelong_search_json",
        "../brisbane_search_json",
        "../sydney_search_json",
    )
]

# One DataFrame per city, all built by the same flattening function.
melbourne_df, geelong_df, brisbane_df, sydney_df = [
    make_df_from_tweets(city_tweets)
    for city_tweets in (melbourne_dat, geelong_dat, brisbane_dat, sydney_dat)
]

In [15]:
# (rows, columns) of the Geelong frame, then the Melbourne frame, one per line.
print(geelong_df.shape, melbourne_df.shape, sep="\n")

(3391, 12)
(12794, 12)


In [4]:
from textblob import TextBlob

def is_negative_sentiment(text):
    """
    Flag text whose TextBlob sentiment polarity is below zero.

    Params
    -------------------------
    text : str
        Text handed to TextBlob for scoring.

    Returns
    -------------------------
    int
        1 when the polarity is strictly negative, otherwise 0
        (a polarity of exactly zero counts as not negative).
    """
    polarity = TextBlob(text).sentiment.polarity
    return 1 if polarity < 0 else 0
    
# Fixed order: Melbourne, Geelong, Brisbane, Sydney.
dfs = [
    melbourne_df,
    geelong_df,
    brisbane_df,
    sydney_df,
]

# Adds a 0/1 'is_negative' column to each city frame in place.
for df in dfs:
    df['is_negative'] = df['tweets'].map(is_negative_sentiment)

In [5]:
# Preview the first five Sydney rows, including the new 'is_negative' column.
sydney_df.head(n=5)

Unnamed: 0,tweets,screen_name,user_id,user_location,id,len,date,source,likes,retweets,coor,hashtags,is_negative
0,RT @Peter_Fitz: #Auspol. Strange that she didn...,darryllarkin,297295417,Northern Rivers NSW Australia,1124230433598623744,140,Fri May 03 08:33:17 +0000 2019,Twitter for Android,0,127,,"[{'text': 'Auspol', 'indices': [16, 23]}]",1
1,RT @Peter_Fitz: #Auspol. Strange that she didn...,Kim_AussieGirl,3749216294,"Balga, Perth (WA)",1124230415026233344,140,Fri May 03 08:33:12 +0000 2019,Twitter for iPhone,0,127,,"[{'text': 'Auspol', 'indices': [16, 23]}]",1
2,RT @vittlesorg: BREAKING NEWS - pass it on! \n...,Booners05,260921555,,1124230391382937600,140,Fri May 03 08:33:07 +0000 2019,Twitter for Android,0,42,,"[{'text': 'AngusTaylor', 'indices': [46, 58]},...",0
3,We’ve finally worked out why George was in the...,Bay35Pablo,93950111,Sydney,1124230390175027200,140,Fri May 03 08:33:06 +0000 2019,Twitter for iPhone,0,0,,[],0
4,RT @FrBower: So @TonyAbbottMHR is concerned ab...,stevie_bro,412037761,Quandamooka Country QLD 🇦🇺,1124230246423613440,140,Fri May 03 08:32:32 +0000 2019,Twitter for iPad,0,242,,"[{'text': 'ClimateEmergency', 'indices': [94, ...",0


In [8]:

# Fraction of tweets flagged negative, printed per frame in the order of `dfs`.
for df in dfs:
    print(df['is_negative'].sum() / len(df))
    

0.19290292324527122
0.18460631082276616
0.2549885369788571
0.19980164784864204


['T',
 '_AXIS_ALIASES',
 '_AXIS_IALIASES',
 '_AXIS_LEN',
 '_AXIS_NAMES',
 '_AXIS_NUMBERS',
 '_AXIS_ORDERS',
 '_AXIS_REVERSED',
 '_AXIS_SLICEMAP',
 '__abs__',
 '__add__',
 '__and__',
 '__array__',
 '__array_priority__',
 '__array_wrap__',
 '__bool__',
 '__bytes__',
 '__class__',
 '__contains__',
 '__copy__',
 '__deepcopy__',
 '__delattr__',
 '__delitem__',
 '__dict__',
 '__dir__',
 '__div__',
 '__doc__',
 '__eq__',
 '__finalize__',
 '__floordiv__',
 '__format__',
 '__ge__',
 '__getattr__',
 '__getattribute__',
 '__getitem__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__iadd__',
 '__iand__',
 '__ifloordiv__',
 '__imod__',
 '__imul__',
 '__init__',
 '__init_subclass__',
 '__invert__',
 '__ior__',
 '__ipow__',
 '__isub__',
 '__iter__',
 '__itruediv__',
 '__ixor__',
 '__le__',
 '__len__',
 '__lt__',
 '__matmul__',
 '__mod__',
 '__module__',
 '__mul__',
 '__ne__',
 '__neg__',
 '__new__',
 '__nonzero__',
 '__or__',
 '__pos__',
 '__pow__',
 '__radd__',
 '__rand__',
 '__rdiv__',
 '__reduce__',

In [27]:
# Fraction of Geelong tweets flagged negative.
geelong_df['is_negative'].sum() / len(geelong_df)

0.18460631082276616

In [28]:
# Fraction of Melbourne tweets flagged negative.
melbourne_df['is_negative'].sum() / len(melbourne_df)

0.19290292324527122

In [42]:
# Five most common posting clients for Melbourne, in ascending order of tweet count.
melbourne_df.groupby('source')['tweets'].count().sort_values().tail(5)

source
Twitter Web App        1166
Twitter for iPad       1454
Twitter Web Client     2319
Twitter for Android    3036
Twitter for iPhone     4241
Name: tweets, dtype: int64

In [43]:
# Five most common posting clients for Geelong, in ascending order of tweet count.
geelong_df.groupby('source')['tweets'].count().sort_values().tail(5)

source
Twitter Web App         256
Twitter for iPad        531
Twitter Web Client      681
Twitter for Android     801
Twitter for iPhone     1065
Name: tweets, dtype: int64