In [1]:
import re
from nltk.stem import SnowballStemmer
from nltk.corpus import stopwords

# Input and output locations used by the later cells.
JSON_FILENAME = "sample_west.json"
EXCEL_FILENAME = "sample_west.xlsx"

# Tokens are maximal runs of word characters; punctuation never forms a token.
tokenizer = re.compile(r"\w+")

# English Snowball stemmer shared by every cell that normalizes words.
stemmer = SnowballStemmer("english")

# NLTK's English stop words plus a few extra tokens to drop.
# NOTE: this rebinds the name `stopwords`, shadowing the imported nltk corpus
# object; from here on `stopwords` is a frozenset of strings.
stopwords = frozenset(stopwords.words("english")).union([
 'ut', '\'re','.', ',', '--', '\'s', '?', ')', '(', ':', '\'',
 '\"', '-', '}', '{', '&', '|', 'rt', '', u'\u2014' ])

In [2]:
import json


# Load the sampled statuses. The encoding is pinned to UTF-8 so that the
# non-ASCII characters in tweet text decode the same way on every platform
# instead of depending on the locale's default encoding.
with open(JSON_FILENAME, "r", encoding="utf-8") as file:
    sample = json.load(file)

# Display the loaded structure as the cell output.
sample

{'after': [{'_id': '5d625bc077a39fca2ded410f',
   'contributors': None,
   'coordinates': None,
   'created_at': '2019-08-25T09:58:19',
   'display_text_range': [50, 140],
   'entities': {'hashtags': [],
    'symbols': [],
    'urls': [{'display_url': 'twitter.com/i/web/status/1…',
      'expanded_url': 'https://twitter.com/i/web/status/1165564048953528321',
      'indices': [117, 140],
      'url': 'https://t.co/6YoEYDRrJB'}],
    'user_mentions': [{'id': 1157338850253049856,
      'id_str': '1157338850253049856',
      'indices': [0, 15],
      'name': 'Yorkshire Man in Lancashire',
      'screen_name': 'YorkshireTweet'},
     {'id': 1134728358,
      'id_str': '1134728358',
      'indices': [16, 29],
      'name': 'Jenny',
      'screen_name': 'queenbeeof90'},
     {'id': 14933304,
      'id_str': '14933304',
      'indices': [30, 40],
      'name': 'Jo Swinson',
      'screen_name': 'joswinson'},
     {'id': 5680622,
      'id_str': '5680622',
      'indices': [41, 49],
      'name

In [7]:
def getnicetext(r):
    """Return the most complete text available for status dict ``r``.

    Lookup order:
      1. ``r["extended_tweet"]["full_text"]`` when those keys exist.
      2. Otherwise, when ``r["text"]`` contains an ``RT @name: `` prefix and
         ``r["retweeted_status"]["extended_tweet"]["full_text"]`` exists: the
         text up to the end of the first prefix match, followed by that
         full text.
      3. Otherwise ``r["text"]`` unchanged.

    Raises:
        KeyError: if none of the above applies and ``r`` has no ``"text"`` key.
    """
    try:
        return r["extended_tweet"]["full_text"]
    except KeyError:
        pass

    try:
        short_text = r["text"]
        # .search() returns None when there is no retweet prefix; the resulting
        # AttributeError on .end() is what routes us to the plain-text fallback.
        prefix_end = getnicetext.regex.search(short_text).end()
        return short_text[:prefix_end] + r["retweeted_status"]["extended_tweet"]["full_text"]
    except (KeyError, AttributeError):
        return r["text"]


# Retweet prefix pattern, stored on the function object so it is compiled once.
getnicetext.regex = re.compile(r"RT @[A-Za-z0-9_]{1,15}: ")

def analyze(statuses):
    """Tag every status in ``statuses`` in place with the keyword groups it mentions.

    For each status the text returned by ``getnicetext`` is split with
    ``tokenizer``, tokens whose lowercase form is in ``stopwords`` are dropped,
    and the remaining tokens are stemmed with ``stemmer``. A tag is added for
    every keyword group that contains at least one of the resulting stems.

    Args:
        statuses: iterable of mutable status dicts. Each one gets a ``"tags"``
            key (overwritten if already present) holding a sorted list drawn
            from "climate", "disaster", "acceptance", "denial", "positive"
            and "negative".

    Returns:
        None. The statuses are modified in place.
    """
    # The sets are matched against *stemmed* tokens, so they must contain
    # stems. The English Snowball stemmer reduces "earthquake" to "earthquak"
    # and "resilience"/"resilient" to "resili"; the original spellings
    # 'earthquake' and 'resilien' could therefore never match. The stems are
    # added next to the original entries, which are kept as harmless extras
    # (same approach as the existing 'deni'/'deny' pair).
    keyword_groups = (
        ("climate", {'climat', 'chang', 'global', 'warm'}),
        ("disaster", {'hurrican', 'earthquake', 'earthquak', 'tornado', 'storm'}),
        ("acceptance", {'real', 'believ', 'deterior', 'grave', 'danger', 'ignor', 'warn',
                        'impact', 'extrem', 'resilien', 'resili', 'sever'}),
        ("denial", {'fake', 'hoax', 'deni', 'deny', 'doubt', 'scheme', 'scam'}),
        ("positive", {'hope', 'wish', 'best', 'pray', 'share', 'great', 'well', 'offer', 'resolv'}),
        ("negative", {'hate', 'stupid', 'offens', 'fear', 'fuck', 'crazi', 'shit', 'wast',
                      'disgrac', 'scold', 'anxieti'}),
    )

    for r in statuses:
        # A set of stems is enough: only membership matters, not counts.
        stems = {
            stemmer.stem(w).lower()
            for w in tokenizer.findall(getnicetext(r))
            if w.lower() not in stopwords
        }

        # isdisjoint() is False as soon as one stem belongs to the group.
        r["tags"] = sorted(
            tag for tag, keywords in keyword_groups if not keywords.isdisjoint(stems)
        )

# Tag the statuses of every sampled period in place.
for _period in ("before", "during", "after"):
    analyze(sample[_period])

In [14]:
import pandas as pd
import numpy as np
import datetime
import collections

def statusestodf(statuses):
    """Convert tagged status dicts into a DataFrame with one row per status.

    Every status must provide ``"id_str"``, ``"user"["screen_name"]``,
    ``"created_at"`` (formatted as ``%Y-%m-%dT%H:%M:%S``; the parsed value is
    marked as UTC) and a ``"tags"`` collection. The text column comes from
    ``getnicetext``.

    Columns, in order: ID, Username, Timestamp, Text, "Climate change?",
    "Natural disaster?", "Acceptance/Denial?", "Positive/Negative?".
    The two boolean columns report whether the "climate" / "disaster" tag is
    present. The last two columns hold a label when exactly one tag of the
    pair is present and NaN when both or neither are.

    Args:
        statuses: iterable of status dicts.

    Returns:
        pandas.DataFrame built from the collected columns.
    """
    def pick_label(tags, first_tag, second_tag, first_label, second_label):
        # Exactly one of the two tags must be present to yield a label.
        has_first = first_tag in tags
        has_second = second_tag in tags
        if has_first == has_second:
            return np.nan
        return first_label if has_first else second_label

    column_names = (
        "ID",
        "Username",
        "Timestamp",
        "Text",
        "Climate change?",
        "Natural disaster?",
        "Acceptance/Denial?",
        "Positive/Negative?",
    )
    # Column-wise lists keep every column present even when there are no rows.
    table = collections.OrderedDict((name, []) for name in column_names)

    for status in statuses:
        status_id = status["id_str"]
        username = status["user"]["screen_name"]
        created = datetime.datetime.strptime(status["created_at"], "%Y-%m-%dT%H:%M:%S")
        created = created.replace(tzinfo = datetime.timezone.utc)
        text = getnicetext(status)
        tags = status["tags"]

        row = (
            status_id,
            username,
            created,
            text,
            "climate" in tags,
            "disaster" in tags,
            pick_label(tags, "acceptance", "denial", "Acceptance", "Denial"),
            pick_label(tags, "positive", "negative", "Positive", "Negative"),
        )
        for name, value in zip(column_names, row):
            table[name].append(value)

    return pd.DataFrame(table)

# One DataFrame per sampled period, built in the order before / during / after.
before_df, during_df, after_df = (
    statusestodf(sample[period]) for period in ("before", "during", "after")
)

# Show the "during" frame as the cell output.
during_df

Unnamed: 0,ID,Username,Timestamp,Text,Climate change?,Natural disaster?,Acceptance/Denial?,Positive/Negative?
0,1165564048953528321,PaulineMoorhou2,2019-08-25 09:58:19+00:00,@YorkshireTweet @queenbeeof90 @joswinson @LibD...,True,False,,Negative
1,1165576661896531969,TheAndrewHorton,2019-08-25 10:48:26+00:00,"Each week, @azeem @exponentialview reminds us ...",True,False,,
2,1166363282430660609,Stop_Solaroad,2019-08-27 14:54:11+00:00,@SArchibald @feeonline In the Netherlands we h...,False,False,,Negative
3,1167570630172168192,kaarrff,2019-08-30 22:51:45+00:00,RT @RealRichardBail: Faulty reasoning. Pro cli...,True,False,,
4,1167568849484619776,LeavesBandMates,2019-08-30 22:44:40+00:00,@duolingous You ever watch Netflix for three d...,True,False,,
5,1166812665882304512,honeydroppss,2019-08-28 20:39:52+00:00,i have no patience for people who still think ...,True,False,Acceptance,
6,1166495239617597440,bln231,2019-08-27 23:38:32+00:00,@joehenriod @TheAtlantic @AdamSerwer @PeterBra...,True,False,Acceptance,
7,1165208588916334592,softyjmin,2019-08-24 10:25:50+00:00,i hate the fact that people try to stuff clima...,True,False,,Negative
8,1167771313274064896,henry0410,2019-08-31 12:09:11+00:00,RT @RickOShea321: Gotta love the Gammon that #...,True,False,,
9,1165137979565924354,MatheusBrunell3,2019-08-24 05:45:16+00:00,RT @tesla_truth: One of the biggest customers ...,False,False,,


In [17]:
# Write one worksheet per period into a single workbook.
# The context manager saves and closes the file on exit; ExcelWriter.save()
# no longer exists in pandas 2.x, so calling it would raise AttributeError.
with pd.ExcelWriter(EXCEL_FILENAME) as writer:
    for _sheet_name, _frame in (
        ("Before", before_df),
        ("During", during_df),
        ("After", after_df),
    ):
        # Excel has no timezone-aware datetime type and recent pandas refuses
        # to write one. The timestamps are UTC, so drop the tzinfo and store
        # the UTC wall-clock time. The in-memory frames are left untouched.
        _frame.assign(
            Timestamp = pd.to_datetime(_frame["Timestamp"], utc = True).dt.tz_localize(None)
        ).to_excel(writer, sheet_name = _sheet_name)