In [5]:
import csv
import json
import os
from collections import defaultdict

import couchdb
import numpy as np
import pandas as pd
from sklearn import preprocessing

In [6]:
# Dict to store all data. sa4 as key.
data_dict = defaultdict(dict)

In [7]:
# data_dict

#### Get tweet counts and sentiment scores for each SA4 (requires a tunnel to the CouchDB instance)

In [8]:
# Connection settings are read from the environment so real credentials do not
# have to live in the notebook. Each fallback is the value that was previously
# hard-coded here, so the cell behaves the same when no variable is set.
user = os.environ.get('COUCH_USER', 'admin')
password = os.environ.get('COUCH_PASSWORD', 'password')
COUCH_ADDRESS = os.environ.get('COUCH_ADDRESS', 'localhost')
# 15984 is the port this notebook connected to; an earlier, commented-out
# variant of this cell used 5984 instead.
COUCH_PORT = os.environ.get('COUCH_PORT', '15984')

# Connect to Couch DB Server
server = couchdb.Server(
    "http://{}:{}@{}:{}/".format(user, password, COUCH_ADDRESS, COUCH_PORT)
)

# 'tweets' backs the Results/* views; 'front_end' stores the 'output' document
# whose attachment is read and rewritten further down.
db = server['tweets']
front_end_db = server['front_end']

In [9]:
front_end_db

<Database 'front_end'>

In [10]:
front_end_db['output']

<Document 'output'@'117-e745fea297c574c18e44901e07f9768c' {'_attachments': {'out_data.json': {'content_type': 'application/json', 'revpos': 117, 'digest': 'md5-XeGVcpeo2DeGhXXPNUI3nA==', 'length': 36403, 'stub': True}}}>

In [11]:
# Tweet totals from the grouped 'Results/TweetCount' view, keyed by the view's group key
tweet_counts = {
    row.key: row.value
    for row in db.view('Results/TweetCount', group='true')
}

# Summed sentiment from the grouped 'Results/SentimentSum' view, keyed the same way
sent_sum = {
    row.key: row.value
    for row in db.view('Results/SentimentSum', group='true')
}

In [12]:
for key, count in tweet_counts.items():
    sent = sent_sum[key]
    # Average sentiment per tweet for this key
    score = sent / count

    # Store the score together with the two totals it was derived from
    data_dict[key].update(
        sentiment_score=score,
        tweet_counts=count,
        sent_sum=sent,
    )
    

#### Process data from Aurin & SA4 Geojson 

In [13]:
def _load_json(path):
    """Parse the JSON file at `path` and close the file handle afterwards."""
    with open(path) as handle:
        return json.load(handle)


# Import sa4 geojson file and aurin data files
sa4_geo_file = _load_json('SA4_geojson.json')
sa4_centroids = _load_json('sa4_geojson_centroid.json')

# crime data needs lga_sa4 conversion
crime_data = _load_json('crimedata.json')
income_data = _load_json('equivalisedincomedata.json')
family_data = _load_json('familycommunitydata.json')
unemployment_data = _load_json('SA4_unemployment.json')
population_data = _load_json('populationdata.json')
industry_data = _load_json('industry.json')
socio_advantage_data = _load_json('socioirsaddata.json')
personal_income = _load_json('personalincomedata.json')
house_price = _load_json('housepricedata.json')

# Keep the raw CSV lines in a list rather than an open file object: the handle
# is closed straight away, and code that iterates `lga_sa4` can be re-run
# without finding an exhausted file.
with open('lga_sa4.csv', 'r') as lga_file:
    lga_sa4 = lga_file.readlines()

In [14]:
# Build the LGA -> SA4 lookup from the two-column conversion rows
lga_sa4_dict = defaultdict(list)
for raw_row in lga_sa4:
    # Each row is "<lga>,<sa4>"; only the trailing newline is removed
    lga, sa4 = raw_row.strip('\n').split(",")
    lga_sa4_dict[lga] = sa4
    
# Create DF for data analysis


In [15]:
def process_aurin(data_set, features, target=None):
    """Copy selected properties from a GeoJSON-style data set into a per-SA4 dict.

    For every item in ``data_set['features']`` whose
    ``properties['sa4_code_2016']`` is already a key of ``target``, each named
    feature is copied from the item's properties into ``target[code][feature]``.

    Args:
        data_set: mapping with a 'features' list; each entry carries a
            'properties' mapping.
        features: iterable of property names to copy.
        target: dict of dicts to fill, keyed by SA4 code. Defaults to the
            module-level ``data_dict``.

    Returns:
        None. ``target`` is updated in place.

    An item that lacks the SA4 code or the requested property is skipped on its
    own. Previously a bare ``except`` around the whole item loop meant the
    first such item silently dropped every remaining item for that feature.
    """
    if target is None:
        target = data_dict

    try:
        items = data_set['features']
    except (KeyError, TypeError):
        # Nothing usable to read; stay best-effort like the original.
        return

    for feature in features:
        for item in items:
            try:
                props = item['properties']
                sa4_code = props['sa4_code_2016']
                if sa4_code in target:
                    target[sa4_code][feature] = props[feature]
            except (KeyError, TypeError):
                # Malformed or incomplete item: skip just this one.
                continue

In [16]:
def add_to_dict(key, d, val):
    """Accumulate ``val`` under ``key`` in ``d``.

    If ``key`` is already present the stored value is replaced by
    ``d[key] + val``; otherwise ``val`` is stored as the initial value.
    """
    d[key] = (d[key] + val) if key in d else val

In [17]:
# Process crime sa4_lga conversion
# Add sa4 to each LGA in crime_data
# for item in crime_data['features']:
#     if item['properties']['lga_code'] in lga_sa4_dict.keys():
        
#         a = item['properties']['total_division_a_offences']
#         b = item['properties']['total_division_b_offences']
#         c = item['properties']['total_division_c_offences']
#         d = item['properties']['total_division_d_offences']
#         e = item['properties']['total_division_e_offences']
#         f = item['properties']['total_division_f_offences']
#         sum_crimes = a+b+c+d+e+f
        
#         item['properties']['sa4_code_2016'] = lga_sa4_dict[item['properties']['lga_code']]
#         item['properties']['sum_crimes'] = sum_crimes

In [18]:
# Unemployment data: store each item's 'unemployed_tot_000' value under
# 'unemployed_rate' for SA4 codes that already have an entry.
for item in unemployment_data['features']:
    props = item['properties']
    sa4_code = str(props['sa4_code'])  # codes are compared as strings
    if sa4_code in data_dict:
        data_dict[sa4_code]['unemployed_rate'] = props['unemployed_tot_000']
#         data_dict[str(item['properties']['sa4_code'])]['avg_duration_job_search_wks'] = item['properties']['avg_duration_job_search_wks']

In [19]:
# Median house price (auction and private treaty sales combined) per SA4
for item in house_price['features']:
    props = item['properties']
    sa4_code = str(props['sa42016code'])
    if sa4_code in data_dict:
        data_dict[sa4_code]['median_house_price'] = props['sold_both_auction_private_treaty_medianprice']

In [20]:
# Property names to copy out of each AURIN data set
income_features = ['equivalised_total_household_income_census_median_weekly']
family_features = [
    'rent_mortgage_payments_census_average_monthly_household_payment',
    'sa4_name_2016',
]
personal_income_features = ['mean_aud', 'median_aud']
# Feature sets that are currently switched off:
# crime_features = ['total_division_a_offences', 'total_division_b_offences', 'total_division_c_offences', 
#                   'total_division_d_offences', 'total_division_e_offences', 'total_division_f_offences', 
#                   'sum_crimes']
# socio_features = ['irsad_score']

# Run the same extraction over every active (data set, feature list) pair
for dataset, feature_names in (
    (income_data, income_features),
    (family_data, family_features),
    (personal_income, personal_income_features),
):
    process_aurin(dataset, feature_names)
# process_aurin(crime_data, crime_features)
# process_aurin(socio_advantage_data, socio_features)

In [21]:
data_dict

defaultdict(dict,
            {'101': {'sentiment_score': 0.2523570712136409,
              'tweet_counts': 4985,
              'sent_sum': 1258,
              'unemployed_rate': 5.76200061,
              'median_house_price': 550000.0,
              'equivalised_total_household_income_census_median_weekly': 795,
              'rent_mortgage_payments_census_average_monthly_household_payment': 1805,
              'sa4_name_2016': 'Capital Region',
              'mean_aud': 54054,
              'median_aud': 44110},
             '102': {'sentiment_score': 0.2826027397260274,
              'tweet_counts': 7300,
              'sent_sum': 2063,
              'unemployed_rate': 9.20597806,
              'median_house_price': 680000.0,
              'equivalised_total_household_income_census_median_weekly': 774,
              'rent_mortgage_payments_census_average_monthly_household_payment': 1890,
              'sa4_name_2016': 'Central Coast',
              'mean_aud': 53716,
              '

In [22]:
# # Add population data
# for item in population_data['features']:
#     if item['properties']['sa4_code16'] in data_dict.keys():
#         data_dict[item['properties']['sa4_code16']]['persons_total'] = item['properties']['persons_total']


In [23]:
# Attach each SA4's centroid coordinates to its existing entry
for item in sa4_centroids['features']:
    sa4_code = item['properties']['SA4_CODE16']
    if sa4_code in data_dict:
        data_dict[sa4_code]['centroid'] = item['geometry']['coordinates']
    

In [24]:
# for item in industry_data['features']:
#     if item['properties']['sa4_code_2016'] in data_dict.keys():
#         data_dict[item['properties']['sa4_code_2016']]['num_recreation_busi'] = item['properties']['number_businesses_industry_30_june_arts_recreation_services_num']
#         data_dict[item['properties']['sa4_code_2016']]['num_scientific_busi'] = item['properties']['nmbr_bsnsss_indstry_30_jne_prfssnl_scntfc_tchncl_srvcs_nm']
#         data_dict[item['properties']['sa4_code_2016']]['num_mining_busi'] = item['properties']['number_of_businesses_by_industry_as_at_30_june_mining_num']
#         data_dict[item['properties']['sa4_code_2016']]['num_finance_busi'] = item['properties']['nmbr_bsnsss_indstry_30_jne_fnncl_insrnce_srvcs_nm']
#         data_dict[item['properties']['sa4_code_2016']]['num_agri_busi'] = item['properties']['nmbr_bsnsss_indstry_30_jne_agrcltre_frstry_fshng_nm']

In [25]:
# personal_income

In [26]:
data_dict

defaultdict(dict,
            {'101': {'sentiment_score': 0.2523570712136409,
              'tweet_counts': 4985,
              'sent_sum': 1258,
              'unemployed_rate': 5.76200061,
              'median_house_price': 550000.0,
              'equivalised_total_household_income_census_median_weekly': 795,
              'rent_mortgage_payments_census_average_monthly_household_payment': 1805,
              'sa4_name_2016': 'Capital Region',
              'mean_aud': 54054,
              'median_aud': 44110,
              'centroid': [149.24, -35.56]},
             '102': {'sentiment_score': 0.2826027397260274,
              'tweet_counts': 7300,
              'sent_sum': 2063,
              'unemployed_rate': 9.20597806,
              'median_house_price': 680000.0,
              'equivalised_total_household_income_census_median_weekly': 774,
              'rent_mortgage_payments_census_average_monthly_household_payment': 1890,
              'sa4_name_2016': 'Central Coast',
    

#### Analysis of Tweets

In [27]:
# One row per data_dict key (orient='index'), one column per collected feature
data_df=pd.DataFrame.from_dict(data_dict, orient='index')

In [28]:
data_df

Unnamed: 0,sentiment_score,tweet_counts,sent_sum,unemployed_rate,median_house_price,equivalised_total_household_income_census_median_weekly,rent_mortgage_payments_census_average_monthly_household_payment,sa4_name_2016,mean_aud,median_aud,centroid
101,0.252357,4985,1258,5.762001,550000.0,795,1805,Capital Region,54054,44110,"[149.24, -35.56]"
102,0.282603,7300,2063,9.205978,680000.0,774,1890,Central Coast,53716,43677,"[151.29, -33.31]"
103,0.280000,750,210,5.622973,429000.0,742,1625,Central West,54112,43834,"[148.36, -33.22]"
104,0.190299,268,51,2.280920,516000.0,648,1568,Coffs Harbour - Grafton,45249,37017,"[152.77, -29.82]"
106,0.532164,684,364,7.075872,485000.0,786,1801,Hunter Valley exc Newcastle,58769,45556,"[150.98, -32.35]"
...,...,...,...,...,...,...,...,...,...,...,...
603,0.552846,369,204,0.854961,400000.0,625,1277,South East,44407,36927,"[146.82, -42.61]"
604,0.605263,114,69,3.025266,305000.0,644,1271,West and North West,49216,41816,"[145.56, -41.73]"
701,0.411624,7106,2925,5.404876,470000.0,1316,2383,Darwin,71185,60572,"[131.11, -12.53]"
702,1.000000,3,3,3.117814,420000.0,954,1945,Northern Territory - Outback,59232,51797,"[133.37, -19.49]"


In [29]:
# Correlate the numeric columns only. data_df also holds text and list columns
# ('sa4_name_2016', 'centroid'), which cannot be correlated and make a plain
# corr() call fail on pandas versions that no longer drop them silently.
data_df.select_dtypes(include='number').corr()

Unnamed: 0,sentiment_score,tweet_counts,sent_sum,unemployed_rate,median_house_price,equivalised_total_household_income_census_median_weekly,rent_mortgage_payments_census_average_monthly_household_payment,mean_aud,median_aud
sentiment_score,1.0,-0.06118,0.314599,0.104083,0.030826,-0.045684,0.027396,-0.038097,-0.063326
tweet_counts,-0.06118,1.0,0.782898,0.086287,-0.002042,0.155176,0.120497,0.174564,0.154658
sent_sum,0.314599,0.782898,1.0,0.067034,-0.00589,0.077416,0.097766,0.130595,0.09169
unemployed_rate,0.104083,0.086287,0.067034,1.0,0.238516,0.190672,0.302258,0.11292,0.199347
median_house_price,0.030826,-0.002042,-0.00589,0.238516,1.0,0.752863,0.836553,0.740191,0.555972
equivalised_total_household_income_census_median_weekly,-0.045684,0.155176,0.077416,0.190672,0.752863,1.0,0.90541,0.924634,0.924687
rent_mortgage_payments_census_average_monthly_household_payment,0.027396,0.120497,0.097766,0.302258,0.836553,0.90541,1.0,0.854621,0.81476
mean_aud,-0.038097,0.174564,0.130595,0.11292,0.740191,0.924634,0.854621,1.0,0.887431
median_aud,-0.063326,0.154658,0.09169,0.199347,0.555972,0.924687,0.81476,0.887431,1.0


In [30]:
# import seaborn as sns; sns.set_theme(color_codes=True)


# ax = sns.regplot(x="mean_aud", y="sentiment_score", data=data_df)

In [31]:
round(200.1234, 2)

200.12

In [32]:
# Raw column -> name of the min-max scaled column added to data_df.
# The mixed "_scaled" / "_s" suffixes are kept so the resulting column names
# are exactly the ones this cell produced before.
SCALED_COLUMNS = {
    'sentiment_score': 'sentiment_score_scaled',
    'tweet_counts': 'tweet_counts_s',
    'sent_sum': 'sent_sum_s',
    'unemployed_rate': 'unemployed_rate_scaled',
    'median_house_price': 'median_house_price_s',
    'equivalised_total_household_income_census_median_weekly': 'equivalised_total_household_income_census_median_weekly_s',
    'rent_mortgage_payments_census_average_monthly_household_payment': 'rent_mortgage_payments_census_average_monthly_household_payment_s',
    'mean_aud': 'mean_aud_s',
    'median_aud': 'median_aud_s',
}

# Default feature range; a (0, 100) range was tried earlier:
# min_max_scaler = preprocessing.MinMaxScaler(feature_range=(0, 100))
min_max_scaler = preprocessing.MinMaxScaler()

# Scale every column independently (the scaler is refitted per column).
for raw_col, scaled_col in SCALED_COLUMNS.items():
    column_values = np.array(data_df[raw_col]).reshape(-1, 1)
    data_df[scaled_col] = min_max_scaler.fit_transform(column_values).ravel()

# Replace each raw value in data_dict with a [raw, scaled] pair rounded to 2 dp.
for index, row in data_df.iterrows():
    sa4_code = str(index)
    if sa4_code not in data_dict:
        continue
    region = data_dict[sa4_code]
    for raw_col, scaled_col in SCALED_COLUMNS.items():
        raw_value = region.get(raw_col)
        if isinstance(raw_value, list):
            # Already a [raw, scaled] pair from an earlier run of this cell;
            # rounding it again would raise, so leave it untouched.
            continue
        if raw_value is None:
            # Same placeholder that was used for a missing 'unemployed_rate',
            # now applied to every column instead of raising KeyError.
            region[raw_col] = ['No Record', 'No Record']
        else:
            region[raw_col] = [round(raw_value, 2), round(row[scaled_col], 2)]
# data_dict


In [33]:
data_dict

defaultdict(dict,
            {'101': {'sentiment_score': [0.25, 0.29],
              'tweet_counts': [4985, 0.68],
              'sent_sum': [1258, 0.27],
              'unemployed_rate': [5.76, 0.13],
              'median_house_price': [550000.0, 0.11],
              'equivalised_total_household_income_census_median_weekly': [795,
               0.22],
              'rent_mortgage_payments_census_average_monthly_household_payment': [1805,
               0.34],
              'sa4_name_2016': 'Capital Region',
              'mean_aud': [54054, 0.16],
              'median_aud': [44110, 0.32],
              'centroid': [149.24, -35.56]},
             '102': {'sentiment_score': [0.28, 0.3],
              'tweet_counts': [7300, 1.0],
              'sent_sum': [2063, 0.37],
              'unemployed_rate': [9.21, 0.22],
              'median_house_price': [680000.0, 0.16],
              'equivalised_total_household_income_census_median_weekly': [774,
               0.2],
              're

In [118]:
# Print every per-SA4 record, one per line
for region_record in data_dict.values():
    print(region_record)

{'sentiment_score': [0.25, 0.29], 'tweet_counts': [4985, 0.68], 'sent_sum': [1258, 0.27], 'unemployed_rate': [5.76, 0.13], 'median_house_price': [550000.0, 0.11], 'equivalised_total_household_income_census_median_weekly': [795, 0.22], 'rent_mortgage_payments_census_average_monthly_household_payment': [1805, 0.34], 'sa4_name_2016': 'Capital Region', 'mean_aud': [54054, 0.16], 'median_aud': [44110, 0.32], 'centroid': [149.24, -35.56]}
{'sentiment_score': [0.28, 0.3], 'tweet_counts': [7300, 1.0], 'sent_sum': [2063, 0.37], 'unemployed_rate': [9.21, 0.22], 'median_house_price': [680000.0, 0.16], 'equivalised_total_household_income_census_median_weekly': [774, 0.2], 'rent_mortgage_payments_census_average_monthly_household_payment': [1890, 0.38], 'sa4_name_2016': 'Central Coast', 'mean_aud': [53716, 0.16], 'median_aud': [43677, 0.3], 'centroid': [151.29, -33.31]}
{'sentiment_score': [0.28, 0.3], 'tweet_counts': [750, 0.1], 'sent_sum': [210, 0.15], 'unemployed_rate': [5.62, 0.13], 'median_hous

In [113]:
# factor = 1.0/sum(data_dict[''])

# Flatten three fields of data_dict into [sa4, value] rows, one row per entry
# of each field's list.
# NOTE(review): once the scaling cell has run, these fields are [raw, scaled]
# pairs, so raw and already-scaled numbers land in the same list and are
# rescaled together afterwards — confirm that this is intended.
normalised_sent = []
normalised_counts = []
normalised_sum = []
for sa4, region in data_dict.items():
    normalised_sent.extend([sa4, value] for value in region['sentiment_score'])
    normalised_counts.extend([sa4, value] for value in region['tweet_counts'])
    # The loop variable here used to be called `sent_sum`, which overwrote the
    # module-level `sent_sum` dict that the periodic-update cells index by SA4
    # code. Comprehension variables stay local, so that dict is left intact.
    normalised_sum.extend([sa4, value] for value in region['sent_sum'])

In [114]:
# sent_factor = 1.0/sum(normalised_sent.values())
# # normalised_sent
# for item in normalised_sent:
#     normalised_sent[item] = [normalised_sent[item], normalised_sent[item]*sent_factor]

In [115]:
def _second_column(rows):
    """Return the second entry of every row as an (n, 1) float array.

    Building the array from the numeric entries alone avoids ``np.array(rows)``
    on mixed [str, float] rows, which coerces every element to a string and
    leaves the scaler to convert the text back into numbers.
    """
    return np.array([row[1] for row in rows], dtype=float).reshape(-1, 1)


sent_scaled_val = min_max_scaler.fit_transform(_second_column(normalised_sent))
count_scaled_val = min_max_scaler.fit_transform(_second_column(normalised_counts))
sum_scaled_val = min_max_scaler.fit_transform(_second_column(normalised_sum))

# np.array(normalised_sent.values())

In [116]:
# Append each scaled value to the row at the same position in its source list
for rows, scaled_values in (
    (normalised_sent, sent_scaled_val),
    (normalised_counts, count_scaled_val),
    (normalised_sum, sum_scaled_val),
):
    for i, v in enumerate(scaled_values):
        # v is a length-1 row of the scaler's 2-D output
        rows[i].append(v[0])

In [117]:
normalised_sent

[['101', 0.25, 0.28640776699029125],
 ['101', 0.29, 0.3058252427184466],
 ['102', 0.28, 0.3009708737864078],
 ['102', 0.3, 0.3106796116504854],
 ['103', 0.28, 0.3009708737864078],
 ['103', 0.3, 0.3106796116504854],
 ['104', 0.19, 0.25728155339805825],
 ['104', 0.26, 0.2912621359223301],
 ['106', 0.53, 0.4223300970873787],
 ['106', 0.42, 0.36893203883495146],
 ['107', 0.56, 0.4368932038834952],
 ['107', 0.44, 0.3786407766990292],
 ['108', 0.47, 0.3932038834951456],
 ['108', 0.39, 0.35436893203883496],
 ['110', 0.25, 0.28640776699029125],
 ['110', 0.29, 0.3058252427184466],
 ['111', 0.48, 0.3980582524271845],
 ['111', 0.4, 0.35922330097087385],
 ['112', 1.72, 1.0],
 ['112', 1.0, 0.6504854368932039],
 ['113', 0.43, 0.3737864077669903],
 ['113', 0.38, 0.34951456310679613],
 ['114', 0.32, 0.32038834951456313],
 ['114', 0.32, 0.32038834951456313],
 ['115', 0.62, 0.46601941747572817],
 ['115', 0.47, 0.3932038834951456],
 ['116', 0.03, 0.1796116504854369],
 ['116', 0.18, 0.2524271844660194],
 

In [52]:
# Flatten data_dict into a list of records, each carrying its own 'sa4_code'.
# dict(value, ...) makes a shallow copy, so the extra key is added to the
# exported record only. Previously `curr_d = value` aliased the inner dict and
# silently wrote 'sa4_code' into data_dict as well.
out = [dict(value, sa4_code=key) for key, value in data_dict.items()]

In [53]:
out

[{'sentiment_score': [0.28, 0.17],
  'tweet_counts': [38643, 0.1],
  'sent_sum': [11001, 0.06],
  'unemployed_rate': [5.76, 0.13],
  'median_house_price': [550000.0, 0.11],
  'equivalised_total_household_income_census_median_weekly': [795, 0.22],
  'rent_mortgage_payments_census_average_monthly_household_payment': [1805,
   0.34],
  'sa4_name_2016': 'Capital Region',
  'mean_aud': [54054, 0.16],
  'median_aud': [44110, 0.32],
  'centroid': [149.24, -35.56],
  'sa4_code': '101'},
 {'sentiment_score': [0.01, 0.11],
  'tweet_counts': [18996, 0.05],
  'sent_sum': [159, 0.0],
  'unemployed_rate': [9.21, 0.22],
  'median_house_price': [680000.0, 0.16],
  'equivalised_total_household_income_census_median_weekly': [774, 0.2],
  'rent_mortgage_payments_census_average_monthly_household_payment': [1890,
   0.38],
  'sa4_name_2016': 'Central Coast',
  'mean_aud': [53716, 0.16],
  'median_aud': [43677, 0.3],
  'centroid': [151.29, -33.31],
  'sa4_code': '102'},
 {'sentiment_score': [0.36, 0.19],
  

In [54]:
# Serialise the flattened records for the front end
with open('front_output.json', 'w') as front_file:
    front_file.write(json.dumps(out))

In [48]:
'output' in front_end_db

True

In [4]:
# Read the exported records back. The context manager closes the handle, which
# the previous bare open(...).read() never did.
with open('front_output.json', 'r') as front_file:
    in_test = json.load(front_file)
in_test[0]

{'sentiment_score': [0.28, 0.17],
 'tweet_counts': [38643, 0.1],
 'sent_sum': [11001, 0.06],
 'unemployed_rate': [5.76, 0.13],
 'median_house_price': [550000.0, 0.11],
 'equivalised_total_household_income_census_median_weekly': [795, 0.22],
 'rent_mortgage_payments_census_average_monthly_household_payment': [1805,
  0.34],
 'sa4_name_2016': 'Capital Region',
 'mean_aud': [54054, 0.16],
 'median_aud': [44110, 0.32],
 'centroid': [149.24, -35.56],
 'sa4_code': '101'}

### Update Sentiment Scores periodically

In [120]:

#get file from DB
in_file = front_end_db.get_attachment('output', 'out_data.json').read()
in_text = in_file.decode('utf8')
try:
    # Parse the attachment as-is first: swapping quotes unconditionally would
    # corrupt valid JSON whose strings contain an apostrophe.
    in_json = json.loads(in_text)
except ValueError:
    # Fall back to the original behaviour for content written with single
    # quotes instead of JSON double quotes.
    in_json = json.loads(in_text.replace("'", '"'))


In [121]:
in_json[0]

{'sentiment_score': [0.2846828662370934, 0.17437397027490964],
 'tweet_counts': [38643, 0.10564302488661186],
 'sent_sum': [11001, 0.05829342666429148],
 'median_house_price': [550000.0, 0.10821643286573147],
 'equivalised_total_household_income_census_median_weekly': [795,
  0.22285714285714286],
 'rent_mortgage_payments_census_average_monthly_household_payment': [1805,
  0.3354614045269878],
 'sa4_name_2016': 'Capital Region',
 'mean_aud': [54054, 0.16168338796728587],
 'median_aud': [44110, 0.31582840236686405],
 'centroid': [149.24, -35.56],
 'sa4_code': '101'}

In [122]:
# Sentiment score (summed sentiment divided by tweet count) for every key of tweet_counts
sentiment_score = {
    key: sent_sum[key] / count
    for key, count in tweet_counts.items()
}


In [123]:
# sentiment_score

In [124]:
# Refresh the three tweet-derived fields of every record with a known SA4 code.
# NOTE(review): the stored records hold [raw, scaled] pairs for these fields,
# while the values written here are plain scalars — confirm that whatever
# consumes the file accepts both shapes.
for item in in_json:
    key = item['sa4_code']
    if key not in sentiment_score:
        continue
    item['sentiment_score'] = sentiment_score[key]
    item['sent_sum'] = sent_sum[key]
    item['tweet_counts'] = tweet_counts[key]

In [125]:
# Attach the refreshed records to the 'test_3' document as out_data.json
front_end_db.put_attachment(
    front_end_db['test_3'],
    in_json,
    'out_data.json',
    "application/json",
)

In [5]:
# Attach the records read back from front_output.json to the 'output' document
# front_end_db.put_attachment(doc, out, 'out_data.json', "application/json")
front_end_db.put_attachment(
    front_end_db['output'],
    in_test,
    'out_data.json',
    "application/json",
)

#### Process Output json

In [None]:
# Output processed geojson
def merge_data(data, sa4_geo, out_path='output.json'):
    """Merge per-SA4 features into a GeoJSON collection and write it to disk.

    Args:
        data: dict keyed by SA4 code; each value is a dict of feature -> value.
        sa4_geo: GeoJSON-style dict whose 'features' entries carry
            ``properties['SA4_CODE16']``. It is updated in place.
        out_path: file the merged collection is dumped to as JSON.

    Returns:
        None.

    Regions present in ``data`` receive their feature values. Regions absent
    from ``data`` receive 'No Record' for every feature name that occurs
    anywhere in ``data``. The previous version looked the names up through
    ``data[key]`` for the very key that was missing, which raises KeyError on a
    plain dict and, on a defaultdict, inserts an empty entry and writes nothing.
    """
    # Every feature name seen in any region, in first-seen order.
    all_features = []
    seen = set()
    for region in data.values():
        for feature in region:
            if feature not in seen:
                seen.add(feature)
                all_features.append(feature)

    for row in sa4_geo['features']:
        props = row['properties']
        key = props['SA4_CODE16']
        if key in data:
            props.update(data[key])
        else:
            for feature in all_features:
                props[feature] = 'No Record'

    with open(out_path, 'w') as outfile:
        json.dump(sa4_geo, outfile)

In [None]:
# merge_data(data_dict, sa4_geo_file)


In [None]:
# check = json.load(open('output.json'))
# for item in check['features']:
#     if item['properties']['SA4_CODE16'] == '117':
#         print(item)

In [None]:
len(data_dict)

In [None]:
data_dict.keys()

In [None]:
sa4_geo_file['features'][2]