In [1]:
import json
import pandas as pd
import numpy as np
import datetime as dt

In [6]:
# Build one summary JSON file per search term: rank extremes, the dates on which
# they occurred, and gap-free daily series of dates, ranks and counts.

# Rows dated before this day are discarded.
EARLIEST_DATE = dt.date(2010, 1, 1)
# Placeholder rank written for days that have no data (their count is set to 0).
MISSING_RANK = 999999999
# Resolve "today" once so the span and the day list are always computed against
# the same calendar day, even if the cell runs across midnight.
today = dt.datetime.today().date()

for query in ['christmas','easter','halloween','superbowl']:
    output = {'word': query}
    # Open JSON <---- THIS WILL BE REPLACED WITH MONGO QUERY
    with open('../archive/d3demo/json/'+query+'.json', "r") as read_file:
        data = json.load(read_file)
    # Build df with a fixed column order
    df = pd.DataFrame(data, columns=['counts', 'rank', 'time'])
    # Keep the leading YYYY-MM-DD part of each timestamp and parse it into a date object
    df['time'] = [dt.datetime.strptime(t[:10], '%Y-%m-%d').date() for t in df['time']]
    print("Before removing early dates, shape is", df.shape)
    # Remove early dates; .copy() makes the result an independent frame rather
    # than a slice of the unfiltered one
    df = df[df['time'] >= EARLIEST_DATE].copy()
    print("After removing early dates, shape is", df.shape)
    if df.empty:
        raise ValueError("No rows on or after " + str(EARLIEST_DATE) + " for query '" + query + "'")
    # Calculate rank extremes and the median rank. Note the inversion: 'maxrank'
    # holds the numerically smallest rank and 'minrank' the largest (presumably
    # because rank 1 is the top position - confirm with the UI consumer).
    # These are computed before gap filling so MISSING_RANK never influences them.
    output['maxrank'] = int(df['rank'].min())
    output['minrank'] = int(df['rank'].max())
    output['medianrank'] = int(np.round(np.median(df['rank'].values)))
    # Index df by date
    df = df.set_index('time')
    # Every day from the earliest remaining date up to and including yesterday
    # should be present; collect the ones that are not. A set lookup tolerates
    # rows dated today and duplicated days, which list.remove() would reject.
    known_days = set(df.index)
    span = (today - df.index.min()).days
    missing_days = [
        day
        for day in (today - dt.timedelta(days=offset + 1) for offset in range(span))
        if day not in known_days
    ]
    # Fill all missing days with placeholder values in one step. This avoids
    # chained assignment (df.loc[day]['col'] = value), which modifies a temporary
    # copy and leaves the row as NaN.
    if missing_days:
        filler = pd.DataFrame({'counts': 0, 'rank': MISSING_RANK}, index=missing_days)
        df = pd.concat([df, filler])
    # Sort days, oldest first
    df = df.sort_index()
    # Convert time back to a string
    df.index = [day.strftime("%Y-%m-%d") for day in df.index]
    # Dates on which the rank hit each extreme. The index is ascending, so
    # position 0 is the first occurrence and position -1 the most recent
    # (they coincide when the extreme occurred on a single day).
    days_at_max = df[df['rank'] == output['maxrank']].index
    days_at_min = df[df['rank'] == output['minrank']].index
    output['firstmax'] = days_at_max[0]
    output['lastmax'] = days_at_max[-1]
    output['firstmin'] = days_at_min[0]
    output['lastmin'] = days_at_min[-1]
    # Send dates, ranks and counts as arrays to the output dict
    output['times'] = df.index.tolist()
    output['ranks'] = [int(r) for r in df['rank'].values] # Convert from numpy scalars to Python integers
    output['counts'] = [int(c) for c in df['counts'].values] # Convert from numpy scalars to Python integers
    # Send the object to a JSON file; the with-block closes the file on exit
    url = '../ui/data/'+query+'.json'
    with open(url, 'w') as outfile:
        print('opened ',url)
        json.dump(output, outfile)
    print('closed ',url)

Before removing early dates, shape is (3937, 3)
After removing early dates, shape is (3427, 3)
opened  ../ui/data/christmas.json
closed  ../ui/data/christmas.json
Before removing early dates, shape is (3889, 3)
After removing early dates, shape is (3427, 3)
opened  ../ui/data/easter.json
closed  ../ui/data/easter.json
Before removing early dates, shape is (3888, 3)
After removing early dates, shape is (3427, 3)
opened  ../ui/data/halloween.json
closed  ../ui/data/halloween.json
Before removing early dates, shape is (3788, 3)
After removing early dates, shape is (3425, 3)
opened  ../ui/data/superbowl.json
closed  ../ui/data/superbowl.json


In [None]:
#!flask/bin/python
from flask import Flask

# Minimal Flask application with a single route.
app = Flask(__name__)

@app.route('/')
def index():
    """Handle requests to the root URL with a fixed greeting."""
    return "Hello, World!"

if __name__ == '__main__':
    # The port must be an integer; a string such as '3001' is not a valid port value.
    # NOTE(review): debug=True also turns on the auto-reloader, which may not work
    # when this cell is run inside a notebook kernel - confirm how this is launched.
    app.run(debug=True, port=3001)