In [None]:
import json
import os
import re
import time

import matplotlib.pyplot as plt
import MySQLdb as mdb
import pandas as pd
import requests
from slackclient import SlackClient

In [None]:
def message_matches(user_id, message_text):
    '''
    Check if a mention of the user ('@' followed by the user id) appears in the
    text, followed somewhere later by the word 'bot'.

    Parameters
    ----------
    user_id : str
        The user id to look for. It is matched literally.
    message_text : str or None
        The text of the message. None or an empty text never matches.

    Returns
    -------
    bool
        True if '@<user_id>' appears in the text and 'bot' appears after it.
    '''
    # Some events carry no text at all; treat them as "not addressed to the bot"
    if not message_text:
        return False
    # Escape the id so that regex metacharacters in it are matched literally, and
    # use DOTALL so the mention and the word 'bot' may sit on different lines.
    regex = re.compile('@' + re.escape(user_id) + '.*bot', re.DOTALL)
    # search() scans the whole text, so no leading/trailing '.*' is needed
    return regex.search(message_text) is not None

In [None]:
def extract_station_name(message_text):
    '''
    Pull the station name out of a question that contains the phrase
    "bikes on <name> station". Only this single pattern is recognised; a more
    realistic chatbot would try several patterns to be more forgiving.

    Returns the captured name for the first occurrence of the pattern,
    or None when the pattern does not appear in the text.
    '''
    first_hit = re.search('bikes on (.+) station', message_text)
    if first_hit is None:
        # The question did not follow the expected pattern
        return None
    return first_hit.group(1)

In [None]:
def get_citibike_data(station_name):
    '''
    Returns a list of dictionaries with the station id, station name and
    available bikes for all stations whose name contains station_name
    (case-sensitive substring match).

    Raises whatever requests raises on a network failure, a timeout,
    or a non-success HTTP status.
    '''
    url = 'http://www.citibikenyc.com/stations/json'
    # Without a timeout, a stalled connection would block the caller forever
    response = requests.get(url, timeout=10)
    # Fail with a clear HTTP error instead of a confusing JSON parsing error
    response.raise_for_status()
    stations = response.json()["stationBeanList"]
    # Create a list of dictionaries. Each dictionary has three entries:
    # the station id, the station name, and the available bikes
    return [
        {
            "station_id": entry["id"],
            "station_name": entry["stationName"],
            "available": entry["availableBikes"],
        }
        for entry in stations
        if station_name in entry["stationName"]
    ]

In [None]:
def get_historic_citibike_data(station_id):
    '''
    For a given station_id, we connect to the database and return average usage
    data over the hours of the day. The query shifts the stored timestamp by a
    fixed offset from '+00:00' to '-04:00' before extracting the hour.

    Returns a Pandas DataFrame indexed by 'hour' (sorted) with a single column
    'bikes_available'. If the station has no rows, the DataFrame is empty but
    still has that index name and column.

    The connection settings can be overridden with the environment variables
    CITIBIKE_DB_HOST, CITIBIKE_DB_USER, CITIBIKE_DB_NAME and CITIBIKE_DB_PASSWORD.
    '''
    # NOTE(review): the defaults below keep credentials in the notebook; prefer
    # setting the environment variables and removing the defaults when sharing.
    con = mdb.connect(host = os.environ.get('CITIBIKE_DB_HOST', 'ipython.ipeirotis.com'),
                  user = os.environ.get('CITIBIKE_DB_USER', 'root'),
                  database = os.environ.get('CITIBIKE_DB_NAME', 'citibike_new'),
                  passwd = os.environ.get('CITIBIKE_DB_PASSWORD', 'dwdstudent2015'),
                  charset='utf8', use_unicode=True)

    # NOTE(review): '-04:00' is a fixed offset, so hours are presumably off by
    # one whenever NYC is not on daylight saving time -- confirm before relying on it.
    query_template = '''
        SELECT HOUR(CONVERT_TZ(last_reported, '+00:00','-04:00')) AS hour, 
               FLOOR(AVG(num_bikes_available)) AS bikes_available
        FROM Status 
        WHERE station_id = %s
        GROUP BY HOUR(CONVERT_TZ(last_reported, '+00:00','-04:00'))
        ORDER BY HOUR(CONVERT_TZ(last_reported, '+00:00','-04:00'))
    '''
    # Release the cursor and the connection even when the query fails
    try:
        cur = con.cursor(mdb.cursors.DictCursor)
        try:
            # station_id is passed as a query parameter, never interpolated
            cur.execute(query_template, (station_id,))
            rows = cur.fetchall()
        finally:
            cur.close()
    finally:
        con.close()

    # Naming the columns explicitly keeps 'hour' available for set_index
    # even when the query returned no rows
    df = pd.DataFrame(list(rows), columns=['hour', 'bikes_available'])

    return df.set_index('hour').sort_index()

In [None]:
def plot_station_data(station_id):
    '''
    Plot the historic hourly availability for a station and save it as a PNG.

    The image is written to plots/<station_id>.png, relative to the current
    working directory; the directory is created if it does not exist.

    Returns the URL formed by appending that relative path to
    'http://ipython.ipeirotis.com:5000/'.
    '''
    filename = 'plots/'+str(station_id)+'.png'
    url = 'http://ipython.ipeirotis.com:5000/' + filename

    # savefig does not create missing directories, so make sure 'plots/' exists
    os.makedirs(os.path.dirname(filename), exist_ok=True)

    df = get_historic_citibike_data(station_id)
    ax = df.plot()
    fig = ax.get_figure()
    try:
        # Label the plot so that the image can be understood on its own
        ax.set(title="Station #{sid}: average bikes available by hour".format(sid=station_id),
               xlabel="Hour of day",
               ylabel="Bikes available")
        fig.savefig(filename)
    finally:
        # Always release the figure, even when labelling or saving fails
        plt.close(fig)

    return url

In [None]:
def create_message(username, station_name):
    '''
    This function takes as input the username of the user that asked the question,
    and the station_name that we managed to extract from the question (potentially it can be None)
    We check the Citibike API and respond with the status of the Citibike stations.

    Returns a tuple (message, attachments): message is the text of the reply and
    attachments is a list with one Slack message attachment (a dictionary with
    the keys image_url, title and text) per matching station. The list is empty
    when station_name is None or when no station matches.
    '''
    attachments = []
    if station_name is not None:
        # We want to address the user with the username. Potentially, we can also check
        # if the user has added a first and last name, and use these instead of the username
        message = "Thank you @{u} for asking about the station on {s}.\n".format(u=username, s=station_name)

        # Let's get the data from the Citibike API
        matching_stations = get_citibike_data(station_name)
        # If we cannot find any matching station
        if len(matching_stations) == 0:
            message += "I could not find any matching station.\n"
        # If there are multiple matching stations
        if len(matching_stations) > 1:
            message += "We have multiple matching stations.\n"

        # Add the information for each station
        # We add the information as an Slack message "attachment"
        # See https://api.slack.com/docs/message-attachments
        for station in matching_stations:
            station_id = station['station_id']
            address = station['station_name']
            bikes = station['available']
            # Saves a plot of the historic data and returns the URL of the image
            url = plot_station_data(station_id)
            attachment = {
                "image_url": url,
                "title": "Historic data for station #{sid} at {a}".format(sid=station_id, a=address),
                "text": "Station #{sid} at {a} has {b} available bikes now.\n".format(sid=station_id, a=address, b=bikes)
            }
            attachments.append(attachment)
    else:
        # End the first sentence with a newline, as the other branch does; without
        # a separator the reply would read "...for asking.Unfortunately..."
        message =  "Thank you @{u} for asking.\n".format(u=username)
        message += "Unfortunately I did not understand what is the station you are asking for.\n"
        message += "Ask me `how many bikes on XXXXX station` and I will try to answer."

    return message, attachments

In [None]:
# Read the access token from the file and create the Slack Client.
# The JSON file is expected to contain the keys "access_token" and "user_id".
secrets_file = 'slack_secret.json'

# The context manager closes the file even if reading or parsing fails,
# and the raw content of the secrets file is not kept around in a variable.
with open(secrets_file, 'r') as f:
    auth_info = json.load(f)

auth_token = auth_info["access_token"]
bot_user_id = auth_info["user_id"]

sc = SlackClient(auth_token)



In [None]:
# Connect to the Real Time Messaging API of Slack and process the events.
# This cell runs until the kernel is interrupted.

if sc.rtm_connect():
    # We are going to be polling the Slack API for recent events continuously
    while True:
        # We are going to wait 1 second between monitoring attempts
        time.sleep(1)
        # If there are any new events, we will get a response. If there are no events, the response will be empty
        events = sc.rtm_read()
        for item in events:

            # Check that the event is a message. If not, ignore and proceed to the next event.
            if item.get("type") != 'message':
                continue

            # Check that the message comes from a user. If not, ignore and proceed to the next event.
            if item.get("user") is None:
                continue

            # Check that the message is asking the bot to do something. If not, ignore and proceed to the next event.
            # An event without a 'text' entry cannot be a question for the bot, so it is skipped as well.
            message_text = item.get('text')
            if not message_text or not message_matches(bot_user_id, message_text):
                continue

            # Get the username of the user who asked the question.
            # A separate name is used so that the list of events being iterated is not shadowed.
            user_info = sc.api_call("users.info", user=item["user"])
            user_record = user_info.get('user')
            if user_record is None:
                # The lookup did not return a user; skip this event instead of stopping the bot
                continue
            username = user_record.get('name')

            # Extract the station name from the user's message
            station_name = extract_station_name(message_text)

            # Prepare the message that we will send back to the user
            message, attachments = create_message(username, station_name)

            # Post a response to the #bots channel
            sc.api_call("chat.postMessage", channel="#bots", text=message, attachments=attachments)
else:
    # Make a failed connection visible instead of silently doing nothing
    print("Could not connect to the Slack Real Time Messaging API; check the access token.")
                
        