# General Set Up

In [1]:
import pandas as pd
import numpy as np
import time
import smtplib
from bs4 import BeautifulSoup
import xml.etree.ElementTree as ET
import requests
import cherrypy
import os, os.path

## Creating XML Tree Object ##

TARGET = 'servicestatus.json'
SOURCE = 'http://web.mta.info/status/serviceStatus.txt'

response = requests.get(SOURCE)
xml_string = response.text
root = ET.fromstring(xml_string)
# ET.fromstring converts a string of XML content to a XML tree object.


## Creating a list of all lines ##

alllines=['123','456','7','ACE','BDFM','G','JZ','L','NQR','S','SIR']

# Function to intake xml root and return full list of MTA lines present in the XML. 
(Should always be the same.) 

In [2]:
def getFullLineList(xmlroot):
    FullLineList = []
    num = 0
    
    while num < len(root[2]):
        FullLineList.append((num,root[2][num][0].text))
        num += 1
        
    return FullLineList

FullLineList = getFullLineList(root)
FullLineList

[(0, '123'),
 (1, '456'),
 (2, '7'),
 (3, 'ACE'),
 (4, 'BDFM'),
 (5, 'G'),
 (6, 'JZ'),
 (7, 'L'),
 (8, 'NQR'),
 (9, 'S'),
 (10, 'SIR')]

# Function to intake service status xml object and return lines with service changes as a list of tuples

In [3]:
def Return_SC_Lines(xmlroot):
    # xmlroot must be an xml object of the MTA Service Status page using xml Element Tree package
    num_lines = list(range(0,len(xmlroot[2])))
    SC_lines = []
    
    for ea in num_lines:
        if xmlroot[2][ea][1].text != 'GOOD SERVICE':
            SC_lines.append((xmlroot[2][ea][0].text,xmlroot[2][ea][1].text))
        else:
            pass 
        
    #SC_Lines is a list of tuples with the line name and service status 
    return SC_lines
    
SC_lines = Return_SC_Lines(root)
SC_lines

[('123', 'PLANNED WORK'), ('NQR', 'SERVICE CHANGE')]

# Function that returns a dictionary with lines as keys and 0 (delayed', 'works') as possible values

In [4]:
def dictionary():
    SC_lines=Return_SC_Lines(root)
    d={}
    for i in SC_lines:
        if i[1]=='DELAYS':
            d[i[0]]='delayed'
        if i[1]=='PLANNED WORK':
            d[i[0]]='works'
    return d
            

# Function to take line name and service status XML and return beautiful soup of that line's service status 

In [5]:
def MakeLine_ServiceStatusSoup(line_name,xmlroot): 
    line_iq = line_name
    line_iq_num = ''
    
    for ea in FullLineList:
        if ea[1] == line_iq:
            line_iq_num = ea[0]
    
    # get string and make soup from line in question's status
    status_str = xmlroot[2][line_iq_num][2].text
    linestatus_soup = BeautifulSoup(status_str, 'lxml')
    
    return linestatus_soup


#bdfmsoup = MakeLine_ServiceStatusSoup('BDFM',root)

# Function intake service status xml and a line with planned work and return description of the planned work

In [6]:
def plannedWork_Simple(line_servicestatus_soup):
    
    tag_options = ['span']
    classname_options = ['TitlePlannedWork']
    subtag_options = ['p']
    pwSimple_text = ''

    for spans in line_servicestatus_soup.find_all(tag_options, {'class':classname_options}):
        subs = spans.find_all_next(subtag_options)
        for each in subs:
            pwSimple_text += each.text
            
    return [pwSimple_text]


# this works for printing summary line, and not when planned work is a span

def plannedWork_Detail(line_servicestatus_soup):
    
    tag_options = ['a']
    classname_options = ['plannedWorkDetailLink']
    subtag_options = ['div','p']
    pwDetail_text_list = []
    
    for a_summ in line_servicestatus_soup.find_all(tag_options, {'class':classname_options}):
        pw_summtext = a_summ.text

        #print(pw_summtext,'\n', next_div, '\n')
        pwDetail_text_list.append(pw_summtext)
    
    return pwDetail_text_list
        


def PlannedWorkText(line_name,xmlroot):
    
    line_status_soup = MakeLine_ServiceStatusSoup(line_name,xmlroot)
    service_string = ''
    pw_text_list = []
    
    line_iq = line_name
    line_iq_num = ''
    
    for ea in FullLineList:
        if ea[1] == line_iq:
            line_iq_num = ea[0]
        
        
    if len(line_status_soup.find_all('a', {'class':'plannedWorkDetailLink'})) < 1:
        pw_text_list = plannedWork_Simple(line_status_soup)
            
    else:
        pw_text_list = plannedWork_Detail(line_status_soup)

        
        
    return [line_name, xmlroot[2][line_iq_num][1].text, pw_text_list]
    
for i in PlannedWorkText('123',root)[2]:
    print(i)
 

TRACK & TRACK PLATE INSTALLATION, REPLACEMENT OF POWER & COMMUNICATION CABLES[2] [3] Trains run at reduced speed through the Clark St Tunnel between Manhattan and Brooklyn


# Function to intake service status xml and a line name and return text of delays on that line, if applicable 

In [7]:
def delays_text(line_name,xmlroot):

    line_status_soup = MakeLine_ServiceStatusSoup(line_name,xmlroot)
    delay_string = ''
    
    delayline_name = line_name
    dealyline_num = ''
    
    for ea in FullLineList:
        if ea[1] == delayline_name:
            dealyline_num = ea[0]


    del_text = ''
    
    for del1 in line_status_soup.find_all('span', {'class': 'TitleDelay'}):
        
        # below checks that the delay description is contained in <p> tags
        if len(del1.find_all('p')) > 0:
            
            delay_deets = del1.find_all('p')
            
            for dels in delay_deets:
                del_text += dels.text + ' '
            
            
        else:
            del_text = ''.join(line_status_soup.find_all(text=True)[3:5]).strip()
            
            
    return del_text
print(delays_text('ACE',root))

TypeError: object of type 'NoneType' has no len()

# Function to return list of lines with delays or planned works

In [8]:
def delayedLines(xmlroot):
    SC_lines = Return_SC_Lines(xmlroot)
    l=[]
    for i in SC_lines:
        if i[1]!='GOOD SERVICE':
            l.append(i[0])
    return l

delayed=delayedLines(root)
delayed

['123', 'NQR']

# Initialize the User's data

In [9]:
def init():
    data=pd.DataFrame(index=['User','Time','123','456','7','ACE','BDFM','G','JZ','L','NQR','S','SIR'],dtype=str)
    return data

# Function to encode a list of lines as a list of bits, based on the alllines table

In [10]:
def binaryConverter(lines):
    '''
    Function that takes a list of lines and returns a sparse vector with bits equal to:
    -  1 if a line appears in the initial list
    -  0 if not
    @param lines: list of strings. Contains the list of lines.
    output: vector of int of length 11 (there are 11 lines in New York)
    '''
    binary_lines=alllines[:]
    for i in range(len(binary_lines)):
        if binary_lines[i] in lines:
            binary_lines[i]=1
        else:
            binary_lines[i]=0
    return binary_lines

def binaryDecoder(binary_line):
    lines=[]
    for i in range(len(binary_line)):
        if binary_line[i]==1:
            lines.append(alllines[i])
    return lines

print(delayed)
binary=binaryConverter(delayed)
print(binary)
regular=binaryDecoder(binary)
regular

['123', 'NQR']
[1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0]


['123', 'NQR']

# Functions to add or remove a user's profile with his email, commuting time and the lines he is taking

In [11]:
def addProfile(data,email,time,lines):
    '''
    adds commuting time in the list of all commuting times, adds time and lines for that user
    @param email: user's email
    @param time: commuting time
    @param lines: list of the lines concerned by those commuting times
    '''
    data_column=[email,time]
    binary_lines=binaryConverter(lines)
    for i in binary_lines:
        data_column.append(i)
    try:
        col=max(data.columns)+1
    except ValueError:
        col=0
    data[col]=data_column

def removeUser(data,email):
    p=0
    for user_email in data.iloc[0]:
        if user_email==email:
            del data[p]
        p+=1

def clear(data):
    for i in data.columns:
        del data[i]

# Define the time window for which we notify the users

In [12]:
time_window=15

# Function that gives, based on the list of delayed lines, the list of people to notify by email

In [13]:
def listToNotify(data,affected_lines):
    
    #First get our local time
    present_hour=time.localtime()[3]
    present_minutes=time.localtime()[4]
    #We express our time in minutes
    present_time=present_hour*60+present_minutes
    
    #Intialize the list of people to notify
    notif=[]
    
    #Convert the list of affected lines (i.e with planned works or delays) into binary format
    binary_affected_lines=binaryConverter(affected_lines)
    
    #Do a for loop on all users
    for col in data.columns:  
        column=data[col]
        user_time=column[1] #user_time is in 24hours format hh:mm
        user_hour=int(user_time[0:2]) #gets hh
        user_minutes=int(user_time[3:])   #gets mm
        
        #Handle the situation for commuting times around midnight which is a corner case
        if user_hour==0:
            if user_minutes>=time_window:
                user_commuting_time=user_minutes
            else:
                user_commuting_time=24*60+user_minutes
        else:
            user_commuting_time=user_hour*60+user_minutes

        #Check for the user in data[col] if he is concerned by one of the affected lines. If so, add him to the
        #list of people to notify
        if user_commuting_time - present_time == time_window:
            user_lines=column[2:]
            e=sum(np.logical_and(user_lines,binary_affected_lines)) # Check if the user entered an affected line
            if e>0:
                notif.append((column[0],col))
    return list(set(notif))  # allows to avoid duplicates and returns the list of unique elements in notif

# Function to trigger the emails

In [14]:


def emailTrigger(data,affected_lines):
    trigger_notifList = listToNotify(data,affected_lines)
    
    for user in trigger_notifList:
        sendmail(data,user,affected_lines)
        

In [15]:
def getall_usrNames(usr_df):
    return list(set(usr_df[0,:]))


getall_usrNames(data)

NameError: name 'data' is not defined

# Function that takes a list of lines and returns a text listing those lines

In [16]:
def list_lines_txt(lines):
    if len(lines)>2:
        s=''
        for i in range(len(lines)-2):
            s=s+lines[i]+', '
        s=s+lines[-2]+' and '+lines[-1]
    elif len(lines)==2:
        s=lines[0]+' and '+lines[1]
    else:
        s=lines[0]
    return s

print(list_lines_txt(alllines))
print(list_lines_txt(alllines[:2]))



123, 456, 7, ACE, BDFM, G, JZ, L, NQR, S and SIR
123 and 456


# Function that takes a list of affected lines and returns the reason of the delays or works

In [17]:
def detail_affected_lines(affected_lines):
    text=''
    d=dictionary()
    for line in affected_lines:
        if d[line]=='delayed':
            text=text+' Lines '+line+'\n Status: Delayed \n'+delays_text(line,root)+'\n \n'
        if d[line]=='works':
            text=text+' Lines '+line+'\n Status: Planned Works\n'
            for i in PlannedWorkText(line,root)[2]:
                text=text+i+'\n'
            text=text+'\n \n'
    return text+'\n'

In [18]:
def sendmail(data,user,affected_lines):
    '''
    Function to send emails
    @param data: our dataframe
    @param user: tuple (email,column). email is the user's email. column is the column where this user is in data 
    '''
    mail=user[0]
    column_number=user[1]
    server = smtplib.SMTP('smtp.gmail.com:587')
    server.ehlo()
    server.starttls()
    server.login("notificationmta@gmail.com", "PyProject1859")
    
    binary_affected_lines=binaryConverter(affected_lines)
    col=data[column_number]
    c=col[2:]
    user_binary_affected_lines=np.logical_and(binary_affected_lines,c)
    user_affected_lines=binaryDecoder(user_binary_affected_lines)
    
    s=list_lines_txt(affected_lines)
    
    s2=list_lines_txt(user_affected_lines)
    
    detail=detail_affected_lines(user_affected_lines)
    #Send the mail
    msg = "\r\n".join([
            "From: subwayhelper@gmail.com",
            "To: "+mail,
            "Subject: Guys please read this email and tell me what you think. PyProject last Test ALERT: Delays on line(s) "+str(s2),
            "",
            '''Dear user, \n \n There are currently delays on lines ''' +s+
            '''. You indicated that you will take line(s) '''+str(s2)+' in '+str(time_window)+' minutes'+''' so you are concerned by those delays or works and should consider another route. \n '''
            '''Here are the details for the lines you usually take \n \n'''+detail+''' Thanks for using our platform, \n \n The subway helper team'''
            ])
        # The /n separates the message from the headers
    server.sendmail("subwayhelper@gmail.com", mail, msg)
    print('Email successfully sent to '+str(mail))
    server.quit()

# Workspace

In [19]:
import time
print(time.localtime()[3])
print(time.localtime()[4])

19
31


In [20]:
data=init()
t=str(time.localtime()[3])+':'+str(time.localtime()[4])
time_window=0
delayed=delayedLines(root)
addProfile(data,'yk1859@nyu.edu',t,['ACE','BDFM','S'])
addProfile(data,'srf366@stern.nyu.edu',t,['G','JZ','BDFM','ACE'])
addProfile(data,'fh643@nyu.edu',t,['G','L','S','BDFM'])
emailTrigger(data,delayed)

In [21]:

# "return" is to show on the web page something
#modeled on tutorial 6
class MTA_Notification(object):
    @cherrypy.expose
    def index(self):
      return """<html>
          <head>
              <title>Sign-up Page</title>
              <link href="/static/css/style.css" rel="stylesheet">      
          </head>
          <body><fieldset><legend>MTA Notification</legend>
          <form method="get" action="submit" target="_self"><h1>Sign up</h1><br/><p>Enter your email address here: <br>
             <input type="email" name="email" placeholder="Email address" id="email" required></p><p> When do you take MTA? Enter time in 24hr format: <br>
             <input type="text"  name="time" placeholder="HH:MM" pattern="([01]?[0-9]|2[0-3]):[0-5][0-9]" id="time" required></p><p><fieldset><legend>Please select all the possible lines you take: </legend>
                 <input type="checkbox" name="lines" value="123"/><lable for="123">1,2,3</lable><br />
                 <input type="checkbox" name="lines" value="456"/><label for="456">4,5,6</label><br />
                 <input type="checkbox" name="lines" value="7"/><label for="7">7</label><br />
                 <input type="checkbox" name="lines" value="ACE"/><label for="ACE">A,C,E</label><br />
                 <input type="checkbox" name="lines" value="BDFM"/><label for="BDFM">B,D,F,M</label><br />
                 <input type="checkbox" name="lines" value="G"/><label for="G">G</label><br />            
                 <input type="checkbox" name="lines" value="JZ"/><label for="JZ">J,Z</label><br />  
                 <input type="checkbox" name="lines" value="L"/><label for="L">L</label><br />
                 <input type="checkbox" name="lines" value="NQR"/><label for="NQR">N,Q,R</label><br />
                 <input type="checkbox" name="lines" value="S"/><label for="S">S</label><br />
                 <input type="checkbox" name="lines" value="SIR"/><label for="SIR">S,I,R</label><br /></fieldset><p>
             <button type="submit">submit your info</button></p>
            </form></fieldset>
           </body>
           <?php
           ?>
         </html>"""

    @cherrypy.expose
    def submit(self, email, time, lines):          # email is a string, time is a string, lines is a list
        lineChoice = ','.join(lines) 
        addProfile(data,email,time,lines)
        return "Your email is: " + email + "\nYour time of commute is: " + time + "\nYour lines of choice are: " + lineChoice

if __name__ == '__main__':
    conf = {
        '/': {
            'tools.sessions.on': True,
            'tools.staticdir.root': os.path.abspath(os.getcwd())
        },
        '/static': {
            'tools.staticdir.on': True,
            'tools.staticdir.dir': './public'
        }
    }
    cherrypy.quickstart(MTA_Notification(), '/', conf)



[12/Dec/2017:19:32:00] ENGINE Listening for SIGTERM.
[12/Dec/2017:19:32:00] ENGINE Listening for SIGHUP.
[12/Dec/2017:19:32:00] ENGINE Listening for SIGUSR1.
[12/Dec/2017:19:32:00] ENGINE Bus STARTING
CherryPy Checker:
'/Users/Sam_I_Am/Documents/Stern/Semester 3/Programming for DS/PDS_jupyter/GitHub/srf366/MTA_Project/./public' (root + dir) is not an existing filesystem path.
section: [/static]
root: '/Users/Sam_I_Am/Documents/Stern/Semester 3/Programming for DS/PDS_jupyter/GitHub/srf366/MTA_Project'
dir: './public'

[12/Dec/2017:19:32:00] ENGINE Started monitor thread 'Autoreloader'.
[12/Dec/2017:19:32:00] ENGINE Started monitor thread '_TimeoutMonitor'.
[12/Dec/2017:19:32:00] ENGINE Serving on http://127.0.0.1:8080
[12/Dec/2017:19:32:00] ENGINE Bus STARTED


127.0.0.1 - - [12/Dec/2017:19:32:08] "GET / HTTP/1.1" 200 1991 "http://localhost:8888/notebooks/GitHub/srf366/MTA_Project/AllCombined_Test20171212.ipynb" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/604.3.5 (KHTML, like Gecko) Version/11.0.1 Safari/604.3.5"
127.0.0.1 - - [12/Dec/2017:19:32:08] "GET /static/css/style.css HTTP/1.1" 404 1295 "http://127.0.0.1:8080/" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/604.3.5 (KHTML, like Gecko) Version/11.0.1 Safari/604.3.5"
127.0.0.1 - - [12/Dec/2017:19:32:30] "GET /submit?email=baba%40blacksheep.com&time=22%3A22&lines=456&lines=7&lines=ACE HTTP/1.1" 200 101 "http://127.0.0.1:8080/" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/604.3.5 (KHTML, like Gecko) Version/11.0.1 Safari/604.3.5"


[12/Dec/2017:19:36:30] ENGINE Keyboard Interrupt: shutting down bus
[12/Dec/2017:19:36:30] ENGINE Bus STOPPING
[12/Dec/2017:19:36:30] ENGINE HTTP Server cherrypy._cpwsgi_server.CPWSGIServer(('127.0.0.1', 8080)) shut down
[12/Dec/2017:19:36:30] ENGINE Stopped thread 'Autoreloader'.
[12/Dec/2017:19:36:30] ENGINE Stopped thread '_TimeoutMonitor'.
[12/Dec/2017:19:36:30] ENGINE Bus STOPPED
[12/Dec/2017:19:36:30] ENGINE Bus EXITING
[12/Dec/2017:19:36:30] ENGINE Bus EXITED
[12/Dec/2017:19:36:30] ENGINE Waiting for child threads to terminate...


In [22]:
data

Unnamed: 0,0,1,2,3
User,yk1859@nyu.edu,srf366@stern.nyu.edu,fh643@nyu.edu,baba@blacksheep.com
Time,19:31,19:31,19:31,22:22
123,0,0,0,0
456,0,0,0,1
7,0,0,0,1
ACE,1,1,0,1
BDFM,1,1,1,0
G,0,1,1,0
JZ,0,1,0,0
L,0,0,1,0


In [None]:
delayed

In [None]:
listToNotify(data,delayed)

In [23]:
print(data)

                   0                     1              2                    3
User  yk1859@nyu.edu  srf366@stern.nyu.edu  fh643@nyu.edu  baba@blacksheep.com
Time           19:31                 19:31          19:31                22:22
123                0                     0              0                    0
456                0                     0              0                    1
7                  0                     0              0                    1
ACE                1                     1              0                    1
BDFM               1                     1              1                    0
G                  0                     1              1                    0
JZ                 0                     1              0                    0
L                  0                     0              1                    0
NQR                0                     0              0                    0
S                  1                     0          

In [None]:
clear(data)
data

In [None]:
listToNotify(data,delayed)