# General Set Up

In [1]:
import pandas as pd
import numpy as np
import time
import smtplib
from bs4 import BeautifulSoup
import xml.etree.ElementTree as ET
import requests

## Creating XML Tree Object ##

TARGET = 'servicestatus.json'
SOURCE = 'http://web.mta.info/status/serviceStatus.txt'

response = requests.get(SOURCE)
xml_string = response.text
root = ET.fromstring(xml_string)
# ET.fromstring converts a string of XML content to a XML tree object.


## Creating a list of all lines ##

alllines=['123','456','7','ACE','BDFM','G','JZ','L','NQR','S','SIR']

# Function to intake xml root and return full list of MTA lines present in the XML. 
(Should always be the same.) 

In [5]:
def getFullLineList(xmlroot):
    FullLineList = []
    num = 0
    
    while num < len(root[2]):
        FullLineList.append((num,root[2][num][0].text))
        num += 1
        
    return FullLineList

FullLineList = getFullLineList(root)
FullLineList

[(0, '123'),
 (1, '456'),
 (2, '7'),
 (3, 'ACE'),
 (4, 'BDFM'),
 (5, 'G'),
 (6, 'JZ'),
 (7, 'L'),
 (8, 'NQR'),
 (9, 'S'),
 (10, 'SIR')]

# Function to intake service status xml object and return lines with service changes as a list of tuples

In [2]:
def Return_SC_Lines(xmlroot):
    # xmlroot must be an xml object of the MTA Service Status page using xml Element Tree package
    num_lines = list(range(0,len(xmlroot[2])))
    SC_lines = []
    
    for ea in num_lines:
        if xmlroot[2][ea][1].text != 'GOOD SERVICE':
            SC_lines.append((xmlroot[2][ea][0].text,root[2][ea][1].text))
        else:
            pass 
        
    #SC_Lines is a list of tuples with the line name and service status 
    return SC_lines
    
SC_lines = Return_SC_Lines(root)
SC_lines

[('123', 'PLANNED WORK'),
 ('456', 'PLANNED WORK'),
 ('BDFM', 'DELAYS'),
 ('NQR', 'DELAYS')]

# Function to take line name and service status XML and return beautiful soup of that line's service status 

In [3]:
def MakeLine_ServiceStatusSoup(line_name,xmlroot): 
    line_iq = line_name
    line_iq_num = ''
    
    for ea in FullLineList:
        if ea[1] == line_iq:
            line_iq_num = ea[0]
    
    # get string and make soup from line in question's status
    status_str = root[2][line_iq_num][2].text
    linestatus_soup = BeautifulSoup(status_str, 'lxml')
    
    return linestatus_soup


# bdfmsoup = MakeLine_ServiceStatusSoup('BDFM',root)

# Function intake service status xml and a line with planned work and return description of the planned work

In [6]:
def plannedWork_Simple(line_servicestatus_soup):
    
    tag_options = ['span']
    classname_options = ['TitlePlannedWork']
    subtag_options = ['p']
    pwSimple_text = ''

    for spans in line_servicestatus_soup.find_all(tag_options, {'class':classname_options}):
        subs = spans.find_all_next(subtag_options)
        for each in subs:
            pwSimple_text += each.text
            
    return pwSimple_text


# this works for printing summary line, and not when planned work is a span

def plannedWork_Detail(line_servicestatus_soup):
    
    tag_options = ['a']
    classname_options = ['plannedWorkDetailLink']
    subtag_options = ['div','p']
    pwDetail_text = ''
    
    for a_summ in line_servicestatus_soup.find_all(tag_options, {'class':classname_options}):
        pw_summtext = a_summ.text

        #print(pw_summtext,'\n', next_div, '\n')
        pwDetail_text += pw_summtext + ' '
    
    return pwDetail_text
        


def PlannedWorkText(line_name,xmlroot):
    
    line_status_soup = MakeLine_ServiceStatusSoup(line_name,xmlroot)
    service_string = ''
    
    line_iq = line_name
    line_iq_num = ''
    
    for ea in FullLineList:
        if ea[1] == line_iq:
            line_iq_num = ea[0]
        
        
    if len(line_status_soup.find_all('a', {'class':'plannedWorkDetailLink'})) < 1:
        service_string = plannedWork_Simple(line_status_soup)
            
    else:
        service_string = plannedWork_Detail(line_status_soup)

        
        
    return [line_name, xmlroot[2][line_iq_num][1].text, service_string]
    
    
print(PlannedWorkText('123',root)[2])
 

TRACK & TRACK PLATE INSTALLATION, REPLACEMENT OF POWER & COMMUNICATION CABLES[2] [3] Trains run at reduced speed through the Clark St Tunnel between Manhattan and Brooklyn 


# Function to intake service status xml and a line name and return text of delays on that line, if applicable 

In [8]:
def delays_text(line_name,xmlroot):

    line_status_soup = MakeLine_ServiceStatusSoup(line_name,xmlroot)
    delay_string = ''
    
    delayline_name = line_name
    dealyline_num = ''
    
    for ea in FullLineList:
        if ea[1] == delayline_name:
            dealyline_num = ea[0]


    del_text = ''
    
    for del1 in line_status_soup.find_all('span', {'class': 'TitleDelay'}):
        
        # below checks that the delay description is contained in <p> tags
        if len(del1.find_all('p')) > 0:
            
            delay_deets = del1.find_all('p')
            
            for dels in delay_deets:
                del_text += dels.text + ' '
            
            
        else:
            del_text = ''.join(line_status_soup.find_all(text=True)[3:5]).strip()
            
            
    return del_text
print(delays_text('NQR',root))

Northbound [N] trains are running with delays because of signal problems at Ft Hamilton Pkwy


# Function to return list of delayed lines

<font color='red'>below should probably be replaced with return_SC_lines function, not just looking for ones with delays.</font>

In [9]:
def delayedLines(xmlroot):
    SC_lines = Return_SC_Lines(xmlroot)
    l=[]
    for i in SC_lines:
        if i[1]=='DELAYS':
            l.append(i[0])
    return l

delayed=delayedLines(root)
delayed

['BDFM', 'NQR']

# Initialize the User's data

In [10]:
def init():
    data=pd.DataFrame(index=['User','Time','123','456','7','ACE','BDFM','G','JZ','L','NQR','S','SIR'],dtype=str)
    return data

# Function to encode a list of lines as a list of bits, based on the alllines table

<font color='red'>adjust bottom function so it intakes SC_lines function. SC lines function should = the new delayed object above.</font>

In [11]:
def binaryConverter(line):
    l=alllines[:]
    for i in range(len(l)):
        if l[i] in line:
            l[i]=1
        else:
            l[i]=0
    return l

def binaryDecoder(line):
    s=[]
    for i in range(len(line)):
        if line[i]==1:
            s.append(alllines[i])
    return s

print(delayed)
binary=binaryConverter(delayed)
print(binary)
regular=binaryDecoder(binary)
regular

['BDFM', 'NQR']
[0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0]


['BDFM', 'NQR']

# Functions to add or remove a user's profile with his email, commuting time and the lines he is taking

In [12]:
def addProfile(data,email,time,lines):
    '''
    adds commuting time in the list of all commuting times, adds time and lines for that user
    @param email: user's email
    @param time: commuting time
    @param lines: list of the lines concerned by those commuting times
    '''
    l=[email,time]
    line=binaryConverter(lines)
    for i in line:
        l.append(i)
    try:
        col=max(data.columns)+1
    except ValueError:
        col=0
    data[col]=l

def removeUser(data,email):
    p=0
    for i in data.iloc[0]:
        if i==email:
            del data[p]
        p+=1

def clear(data):
    for i in data.columns:
        del data[i]

# Define the time limit after which we notify the users

In [13]:
timelimit=15

# Function that gives, based on the list of delayed lines, the list of people to notify by email

In [43]:
def listToNotify(data,delayedlines):
    hour=time.localtime()[3]
    minutes=time.localtime()[4]
    time_t=hour*60+minutes
    #time_t is total number of minutes from 00:00
    notif=[]
    p=0
    l=binaryConverter(delayedlines)
    for col in data.columns:  #t is in 24hours format hh:mm
        column=data[col]
        t=column[1]
        # t is time entered by user, how about usr_commute_starttime
        print(t)
        h=int(t[0:2]) #gets hh
        m=int(t[3:])   #gets mm
        tt=h*60+m
        if h==0:
            if m>=timelimit:
                tt=m
            else:
                tt=24*60+m
        # below should be changed so that the difference between usr_commute_starttime and localtime can be 
        # if we can change so only one email goes out per day it can be anytime within 15 mins 
        if tt-time_t == timelimit:
            lines=column[2:]
            e=sum(np.logical_and(lines,l))
            if e>0:
                notif.append((column[0],col))
    return list(set(notif))

# Function to trigger the emails

In [15]:
def emailTrigger(data,lines):
    notif_delay=listToNotify(data,lines)
    for user in notif_delay:
        sendmail(data,user,lines)

# Function to send emails

In [16]:
def sendmail(data,user,lines):
    mail=user[0]
    index=user[1]
    server = smtplib.SMTP('smtp.gmail.com:587')
    server.ehlo()
    server.starttls()
    server.login("notificationmta@gmail.com", "PyProject1859")
    
    l=binaryConverter(lines)
    col=data[index]
    c=col[2:]
    l=np.logical_and(l,c)
    l=binaryDecoder(l)
    
    if len(lines)>2:
        s=''
        for i in range(len(lines)-1):
            s=s+lines[i]+', '
        s=s+'and '+lines[-1]
    elif len(lines)==2:
        s=lines[0]+' and '+lines[1]
    else:
        s=lines[0]
    
    if len(l)>2:
        s2=''
        for i in range(len(l)-1):
            s2=s2+l[i]+', '
        s2=s2+l[-1]
    elif len(l)==2:
        s2=lines[0]+' and '+lines[1]
    else:
        s2=lines[0]
    #Send the mail
    msg = "\r\n".join([
            "From: subwayhelper@gmail.com",
            "To: "+mail,
            "Subject: PyProject 3rd Test ALERT: Delays on line(s) "+str(s2),
            "",
            'Dear user, \n There are currently delays on lines '+s+'. As you indicated that you take line(s) '+str(s2)+' you are concerned by those delays and should consider another route. \n Thanks for using our platform, \n The subway helper team'
            ])
        # The /n separates the message from the headers
    server.sendmail("subwayhelper@gmail.com", mail, msg)
    print('Email successfully sent to '+str(mail))
    server.quit()

# Workspace

In [21]:
user_data = init()
user_data

User
Time
123
456
7
ACE
BDFM
G
JZ
L
NQR


In [22]:
import time
print(time.localtime()[3])
print(time.localtime()[4])

21
45


In [45]:
addProfile(user_data,'yk1859@nyu.edu','00:06',['ACE','BDFM','S'])
addProfile(user_data,'srf366@stern.nyu.edu','22:20',['G','JZ','BDFM','ACE'])
addProfile(user_data,'fh643@nyu.edu','00:06',['G','L','S','BDFM'])

In [46]:
user_data

Unnamed: 0,0,1,2
User,yk1859@nyu.edu,srf366@stern.nyu.edu,fh643@nyu.edu
Time,00:06,22:20,00:06
123,0,0,0
456,0,0,0
7,0,0,0
ACE,1,1,0
BDFM,1,1,1
G,0,1,1
JZ,0,1,0
L,0,0,1


In [47]:
delayed

['BDFM', 'NQR']

In [48]:
listToNotify(user_data,delayed)

00:06
22:20
00:06


[('srf366@stern.nyu.edu', 1)]

In [None]:
emailTrigger(data,delayed)

In [44]:
clear(user_data)
user_data

User
Time
123
456
7
ACE
BDFM
G
JZ
L
NQR


In [None]:
listToNotify(data,delayed)