In [56]:
# This Python script measures traffic on the Poniatowski Bridge in Warsaw.
# It measures bus traffic (a simple modification makes it work for trams) in each direction separately.
# Finally, it counts the number of observations in 15-minute bins.
# It is our solution from the hackathon.
# The input data format is consistent with the Open Warsaw API data format (5 columns).

# The data has its defects. Frequently, the time between subsequent signals from a given vehicle is longer than 10 s.
# It is not rare to have 15-20 s breaks in the signal.
# This makes further analysis harder. In our case it was sometimes impossible to detect in which direction a vehicle was moving, because there was only one observation from the bridge.

In [57]:
import datetime
import pandas as pd
import numpy as np

In [58]:
# Column names assigned to the header-less, semicolon-separated dump, in file
# order. The trailing comment on each line is the type noted for those columns
# in the original column listing (pandas still infers the actual dtypes).
column_names = [
    "versionID", "line", "brigade", "time",  # String
    "lon", "lat", "rawLon", "rawLat",  # Double
    "status", "delay", "delayAtStop", "plannedLeaveTime", "nearestStop",  # String
    "nearestStopDistance", "nearestStopLon", "nearestStopLat",  # Double
    "previousStop",  # String
    "previousStopLon", "previousStopLat", "previousStopDistance",  # Double
    "previousStopArrivalTime", "previousStopLeaveTime", "nextStop",  # String
    "nextStopLon", "nextStopLat", "nextStopDistance",  # Double
    "nextStopTimetableVisitTime", "courseIdentifier", "courseDirection",  # String
    "timetableIdentifier", "timetableStatus", "receivedTime",  # String
    "processingFinishedTime", "onWayToDepot", "overlapsWithNextBrigade",  # String
    "atStop", "overlapsWithNextBrigadeStopLineBrigade",  # String
    "speed",  # no type noted
]

# index_col=False: never use the first column as the row index.
data = pd.read_csv(
    'data/2017_09_21.csv',
    names=column_names,
    delimiter=';',
    index_col=False,
)

In [59]:
# Longitude / latitude limits of the rectangle used to pick out observations
# located on the bridge (both limits are applied as strict inequalities).
minLon, maxLon = 21.038172, 21.042227
minLat, maxLat = 52.235431, 52.236351

In [60]:
# Step 1: keep only observations strictly inside the bounding box.
inside_lon = (data.lon > minLon) & (data.lon < maxLon)
inside_lat = (data.lat > minLat) & (data.lat < maxLat)
poniat = data[inside_lon & inside_lat]

# Step 2: keep only observations strictly between 06:00 and 22:00 on the day.
# `time` is still text here, so this is a string comparison.
after_start = poniat.time > "2017-09-21 06:00:00"
before_end = poniat.time < "2017-09-21 22:00:00"
poniat = poniat[after_start & before_end]

In [61]:
# Show the distinct courseDirection values present in the filtered rows.
poniat["courseDirection"].unique()

array(['Gocławek', 'P+R Al.Krakowska', 'Wiatraczna',
       'Kawęczyńska-Bazylika', 'Piaski', 'Nowe Bemowo', 'Banacha',
       'Annopol', 'pl.Starynkiewicza', 'Wojnicka', 'most Poniatowskiego',
       'Okopowa', 'Ochota-Ratusz'], dtype=object)

In [62]:
# Discard rows whose courseDirection is the bridge itself.
heads_to_bridge = poniat.courseDirection == "most Poniatowskiego"
poniat = poniat[~heads_to_bridge]

In [63]:
# courseDirection values grouped by the compass label that replaces them.
westbound_termini = [
    'P+R Al.Krakowska',
    'Okopowa',
    'pl.Narutowicza',
    'pl.Starynkiewicza',
    'Ochota-Ratusz',
    'Banacha',
    'Piaski',
    'Nowe Bemowo',
]
eastbound_termini = [
    'Wiatraczna',
    'Gocławek',
    'Annopol',
    'Kawęczyńska-Bazylika',
    'Wojnicka',
    'al.Zieleniecka',
    'rondo Waszyngtona',
]

# Overwrite matching rows in place; any value not listed above is left as is.
poniat.loc[poniat.courseDirection.isin(westbound_termini), "courseDirection"] = 'WEST'
poniat.loc[poniat.courseDirection.isin(eastbound_termini), "courseDirection"] = 'EAST'

In [64]:
# Check which courseDirection labels remain after the relabelling.
poniat["courseDirection"].unique()

array(['EAST', 'WEST'], dtype=object)

In [65]:
# Parse the timestamps first so that the sort below orders real datetimes.
poniat = poniat.assign(time=pd.to_datetime(poniat.time))

# Order by vehicle (line + brigade) and then chronologically. The duplicate
# removal that follows compares each row with the row directly above it, so
# rows of one vehicle must be adjacent AND in time order; sorting on
# line/brigade alone would leave the time order up to the input file.
poniat = poniat.sort_values(["line", "brigade", "time"])

# reset_index() keeps the old row labels in an "index" column and gives the
# frame a fresh 0..n-1 index.
poniat = poniat.reset_index()

In [66]:
def drop_repeated_signals(observations, min_gap_seconds=240):
    """Drop rows that closely follow the previous row of the same vehicle.

    A row is removed when the row directly above it has the same ``line`` and
    ``brigade`` and the row's ``time`` is less than ``min_gap_seconds`` after
    that previous row's time. The first row of every line/brigade run is
    always kept. Presumably this leaves one observation per bridge crossing.

    Rows are compared in their existing order, so ``observations`` must already
    be ordered by line, brigade and time.

    Parameters
    ----------
    observations : pandas.DataFrame
        Frame with ``line``, ``brigade`` and datetime ``time`` columns.
    min_gap_seconds : int or float, default 240
        Minimum spacing, in seconds, for a row to be kept after the previous
        row of the same vehicle.

    Returns
    -------
    pandas.DataFrame
        The kept rows, with their original index labels.
    """
    # Explicit equality against the previous row. Note that `~a == b` parses
    # as `(~a) == b`, which is a bitwise NOT of `a` and not a "not equal" test.
    same_vehicle = (
        (observations["line"] == observations["line"].shift())
        & (observations["brigade"] == observations["brigade"].shift())
    )
    # Time elapsed since the row directly above (NaT for the first row, which
    # compares as False below and is therefore kept).
    gap = observations["time"].diff()
    is_repeat = same_vehicle & (gap < pd.Timedelta(seconds=min_gap_seconds))
    return observations[~is_repeat]


poniat = drop_repeated_signals(poniat)

In [71]:
# Observation timestamps, split by direction label.
heading_east = poniat.courseDirection == 'EAST'
heading_west = poniat.courseDirection == 'WEST'
eastseries = poniat.loc[heading_east, 'time']
westseries = poniat.loc[heading_west, 'time']

In [72]:
def count_per_bin(times, bin_width="15min"):
    """Count timestamps per fixed-width time bin.

    Parameters
    ----------
    times : pandas.Series
        Datetime values, one per observation.
    bin_width : str, default "15min"
        Resampling frequency passed to ``resample``. "15min" is used instead
        of "15T" because the "T" alias is deprecated in newer pandas.

    Returns
    -------
    pandas.Series
        Series named "count", indexed by bin start (index name "time"), giving
        the number of observations in each bin; empty bins are 0.
    """
    # One unit per observation, indexed by its timestamp; summing per bin
    # then yields the observation count.
    ones = pd.Series(1, index=pd.DatetimeIndex(times, name="time"), name="count")
    return ones.sort_index().resample(bin_width).sum()


westseries = count_per_bin(westseries)
eastseries = count_per_bin(eastseries)

In [282]:
# Write the WEST per-bin counts to a CSV file in the working directory.
west_output_path = "west_series.csv"
westseries.to_csv(west_output_path)

In [283]:
# Write the EAST per-bin counts to a CSV file in the working directory.
east_output_path = "east_series.csv"
eastseries.to_csv(east_output_path)