In [1]:
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib as plt

In [3]:
# Load the road node table from the CSV file and preview its first rows.
df = pd.read_csv("_roads3.csv")
df.head()


Unnamed: 0,road,chainage,lrp,lat,lon,gap,type,name
0,N1,0.0,LRPS,23.706028,90.443333,,Others,Start of Road after Jatrabari Flyover infront...
1,N1,0.814,LRPSa,23.702917,90.450417,,Culvert,Box Culvert
2,N1,0.822,LRPSb,23.702778,90.450472,,CrossRoad,Intersection with Z1101
3,N1,1.0,LRP001,23.702139,90.451972,,KmPost,Km post missing
4,N1,2.0,LRP002,23.697889,90.460583,,KmPost,Km post missing


In [4]:
# Restrict the data to road N1; .copy() gives an independent frame that can
# be modified later without touching df.
df2 = df.loc[df['road'].eq('N1')].copy()

In [5]:
# Keep only the nodes whose type is exactly "Bridge" (combined types such as
# "Bridge / Bridge" do not match this equality test).
df3 = df2.loc[df2['type'].eq("Bridge")].copy()

In [6]:
# List the distinct node types on this road.
# Note: besides 'Bridge' there are also the combined types
# 'Bridge / Bridge' and 'Bridge / Culvert'.

df2['type'].unique()

array(['Others', 'Culvert', 'CrossRoad', 'KmPost', 'SideRoad,Right',
       'Bridge', 'SideRoad,Left', 'Others / Others', 'RailRoadCrossing',
       'CrossRoad / KmPost', 'Bridge / Bridge', 'KmPost / Culvert',
       'Bridge / Culvert', 'SideRoad,Right / SideRoad,Right',
       'KmPost / KmPost', 'Culvert / Culvert'], dtype=object)

In [7]:
# Preview the first bridge nodes.
df3.head()

Unnamed: 0,road,chainage,lrp,lat,lon,gap,type,name
14,N1,8.011,LRP008a,23.702111,90.515778,BS,Bridge,Kachpur bridge
15,N1,8.429,LRP008b,23.704583,90.518833,BE,Bridge,Bridge end
26,N1,11.745,LRP012a,23.691111,90.544889,BS,Bridge,Bridge start
27,N1,11.771,LRP012b,23.691055,90.545139,BE,Bridge,Bridge end
36,N1,16.242,LRP016b,23.659111,90.569722,BS,Bridge,Langalbandhu bridge


In [7]:
# Number of bridge nodes on the road.
len(df3)


315

In [8]:
# Time reading one value by selecting the column first and then the row label
# (chained indexing), then display that value.
%timeit df2['type'][0]
df2['type'][0]

The slowest run took 32.69 times longer than the fastest. This could mean that an intermediate result is being cached.
100000 loops, best of 3: 12.9 µs per loop


'Others'

In [9]:
# Beware of chained assignments of the form df.loc[...][...] = value:
# pandas evaluates df.loc[...] first, and that intermediate result may be a
# view or a copy, so the assignment may not reach df itself.
# Here the chained form is only used for reading and timing.
%timeit df2.loc[0]['type']
df2.loc[0]['type']

10000 loops, best of 3: 137 µs per loop


'Others'

In [10]:
# An assignment of the form df.loc[row, column] = value is a single indexing
# operation, so pandas can be trusted to alter df itself.
# Here the same row/column form is used for reading and timing.

%timeit df2.loc[0, 'type']
df2.loc[0, 'type']

10000 loops, best of 3: 175 µs per loop


'Others'

In [11]:
%timeit df2.ix[0, 'type']
df2.ix[0, 'type']

The slowest run took 9.74 times longer than the fastest. This could mean that an intermediate result is being cached.
100000 loops, best of 3: 5.97 µs per loop


'Others'

In [12]:
# Mark the first and the last node of the road as "Source" and "Sink".
# The row labels are taken from the index itself, so this does not depend on
# the labels being 0..len(df2)-1, and it avoids .ix (removed in pandas 1.0).
df2.loc[df2.index[0], 'type'] = "Source"
df2.loc[df2.index[-1], 'type'] = "Sink"


In [13]:
# Check that the type of the last row is now "Sink".
df2.tail()

Unnamed: 0,road,chainage,lrp,lat,lon,gap,type,name
1334,N1,461.476,LRP466a,20.86886,92.298222,,Culvert,Box culvert
1335,N1,461.904,LRP466b,20.865028,92.29825,BS,Bridge,Bridge start
1336,N1,461.946,LRP466c,20.864667,92.298194,BE,Bridge,Bridge end
1337,N1,462.124,LRP467,20.862972,92.298083,,KmPost,Infor.missing
1338,N1,462.254,LRPE,20.862917,92.298083,,Sink,"End of Road at Shapla Chattar ,Teknaf Meet wit..."


In [14]:
# Check that the type of the first row is now "Source".
df2.head()

Unnamed: 0,road,chainage,lrp,lat,lon,gap,type,name
0,N1,0.0,LRPS,23.706028,90.443333,,Source,Start of Road after Jatrabari Flyover infront...
1,N1,0.814,LRPSa,23.702917,90.450417,,Culvert,Box Culvert
2,N1,0.822,LRPSb,23.702778,90.450472,,CrossRoad,Intersection with Z1101
3,N1,1.0,LRP001,23.702139,90.451972,,KmPost,Km post missing
4,N1,2.0,LRP002,23.697889,90.460583,,KmPost,Km post missing


In [8]:
# Build the node list: first node of the road, all bridge nodes, last node.
first_row = df2.head(1)
last_row = df2.tail(1)

# drop=True discards the old row labels instead of keeping them as an
# 'index' column, so the rows are simply renumbered from 0.
df4 = pd.concat([first_row, df3, last_row]).reset_index(drop=True)
df4.head()


Unnamed: 0,road,chainage,lrp,lat,lon,gap,type,name
0,N1,0.0,LRPS,23.706028,90.443333,,Others,Start of Road after Jatrabari Flyover infront...
1,N1,8.011,LRP008a,23.702111,90.515778,BS,Bridge,Kachpur bridge
2,N1,8.429,LRP008b,23.704583,90.518833,BE,Bridge,Bridge end
3,N1,11.745,LRP012a,23.691111,90.544889,BS,Bridge,Bridge start
4,N1,11.771,LRP012b,23.691055,90.545139,BE,Bridge,Bridge end


In [16]:
# Preview the last rows of the combined node list.
df4.tail()

Unnamed: 0,road,chainage,lrp,lat,lon,gap,type,name
312,N1,460.589,LRP465a,20.87686,92.297889,BS,Bridge,Bridge start
313,N1,460.632,LRP465b,20.876499,92.297889,BE,Bridge,Bridge end
314,N1,461.904,LRP466b,20.865028,92.29825,BS,Bridge,Bridge start
315,N1,461.946,LRP466c,20.864667,92.298194,BE,Bridge,Bridge end
316,N1,462.254,LRPE,20.862917,92.298083,,Sink,"End of Road at Shapla Chattar ,Teknaf Meet wit..."


In [17]:
# Mean latitude and longitude of the selected nodes; used below as the origin
# for the x/z coordinates.
lat_mean = df4['lat'].mean()
lon_mean = df4['lon'].mean()

In [18]:
# Centre the coordinates on the mean position. z uses the negated latitude
# offset, so z increases as latitude decreases; x is the longitude offset.
df4['z'] = lat_mean - df4['lat']
df4['x'] = df4['lon'] - lon_mean
df4.head()

Unnamed: 0,road,chainage,lrp,lat,lon,gap,type,name,z,x
0,N1,0.0,LRPS,23.706028,90.443333,,Source,Start of Road after Jatrabari Flyover infront...,-1.539999,-1.292958
1,N1,8.011,LRP008a,23.702111,90.515778,BS,Bridge,Kachpur bridge,-1.536082,-1.220514
2,N1,8.429,LRP008b,23.704583,90.518833,BE,Bridge,Bridge end,-1.538554,-1.217459
3,N1,11.745,LRP012a,23.691111,90.544889,BS,Bridge,Bridge start,-1.525082,-1.191403
4,N1,11.771,LRP012b,23.691055,90.545139,BE,Bridge,Bridge end,-1.525026,-1.191153


In [19]:
# Chainage distance from each node to the next node in df4. The last row has
# no successor and therefore gets NaN, just as with a row-by-row loop.
# Vectorized with shift(-1); this also avoids .ix, removed in pandas 1.0.
df4['difference'] = df4['chainage'].shift(-1) - df4['chainage']

In [20]:
# gap == "BS": bridge start; 'difference' on such a row is the chainage distance
#              to the next row (presumably the bridge length -- confirm)
# gap == "BE": bridge end; 'difference' on such a row is the chainage distance
#              to the next row (presumably the road up to the next bridge -- confirm)
df4

Unnamed: 0,road,chainage,lrp,lat,lon,gap,type,name,z,x,difference
0,N1,0.000,LRPS,23.706028,90.443333,,Source,Start of Road after Jatrabari Flyover infront...,-1.539999,-1.292958,8.011
1,N1,8.011,LRP008a,23.702111,90.515778,BS,Bridge,Kachpur bridge,-1.536082,-1.220514,0.418
2,N1,8.429,LRP008b,23.704583,90.518833,BE,Bridge,Bridge end,-1.538554,-1.217459,3.316
3,N1,11.745,LRP012a,23.691111,90.544889,BS,Bridge,Bridge start,-1.525082,-1.191403,0.026
4,N1,11.771,LRP012b,23.691055,90.545139,BE,Bridge,Bridge end,-1.525026,-1.191153,4.471
5,N1,16.242,LRP016b,23.659111,90.569722,BS,Bridge,Langalbandhu bridge,-1.493082,-1.166570,0.160
6,N1,16.402,LRP016c,23.658306,90.570999,BE,Bridge,Bridge end,-1.492277,-1.165292,0.429
7,N1,16.831,LRP017a,23.655972,90.574333,BS,Bridge,Bridge start,-1.489943,-1.161959,0.021
8,N1,16.852,LRP017b,23.655917,90.574527,BE,Bridge,Bridge end,-1.489888,-1.161764,0.352
9,N1,17.204,LRP017c,23.654056,90.577277,BS,Bridge,Bridge start,-1.488027,-1.159014,0.030


In [21]:
# Number of missing (NaN) values in the gap column.
df4['gap'].isnull().sum()

17

In [22]:
# Rows named exactly "Bridge end" get the gap marker "BE".
df4.loc[df4['name']=="Bridge end", 'gap'] = "BE"

In [23]:
# Missing gap values left after labelling the "Bridge end" rows.
df4['gap'].isnull().sum()

11

In [24]:
# Rows named exactly "Bridge start" get the gap marker "BS".
df4.loc[df4['name']=="Bridge start", 'gap'] = "BS"

In [25]:
# Missing gap values left after also labelling the "Bridge start" rows.
df4['gap'].isnull().sum()

7

In [None]:
# Show the rows whose gap marker is still missing.
df4[df4['gap'].isnull()]

In [26]:
# Fill in the remaining missing gap markers from the preceding row: a row
# following a "BS" becomes "BE", and a row following a "BE" becomes "BS".
# The first and the last row of df4 are deliberately left untouched.
# The range ends at len(df4) - 1 so that the second-to-last row is processed
# too (the previous bound stopped one row early). .at replaces .ix, which was
# removed in pandas 1.0; df4 carries a 0..n-1 index, so i is a valid label.
for i in range(1, len(df4) - 1):
    if df4.at[i, 'gap'] not in ('BE', 'BS'):
        previous_gap = df4.at[i - 1, 'gap']
        if previous_gap == 'BS':
            df4.at[i, 'gap'] = 'BE'
        elif previous_gap == 'BE':
            df4.at[i, 'gap'] = 'BS'

In [27]:
# Missing gap values left after filling from the neighbouring rows.
df4['gap'].isnull().sum()

2

In [28]:
# Display the node list with the completed gap markers.
df4

Unnamed: 0,road,chainage,lrp,lat,lon,gap,type,name,z,x,difference
0,N1,0.000,LRPS,23.706028,90.443333,,Source,Start of Road after Jatrabari Flyover infront...,-1.539999,-1.292958,8.011
1,N1,8.011,LRP008a,23.702111,90.515778,BS,Bridge,Kachpur bridge,-1.536082,-1.220514,0.418
2,N1,8.429,LRP008b,23.704583,90.518833,BE,Bridge,Bridge end,-1.538554,-1.217459,3.316
3,N1,11.745,LRP012a,23.691111,90.544889,BS,Bridge,Bridge start,-1.525082,-1.191403,0.026
4,N1,11.771,LRP012b,23.691055,90.545139,BE,Bridge,Bridge end,-1.525026,-1.191153,4.471
5,N1,16.242,LRP016b,23.659111,90.569722,BS,Bridge,Langalbandhu bridge,-1.493082,-1.166570,0.160
6,N1,16.402,LRP016c,23.658306,90.570999,BE,Bridge,Bridge end,-1.492277,-1.165292,0.429
7,N1,16.831,LRP017a,23.655972,90.574333,BS,Bridge,Bridge start,-1.489943,-1.161959,0.021
8,N1,16.852,LRP017b,23.655917,90.574527,BE,Bridge,Bridge end,-1.489888,-1.161764,0.352
9,N1,17.204,LRP017c,23.654056,90.577277,BS,Bridge,Bridge start,-1.488027,-1.159014,0.030


In [30]:
# Drop the bridge-start ("BS") rows; "BE" rows and rows without a gap marker remain.
df5 = df4[df4['gap'] != "BS"]
df5.head()
# On the remaining rows, 'difference' is the chainage distance to the next row
# of df4 (presumably the road section up to the next bridge -- confirm).

Unnamed: 0,road,chainage,lrp,lat,lon,gap,type,name,z,x,difference
0,N1,0.0,LRPS,23.706028,90.443333,,Source,Start of Road after Jatrabari Flyover infront...,-1.539999,-1.292958,8.011
2,N1,8.429,LRP008b,23.704583,90.518833,BE,Bridge,Bridge end,-1.538554,-1.217459,3.316
4,N1,11.771,LRP012b,23.691055,90.545139,BE,Bridge,Bridge end,-1.525026,-1.191153,4.471
6,N1,16.402,LRP016c,23.658306,90.570999,BE,Bridge,Bridge end,-1.492277,-1.165292,0.429
8,N1,16.852,LRP017b,23.655917,90.574527,BE,Bridge,Bridge end,-1.489888,-1.161764,0.352


In [33]:
# Keep only the bridge-start ("BS") rows; this drops the "BE" rows and also
# every row without a gap marker.
df6 = df4[df4['gap'] == "BS"]
df6.head()
# On a "BS" row, 'difference' is the chainage distance to the next row of df4
# (presumably the matching bridge end, i.e. the bridge length -- confirm).

Unnamed: 0,road,chainage,lrp,lat,lon,gap,type,name,z,x,difference
1,N1,8.011,LRP008a,23.702111,90.515778,BS,Bridge,Kachpur bridge,-1.536082,-1.220514,0.418
3,N1,11.745,LRP012a,23.691111,90.544889,BS,Bridge,Bridge start,-1.525082,-1.191403,0.026
5,N1,16.242,LRP016b,23.659111,90.569722,BS,Bridge,Langalbandhu bridge,-1.493082,-1.16657,0.16
7,N1,16.831,LRP017a,23.655972,90.574333,BS,Bridge,Bridge start,-1.489943,-1.161959,0.021
9,N1,17.204,LRP017c,23.654056,90.577277,BS,Bridge,Bridge start,-1.488027,-1.159014,0.03


In [None]:
#old stuff:

In [None]:
# Write the node list to output.csv.
df4.to_csv("output.csv")

In [None]:
# Number of rows in the node list.
len(df4)

In [None]:
# Preview the first rows of the node list.
df4.head()

In [None]:
# Add a constant y coordinate of 0 for every node.
df4['y'] = 0
df4.head()

In [None]:
#todo: get condition, length, width and height from other file
#todo: links (tab2)
# Pick the columns to export and give them their output names.
df5 = df4[['type', 'lrp', 'x', 'y', 'z']].rename(
    columns={
        'type': 'Object Class',
        'lrp': 'Object Name',
        'x': 'X',
        'y': 'Y',
        'z': 'Z',
    }
)
df5



In [None]:
# Plot the centred node positions (X against Z).
df5.plot(x='X', y='Z')

In [None]:
# Plot all nodes of road N1 from the original data (longitude against latitude).
df[df['road']=='N1'].plot(x='lon', y='lat')