In [1]:
import numpy as np
import pandas as pd
import geopandas as gpd
import matplotlib
import matplotlib.pyplot as plt
from shapely.geometry import Polygon, Point, LineString
from datetime import datetime

## Read shapefiles into variables

In [2]:
# Read shape files into geopandas
right_half_tri_SF = gpd.read_file("shapefiles/right_half_triangle/right_half_triangle.shp")[['Name', 'geometry']]
left_half_tri_SF = gpd.read_file("shapefiles/left_half_triangle/left_half_triangle.shp")[['Name', 'geometry']]
island_SF = gpd.read_file("shapefiles/island/island.shp")[['Name', 'geometry']]
newport_SF = gpd.read_file("shapefiles/inundation_map/inundation_map.shp")[['Name', 'geometry']]
# Read Street Shape Files
balboa_blvd = gpd.read_file("shapefiles/Streets/Balboa/Balboa_Blvd-polygon.shp")[['Name', 'geometry']]
w_balboa_blvd = gpd.read_file("shapefiles/Streets/W_Balboa/W_Balboa-polygon.shp")[['Name', 'geometry']]
newport = gpd.read_file("shapefiles/Streets/Newport/Newport_Blvd-polygon.shp")[['Name', 'geometry']]

In [3]:
section_shapefile_list = [right_half_tri_SF,left_half_tri_SF,island_SF,newport_SF]
street_shapefile_list = [balboa_blvd,w_balboa_blvd,newport]

In [4]:
for i in range(len(section_shapefile_list)):

    print(section_shapefile_list[i]['Name'][0])

right_half_triangle
left_half_triangle
Island
Inundation Map


## Read data for each day

In [5]:
# Assign data for each day
d_1_07012018 = 'Data/Probe_Data/2018_07_01_NewportBeach_basic_probe.csv'
d_2_07022018 = 'Data/Probe_Data/2018_07_02_NewportBeach_basic_probe.csv'
d_3_07032018 = 'Data/Probe_Data/2018_07_03_NewportBeach_basic_probe.csv'
d_4_07042018 = 'Data/Probe_Data/2018_07_04_NewportBeach_basic_probe.csv'
d_5_07052018 = 'Data/Probe_Data/2018_07_05_NewportBeach_basic_probe.csv'
d_6_07062018 = 'Data/Probe_Data/2018_07_06_NewportBeach_basic_probe.csv'
d_7_07072018 = 'Data/Probe_Data/2018_07_07_NewportBeach_basic_probe.csv'

In [6]:
data = pd.read_csv(d_2_07022018,
                   names = ["PROBE_ID","SAMPLE_DATE", "LAT", "LONG", "HEADING",\
                           "SPEED","PROBE_DATA_PROVIDER","X","Y", "LOCAL_TIME"])
# This now sorts in date order
data.sort_values(by='SAMPLE_DATE', inplace=True, ascending=True) 
data.head(3)

Unnamed: 0,PROBE_ID,SAMPLE_DATE,LAT,LONG,HEADING,SPEED,PROBE_DATA_PROVIDER,X,Y,LOCAL_TIME
564,33E9A802-F2AC-4D2E-A67F-0DA7010CBEC0,2018-07-02 00:00:00,33.620078,-117.929563,24.0,59.0,CONSUMER14,413775.118525,3720420.0,2018-07-01 17:00:00 -0700
54,33E9A802-F2AC-4D2E-A67F-0DA7010CBEC0,2018-07-02 00:00:01,33.620196,-117.929495,29.0,58.0,CONSUMER14,413781.54385,3720433.0,2018-07-01 17:00:01 -0700
462,33E9A802-F2AC-4D2E-A67F-0DA7010CBEC0,2018-07-02 00:00:02,33.620341,-117.929302,17.0,58.0,CONSUMER14,413799.591224,3720449.0,2018-07-01 17:00:02 -0700


# Analyze Data

In [7]:
class Analyze_Data:
    
    def __init__(self, data_df,section_shapefile_list,street_shapefile_list):
        # Reading data dataframe into class
        self.data = data_df 
        
        # Read shapefiles into class as lists
        self.section = section_shapefile_list
        self.street = street_shapefile_list
        
    
    def plot(self):
        # Plot Data
        plt.figure(figsize = (12,8))
        plt.scatter(self.data.LONG, self.data.LAT, s = 0.5, c = 'k')
        plt.show()
    
    
    def create_point(self,row):
        # Helper function for turning coordinates into shapely points
        return Point(row['LONG'],row['LAT'])
    
    def append_points(self):

        self.data['coord'] = self.data.apply(self.create_point, axis=1)
        self.data['section'] = 'N/A'
        
        return self.data
        
    def get_section(self):
        # For separating data into sections based on defined shapefiles
        
        self.append_points()
        
        section_list = []
        
        for i in self.data.index.values:
            if self.section[0]['geometry'][0].contains(self.data['coord'][i]):
                section_list.append('right_half_tri')
            elif self.section[1]['geometry'][0].contains(self.data['coord'][i]):
                section_list.append('left_half_tri')
            elif self.section[2]['geometry'][0].contains(self.data['coord'][i]):
                section_list.append('island')
            elif self.section[3]['geometry'][0].contains(self.data['coord'][i]):
                section_list.append('rest_of_new_port')
            else:
                section_list.append('N/A')
        
        self.data['section'] = section_list
        
        return self.data
    
    def get_street(self):
        # For separating data into streets based on defined shapefiles
        
        self.get_section()
        street_list = []

        for i in self.data.index.values:
            if self.street[0]['geometry'][0].contains(self.data['coord'][i]):
                street_list.append('balboa')
            elif self.street[1]['geometry'][0].contains(self.data['coord'][i]):
                street_list.append('w_balboa')
            elif self.street[2]['geometry'][0].contains(self.data['coord'][i]):
                street_list.append('newport')
            else:
                street_list.append('N/A')

        self.data['street'] = street_list
        
        return self.data
    
    def clean_data(self):
        # Remove useless columns and set timezone
        self.get_street()
        self.data_cleaned = self.data[self.data.section != 'N/A'][['PROBE_ID','LAT','LONG',\
                                                            'HEADING',\
                                                            'SPEED',\
                                                            'PROBE_DATA_PROVIDER',\
                                                            'LOCAL_TIME',\
                                                            'coord',\
                                                            'section','street']]
        
        self.data_cleaned['LOCAL_TIME'] = pd.to_datetime(self.data_cleaned['LOCAL_TIME'])\
                                        .dt.tz_localize('UTC').dt.tz_convert('America/Los_Angeles')
        # Separate to hours
        self.data_cleaned['HOUR'] = self.data_cleaned['LOCAL_TIME'].dt.hour  
        
        return self.data_cleaned
    
    def clean_data_add_heading(self):
        # Add heading information per probe
        self.clean_data()
        
        heading_fixed = []
        
        for i in self.data_cleaned.index.values:
            if self.data_cleaned['HEADING'][i] >180:
                heading_fixed.append(-(360 - self.data_cleaned['HEADING'][i]))
            else:
                heading_fixed.append(self.data_cleaned['HEADING'][i])
        
        self.data_cleaned['HEADING_FIXED'] = heading_fixed
        
        return self.data_cleaned

    
    def probe_count(self, section):
        # For counting number of uniuqe probes
        self.clean_data_add_heading()
        uniq_probe = self.data_cleaned.loc[self.data_cleaned['section'] == section].PROBE_ID.unique()

        return len(uniq_probe)
    
    

In [8]:
class road_specific_analysis():
    
    def __init__(self, data_cleaned, street_choice):
        self.data = data_cleaned
        self.street = street_choice
    
    def road_df(self):
        # Filter and create dataframe based on road choice
        self.data
        self.road = self.data.loc[self.data['street']== self.street].copy()
        
        return self.road
    
    def get_heading(self, df, i, lower_head, upper_head):
        
        # Helper function for differentiating heading
        head_df = df.loc[(df['HOUR']==i) & \
                         ((df['HEADING_FIXED'] >= lower_head) | \
                         (df['HEADING_FIXED'] <= upper_head))][['PROBE_ID','SPEED']]
        
        # This removes all 0 speed instances
        head_df = head_df[head_df['SPEED'] != 0]
        unique_probe_list = list(set(head_df['PROBE_ID']))
        
        return head_df, unique_probe_list
    
    def get_heading_info(self, limits, speed_limit, trans_type):
        
        '''
        Inputs:
            df - cleaned up dataframe with hours and streets 
            limits - list of heading limits, [lower, higher]
            trans_type - looking for 'vehicle' or 'pedestrian'
            
        Outputs:
            head_speed - list of 24 north heading speeds averaged over an hour
            head_unique_probe - list of 24 counts of unique probe ids in that hour - vehicles
        '''
        
        self.road_df()
        
        head_speed = []
        head_unique_probe = []
        
        for i in range(24):
        # Analyze for 24 hours
            head_df, unique_probe_list = self.get_heading(self.road, i, limits[0], limits[1])
            
            count = 0
            speed = []
            
            for j in unique_probe_list:

                probe_df = head_df.loc[head_df['PROBE_ID'] == j]
                mean_speed = np.mean(probe_df['SPEED'])

                if trans_type == 'Vehicle':

                    if mean_speed >= speed_limit:
                        count += 1
                        speed.append(mean_speed)

                elif trans_type == 'Pedestrian':

                    if mean_speed <= speed_limit:
                        count += 1
                        speed.append(mean_speed)
                    
        
            head_unique_probe.append(count)

            if not speed:
                head_speed.append(0)
            else:
                head_speed.append(np.mean(speed))
            
        return head_speed, head_unique_probe
            

# For getting trajectory

In [9]:
traj = Analyze_Data(data, section_shapefile_list, street_shapefile_list)
traj_df = traj.clean_data_add_heading()
traj_df.head()

Unnamed: 0,PROBE_ID,LAT,LONG,HEADING,SPEED,PROBE_DATA_PROVIDER,LOCAL_TIME,coord,section,street,HOUR,HEADING_FIXED
563,18274272,33.606906,-117.919411,281.0,16.0,FLEET51,2018-07-01 17:00:59-07:00,POINT (-117.9194113 33.60690579999999),rest_of_new_port,w_balboa,17,-79.0
445,18274272,33.607024,-117.920146,0.0,0.0,FLEET51,2018-07-01 17:02:29-07:00,POINT (-117.9201456 33.607024),rest_of_new_port,w_balboa,17,0.0
504,18274272,33.607087,-117.92038,281.0,18.0,FLEET51,2018-07-01 17:02:45-07:00,POINT (-117.9203801 33.6070871),rest_of_new_port,w_balboa,17,-79.0
307,3fd3363b7ec448d8aabd3d1f913d43f3,33.60822,-117.92847,216.0,3.0,CONSUMER21,2018-07-01 17:03:33-07:00,POINT (-117.92847 33.60822),rest_of_new_port,,17,-144.0
273,3fd3363b7ec448d8aabd3d1f913d43f3,33.60815,-117.9284,138.0,4.0,CONSUMER21,2018-07-01 17:03:38-07:00,POINT (-117.9284 33.60815),rest_of_new_port,,17,138.0


In [10]:
uniq_ID = set(traj_df.PROBE_ID)

# Create empty dataframe
columns = ['PROBE_ID','Route_Num','Start_Section','End_Section','Time_Start','Time_End']
traj_route_df = pd.DataFrame(columns=columns)

for ID in uniq_ID:
    
    probe_df = traj_df.loc[traj_df.PROBE_ID == ID]
    
    route = [probe_df.section.iloc[0]]
    route_count = 0
    route_time = [probe_df.LOCAL_TIME.iloc[0]]
    
    for i in range(len(probe_df)-1):

        if probe_df.LOCAL_TIME[i+1] - probe_df.LOCAL_TIME[i]<pd.Timedelta(minutes = 5):

            route.append(probe_df.section.iloc[i+1])
            route_time.append(probe_df.LOCAL_TIME.iloc[i+1])

        else:
            route_count += 1  
            traj_route_df = traj_route_df.append({'PROBE_ID':ID,'Route_Num':route_count,\
                                                  'Start_Section':route[0],'End_Section':route[-1],\
                                                'Time_Start':route_time[0],'Time_End':route_time[-1]},\
                                                ignore_index=True)
            



In [11]:
traj_rest = traj_route_df.loc[traj_route_df['Start_Section'] == 'rest_of_new_port']

In [29]:
right = traj_rest.loc[traj_rest['End_Section'] == 'right_half_tri']
left = traj_rest.loc[traj_rest['End_Section'] == 'left_half_tri']

In [31]:
print('number going right:', len(right))
print('number going left:', len(left))

number going right: 9
number going left: 7


## Notes
### For unique probe count
### Section Options:
- 'right_half_tri'
- 'left_half_tri'
- 'island'
- 'rest_of_new_port'

### Street Options
- 'balboa'
- 'newport'


### Blanket sorting for all data

In [13]:
analysis = Analyze_Data(data, section_shapefile_list, street_shapefile_list)
data_cleaned = analysis.clean_data_add_heading()

In [14]:
data_cleaned

Unnamed: 0,PROBE_ID,LAT,LONG,HEADING,SPEED,PROBE_DATA_PROVIDER,LOCAL_TIME,coord,section,street,HOUR,HEADING_FIXED
563,18274272,33.606906,-117.919411,281.0,16.00000,FLEET51,2018-07-01 17:00:59-07:00,POINT (-117.9194113 33.60690579999999),rest_of_new_port,w_balboa,17,-79.0
445,18274272,33.607024,-117.920146,0.0,0.00000,FLEET51,2018-07-01 17:02:29-07:00,POINT (-117.9201456 33.607024),rest_of_new_port,w_balboa,17,0.0
504,18274272,33.607087,-117.920380,281.0,18.00000,FLEET51,2018-07-01 17:02:45-07:00,POINT (-117.9203801 33.6070871),rest_of_new_port,w_balboa,17,-79.0
307,3fd3363b7ec448d8aabd3d1f913d43f3,33.608220,-117.928470,216.0,3.00000,CONSUMER21,2018-07-01 17:03:33-07:00,POINT (-117.92847 33.60822),rest_of_new_port,,17,-144.0
273,3fd3363b7ec448d8aabd3d1f913d43f3,33.608150,-117.928400,138.0,4.00000,CONSUMER21,2018-07-01 17:03:38-07:00,POINT (-117.9284 33.60815),rest_of_new_port,,17,138.0
315,326f9d71e0ab4618ba5edb1f42b9237a,33.603940,-117.909680,100.0,4.00000,CONSUMER21,2018-07-01 17:04:43-07:00,POINT (-117.90968 33.60394),rest_of_new_port,,17,100.0
274,18274272,33.607495,-117.923093,279.0,0.00000,FLEET51,2018-07-01 17:05:44-07:00,POINT (-117.9230933 33.6074945),rest_of_new_port,w_balboa,17,-81.0
663,18274272,33.607495,-117.923093,279.0,0.00000,FLEET51,2018-07-01 17:05:45-07:00,POINT (-117.9230933 33.6074945),rest_of_new_port,w_balboa,17,-81.0
280,18274272,33.607769,-117.924928,280.0,16.00000,FLEET51,2018-07-01 17:07:28-07:00,POINT (-117.924928 33.607769),rest_of_new_port,w_balboa,17,-80.0
355,18274272,33.608711,-117.927393,316.0,17.00000,FLEET51,2018-07-01 17:09:32-07:00,POINT (-117.9273931 33.6087106),rest_of_new_port,w_balboa,17,-44.0


### Analysis of data based on road and transportation type

In [15]:
# Balboa
bal_north_limits = [-90,45]
bal_south_limits = [145,-150]
# Newport
newport_north_limits = [-140,0]  
newport_south_limits = [90,-160]

speed_limit = 15

In [16]:
# For Balboa
analysis_balboa = road_specific_analysis(data_cleaned,'balboa')

# Northbound
veh_balboa_N_speed, veh_balboa_N_probe = analysis_balboa.\
                                         get_heading_info(bal_north_limits,\
                                         speed_limit, "Vehicle")


ped_balboa_N_speed, ped_balboa_N_probe = analysis_balboa.\
                                         get_heading_info(bal_north_limits,\
                                         speed_limit, "Pedestrian")


# Southbound

veh_balboa_S_speed, veh_balboa_S_probe = analysis_balboa.\
                                         get_heading_info(bal_south_limits,\
                                         speed_limit, "Vehicle")


ped_balboa_S_speed, ped_balboa_S_probe = analysis_balboa.\
                                         get_heading_info(bal_south_limits,\
                                         speed_limit, "Pedestrian")


In [17]:
# For Newport
analysis_newport = road_specific_analysis(data_cleaned,'newport')

# Northbound
veh_newport_N_speed, veh_newport_N_probe = analysis_newport.\
                                         get_heading_info(newport_north_limits,\
                                         speed_limit, "Vehicle")


ped_newport_N_speed, ped_newport_N_probe = analysis_newport.\
                                         get_heading_info(newport_north_limits,\
                                         speed_limit, "Pedestrian")


# Southbound

veh_newport_S_speed, veh_newport_S_probe = analysis_newport.\
                                         get_heading_info(newport_south_limits,\
                                         speed_limit, "Vehicle")


ped_newport_S_speed, ped_newport_S_probe = analysis_newport.\
                                         get_heading_info(newport_south_limits,\
                                         speed_limit, "Pedestrian")

In [18]:
# Create a dataframe of road information per hour
road_info_df = pd.DataFrame({'Hour':range(0,24),\
                'Newport_North_Speed_Veh':veh_newport_N_speed,\
                'Newport_South_Speed_Veh':veh_newport_S_speed,\
                'Newport_North_Probe_Veh':veh_newport_N_probe,\
                'Newport_South_Probe_Veh':veh_newport_S_probe,\
                'Newport_North_Speed_Ped':ped_newport_N_speed,\
                'Newport_South_Speed_Ped':ped_newport_S_speed,\
                'Newport_North_Probe_Ped':ped_newport_N_probe,\
                'Newport_South_Probe_Ped':ped_newport_S_probe,\
                             
                'Balboa_North_Speed_Veh':veh_balboa_N_speed,\
                'Balboa_South_Speed_Veh':veh_balboa_S_speed,\
                'Balboa_North_Probe_Veh':veh_balboa_N_probe,\
                'Balboa_South_Probe_Veh':veh_balboa_S_probe,\
                'Balboa_North_Speed_Ped':ped_balboa_N_speed,\
                'Balboa_South_Speed_Ped':ped_balboa_S_speed,\
                'Balboa_North_Probe_Ped':ped_balboa_N_probe,\
                'Balboa_South_Probe_Ped':ped_balboa_S_probe})




In [19]:
road_info_df

Unnamed: 0,Hour,Newport_North_Speed_Veh,Newport_South_Speed_Veh,Newport_North_Probe_Veh,Newport_South_Probe_Veh,Newport_North_Speed_Ped,Newport_South_Speed_Ped,Newport_North_Probe_Ped,Newport_South_Probe_Ped,Balboa_North_Speed_Veh,Balboa_South_Speed_Veh,Balboa_North_Probe_Veh,Balboa_South_Probe_Veh,Balboa_North_Speed_Ped,Balboa_South_Speed_Ped,Balboa_North_Probe_Ped,Balboa_South_Probe_Ped
0,0,0.0,0.0,0,0,3.0,0.0,1,0,0.0,0.0,0,0,0.0,0.0,0,0
1,1,59.0,0.0,1,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0
2,2,30.640351,28.653333,3,3,14.0,14.0,1,1,0.0,0.0,0,0,0.0,0.0,0,0
3,3,41.84284,41.84284,1,1,0.0,0.0,0,0,59.54558,59.54558,1,1,0.0,0.0,0,0
4,4,37.37472,49.0,2,1,9.253705,9.65604,1,1,0.0,0.0,0,0,1.60934,0.0,1,0
5,5,39.736717,26.956445,3,2,10.192487,0.0,1,0,0.0,0.0,0,0,6.115492,4.82802,1,1
6,6,38.58617,39.438227,8,7,6.5,1.0,1,1,43.91345,56.3269,2,1,8.0467,8.0467,2,1
7,7,26.367207,28.096394,5,4,0.0,0.0,0,0,20.11675,0.0,1,0,0.0,0.0,0,0
8,8,31.435083,30.4,7,5,8.21868,8.5,4,2,46.92142,0.0,2,0,9.65604,9.65604,1,1
9,9,34.699266,33.48791,8,6,9.52335,10.02335,2,2,25.841963,23.455856,5,3,5.875,1.5,2,1


In [20]:


def find_slowest(df, column,probe):
    
    slow = []
    for i in df[column]:
        if i>0:
            slow.append(i)
    slowest_speed = min(slow)
    slowest_df = df.loc[df[column]==slowest_speed][['Hour', column, probe]]

    return slowest_df



In [21]:
N_N_slow = find_slowest(road_info_df,'Newport_North_Speed_Veh','Newport_North_Probe_Veh')
S_N_slow = find_slowest(road_info_df,'Newport_South_Speed_Veh','Newport_South_Probe_Veh')

N_B_slow = find_slowest(road_info_df,'Balboa_North_Speed_Veh','Balboa_North_Probe_Veh')
S_B_slow = find_slowest(road_info_df,'Balboa_South_Speed_Veh','Balboa_South_Probe_Veh')

In [22]:
N_N_slow

Unnamed: 0,Hour,Newport_North_Speed_Veh,Newport_North_Probe_Veh
19,19,23.953738,3


In [23]:
S_N_slow

Unnamed: 0,Hour,Newport_South_Speed_Veh,Newport_South_Probe_Veh
13,13,25.528478,7


In [24]:
N_B_slow

Unnamed: 0,Hour,Balboa_North_Speed_Veh,Balboa_North_Probe_Veh
17,17,15.631579,1


In [25]:
S_B_slow

Unnamed: 0,Hour,Balboa_South_Speed_Veh,Balboa_South_Probe_Veh
17,17,15.461538,1
