In [4]:
import numpy as np
import pandas as pd
import geopandas as gpd
import matplotlib
import matplotlib.pyplot as plt
from shapely.geometry import Polygon, Point, LineString
from datetime import datetime

## Read shapefiles into variables

In [13]:
# Read shape files into geopandas
right_half_tri_SF = gpd.read_file("shapefiles/right_half_triangle/right_half_triangle.shp")[['Name', 'geometry']]
left_half_tri_SF = gpd.read_file("shapefiles/left_half_triangle/left_half_triangle.shp")[['Name', 'geometry']]
island_SF = gpd.read_file("shapefiles/island/island.shp")[['Name', 'geometry']]
newport_SF = gpd.read_file("shapefiles/inundation_map/inundation_map.shp")[['Name', 'geometry']]
# Read Street Shape Files
balboa_blvd = gpd.read_file("shapefiles/Streets/Balboa/Balboa_Blvd-polygon.shp")[['Name', 'geometry']]
w_balboa_blvd = gpd.read_file("shapefiles/Streets/W_Balboa/W_Balboa-polygon.shp")[['Name', 'geometry']]
newport = gpd.read_file("shapefiles/Streets/Newport/Newport_Blvd-polygon.shp")[['Name', 'geometry']]

In [14]:
section_shapefile_list = [right_half_tri_SF,left_half_tri_SF,island_SF,newport_SF]
street_shapefile_list = [balboa_blvd,w_balboa_blvd,newport]

In [62]:
for i in range(len(section_shapefile_list)):

    print(section_shapefile_list[i]['Name'][0])

right_half_triangle
left_half_triangle
Island
Inundation Map


## Read data for each day

In [2]:
# Assign data for each day
d_1_07012018 = 'Data/Probe_Data/2018_07_01_NewportBeach_basic_probe.csv'
d_2_07022018 = 'Data/Probe_Data/2018_07_02_NewportBeach_basic_probe.csv'
d_3_07032018 = 'Data/Probe_Data/2018_07_03_NewportBeach_basic_probe.csv'
d_4_07042018 = 'Data/Probe_Data/2018_07_04_NewportBeach_basic_probe.csv'
d_5_07052018 = 'Data/Probe_Data/2018_07_05_NewportBeach_basic_probe.csv'
d_6_07062018 = 'Data/Probe_Data/2018_07_06_NewportBeach_basic_probe.csv'
d_7_07072018 = 'Data/Probe_Data/2018_07_07_NewportBeach_basic_probe.csv'

In [77]:
data = pd.read_csv(d_4_07042018,
                   names = ["PROBE_ID","SAMPLE_DATE", "LAT", "LONG", "HEADING",\
                           "SPEED","PROBE_DATA_PROVIDER","X","Y", "LOCAL_TIME"])
# This now sorts in date order
data.sort_values(by='SAMPLE_DATE', inplace=True, ascending=True) 
data.head(3)

Unnamed: 0,PROBE_ID,SAMPLE_DATE,LAT,LONG,HEADING,SPEED,PROBE_DATA_PROVIDER,X,Y,LOCAL_TIME
480,285613c2-f71d-55f6-bb2b-8f16528c0a2c,2018-07-04 00:00:00,33.615262,-117.927329,34.0,14.0,CONSUMER14,413977.562793,3719884.0,2018-07-03 17:00:00 -0700
479,285613c2-f71d-55f6-bb2b-8f16528c0a2c,2018-07-04 00:00:01,33.615269,-117.927325,19.0,12.0,CONSUMER14,413977.940815,3719885.0,2018-07-03 17:00:01 -0700
445,285613c2-f71d-55f6-bb2b-8f16528c0a2c,2018-07-04 00:00:02,33.615287,-117.927314,19.0,17.0,CONSUMER14,413978.979134,3719886.0,2018-07-03 17:00:02 -0700


# Analyze Data

In [116]:
class Analyze_Data:
    
    def __init__(self, data_df,section_shapefile_list,street_shapefile_list):
        # Reading data dataframe into class
        self.data = data_df 
        
        # Read shapefiles into class as lists
        self.section = section_shapefile_list
        self.street = street_shapefile_list
        
    
    def plot(self):
        # Plot Data
        plt.figure(figsize = (12,8))
        plt.scatter(self.data.LONG, self.data.LAT, s = 0.5, c = 'k')
        plt.show()
    
    
    def create_point(self,row):
        # Helper function for turning coordinates into shapely points
        return Point(row['LONG'],row['LAT'])
    
    def append_points(self):

        self.data['coord'] = self.data.apply(self.create_point, axis=1)
        self.data['section'] = 'N/A'
        
    def get_section(self):
        # For separating data into sections based on defined shapefiles
        
        self.append_points()
        
        section_list = []
        
        for i in self.data.index.values:
            if self.section[0]['geometry'][0].contains(self.data['coord'][i]):
                section_list.append('right_half_tri')
            elif self.section[1]['geometry'][0].contains(self.data['coord'][i]):
                section_list.append('left_half_tri')
            elif self.section[2]['geometry'][0].contains(self.data['coord'][i]):
                section_list.append('island')
            elif self.section[3]['geometry'][0].contains(self.data['coord'][i]):
                section_list.append('rest_of_new_port')
            else:
                section_list.append('N/A')
        
        self.data['section'] = section_list
        
        return self.data
    
    def get_street(self):
        # For separating data into streets based on defined shapefiles
        
        self.get_section()
        street_list = []

        for i in self.data.index.values:
            if self.street[0]['geometry'][0].contains(self.data['coord'][i]):
                street_list.append('balboa')
            elif self.street[1]['geometry'][0].contains(self.data['coord'][i]):
                street_list.append('w_balboa')
            elif self.street[2]['geometry'][0].contains(self.data['coord'][i]):
                street_list.append('newport')
            else:
                street_list.append('N/A')

        self.data['street'] = street_list
        
        return self.data
    
    def clean_data(self):
        # Remove useless columns and set timezone
        self.data_cleaned = self.data[self.data.section != 'N/A'][['PROBE_ID','LAT','LONG',\
                                                            'HEADING',\
                                                            'SPEED',\
                                                            'PROBE_DATA_PROVIDER',\
                                                            'LOCAL_TIME',\
                                                            'coord',\
                                                            'section','street']]
        
        self.data_cleaned['LOCAL_TIME'] = pd.to_datetime(self.data_cleaned['LOCAL_TIME'])\
                                        .dt.tz_localize('UTC').dt.tz_convert('America/Los_Angeles')
        # Separate to hours
        self.data_cleaned['HOUR'] = self.data_cleaned['LOCAL_TIME'].dt.hour  
        
        return self.data_cleaned
    
    def clean_data_add_heading(self):
        # Add heading information per probe
        self.clean_data()
        
        heading_fixed = []
        
        for i in self.data_cleaned.index.values:
            if self.data_cleaned['HEADING'][i] >180:
                heading_fixed.append(-(360 - self.data_cleaned['HEADING'][i]))
            else:
                heading_fixed.append(self.data_cleaned['HEADING'][i])
        
        self.data_cleaned['HEADING_FIXED'] = heading_fixed
        
        return self.data_cleaned

    
    def probe_count(self, section):
        # For counting number of uniuqe probes
        self.clean_data_add_heading()
        uniq_probe = self.data_cleaned.loc[self.data_cleaned['section'] == section].PROBE_ID.unique()

        return len(uniq_probe)
    
    

In [186]:
class road_specific_analysis():
    
    def __init__(self, data_cleaned, street_choice):
        self.data = data_cleaned
        self.street = street_choice
    
    def road_df(self):
        # Filter and create dataframe based on road choice
        self.data
        self.road = self.data.loc[self.data['street']== self.street].copy()
        
        return self.road
    
    def get_heading(self, df, i, lower_head, upper_head):
        
        # Helper function for differentiating heading
        head_df = df.loc[(df['HOUR']==i) & \
                         ((df['HEADING_FIXED'] >= lower_head) | \
                         (df['HEADING_FIXED'] <= upper_head))][['PROBE_ID','SPEED']]
        
        # This removes all 0 speed instances
#         head_df = head_df[head_df['SPEED'] != 0]
        unique_probe_list = list(set(head_df['PROBE_ID']))
        
        return head_df, unique_probe_list
    
    def get_heading_info(self, limits, speed_limit, trans_type):
        
        '''
        Inputs:
            df - cleaned up dataframe with hours and streets 
            limits - list of heading limits, [lower, higher]
            trans_type - looking for 'vehicle' or 'pedestrian'
            
        Outputs:
            head_speed - list of 24 north heading speeds averaged over an hour
            head_unique_probe - list of 24 counts of unique probe ids in that hour - vehicles
        '''
        
        self.road_df()
        
        head_speed = []
        head_unique_probe = []
        
        for i in range(24):
        # Analyze for 24 hours
            head_df, unique_probe_list = self.get_heading(self.road, i, limits[0], limits[1])
            
            count = 0
            speed = []
            
            for j in unique_probe_list:

                probe_df = head_df.loc[head_df['PROBE_ID'] == j]
                mean_speed = np.mean(probe_df['SPEED'])

                if trans_type == 'Vehicle':

                    if mean_speed >= speed_limit:
                        count += 1
                        speed.append(mean_speed)

                elif trans_type == 'Pedestrian':

                    if mean_speed <= speed_limit:
                        count += 1
                        speed.append(mean_speed)
                    
        
            head_unique_probe.append(count)

            if not speed:
                head_speed.append([])
            else:
                head_speed.append(np.mean(speed))
            
        return head_speed, head_unique_probe
            

## Notes
### For unique probe count
### Section Options:
- 'right_half_tri'
- 'left_half_tri'
- 'island'
- 'rest_of_new_port'

### Street Options
- 'balboa'
- 'newport'


### Blanket sorting for all data

In [187]:
analysis = Analyze_Data(data, section_shapefile_list, street_shapefile_list)
data_cleaned = analysis.clean_data_add_heading()

### Analysis of data based on road and transportation type

In [188]:
# Balboa
bal_north_limits = [-90,45]
bal_south_limits = [145,-150]
# Newport
newport_north_limits = [-140,0]  
newport_south_limits = [90,-160]

speed_limit = 15

In [194]:
# For Balboa
analysis_balboa = road_specific_analysis(data_cleaned,'balboa')

# Northbound
veh_balboa_N_speed, veh_balboa_N_probe = analysis_balboa.\
                                         get_heading_info(bal_north_limits,\
                                         speed_limit, "Vehicle")


ped_balboa_N_speed, ped_balboa_N_probe = analysis_balboa.\
                                         get_heading_info(bal_north_limits,\
                                         speed_limit, "Pedestrian")


# Southbound

veh_balboa_S_speed, veh_balboa_S_probe = analysis_balboa.\
                                         get_heading_info(bal_south_limits,\
                                         speed_limit, "Vehicle")


ped_balboa_S_speed, ped_balboa_S_probe = analysis_balboa.\
                                         get_heading_info(bal_south_limits,\
                                         speed_limit, "Pedestrian")


In [195]:
# For Newport
analysis_newport = road_specific_analysis(data_cleaned,'newport')

# Northbound
veh_newport_N_speed, veh_newport_N_probe = analysis_newport.\
                                         get_heading_info(newport_north_limits,\
                                         speed_limit, "Vehicle")


ped_newport_N_speed, ped_newport_N_probe = analysis_newport.\
                                         get_heading_info(newport_north_limits,\
                                         speed_limit, "Pedestrian")


# Southbound

veh_newport_S_speed, veh_newport_S_probe = analysis_newport.\
                                         get_heading_info(newport_south_limits,\
                                         speed_limit, "Vehicle")


ped_newport_S_speed, ped_newport_S_probe = analysis_newport.\
                                         get_heading_info(newport_south_limits,\
                                         speed_limit, "Pedestrian")

In [196]:
# Create a dataframe of road information per hour
road_info_df = pd.DataFrame({'Hour':range(0,24),\
                'Newport_North_Speed_Veh':veh_newport_N_speed,\
                'Newport_South_Speed_Veh':veh_newport_S_speed,\
                'Newport_North_Probe_Veh':veh_newport_N_probe,\
                'Newport_South_Probe_Veh':veh_newport_S_probe,\
                'Newport_North_Speed_Ped':ped_newport_N_speed,\
                'Newport_South_Speed_Ped':ped_newport_S_speed,\
                'Newport_North_Probe_Ped':ped_newport_N_probe,\
                'Newport_South_Probe_Ped':ped_newport_S_probe,\
                             
                'Balboa_North_Speed_Veh':veh_balboa_N_speed,\
                'Balboa_South_Speed_Veh':veh_balboa_S_speed,\
                'Balboa_North_Probe_Veh':veh_balboa_N_probe,\
                'Balboa_South_Probe_Veh':veh_balboa_S_probe,\
                'Balboa_North_Speed_Ped':ped_balboa_N_speed,\
                'Balboa_South_Speed_Ped':ped_balboa_S_speed,\
                'Balboa_North_Probe_Ped':ped_balboa_N_probe,\
                'Balboa_South_Probe_Ped':ped_balboa_S_probe})




In [197]:
road_info_df

Unnamed: 0,Hour,Newport_North_Speed_Veh,Newport_South_Speed_Veh,Newport_North_Probe_Veh,Newport_South_Probe_Veh,Newport_North_Speed_Ped,Newport_South_Speed_Ped,Newport_North_Probe_Ped,Newport_South_Probe_Ped,Balboa_North_Speed_Veh,Balboa_South_Speed_Veh,Balboa_North_Probe_Veh,Balboa_South_Probe_Veh,Balboa_North_Speed_Ped,Balboa_South_Speed_Ped,Balboa_North_Probe_Ped,Balboa_South_Probe_Ped
0,0,41.329114,45.3291,2,2,[],[],0,0,32.8125,25.7778,1,1,[],[],0,0
1,1,27.35878,27.3588,1,1,[],[],0,0,[],[],0,0,[],[],0,0
2,2,27.285714,15.5714,2,1,[],[],0,0,[],[],0,0,5.73434,7.5,2,1
3,3,30.57746,30.5775,1,1,10.7204,8.96632,2,1,[],[],0,0,10.8567,[],2,0
4,4,41.03817,15.2887,1,1,11.9057,[],2,0,[],[],0,0,[],[],0,0
5,5,29.504567,39.4288,1,1,[],[],0,0,[],[],0,0,6.20745,[],1,0
6,6,47.05411,41,2,1,[],[],0,0,31.5,[],1,0,7.24203,9.65604,2,1
7,7,37.823529,37.8235,1,1,10.3043,10.3043,1,1,17.0833,18,2,2,12.5,12,1,1
8,8,30.936156,26.936,4,3,0,0,1,1,19.1923,[],1,0,5,8.13333,1,2
9,9,23.284712,30.6541,3,1,6.54286,10.1048,1,2,22.5308,[],1,0,5.52619,7.30435,2,2
