In [1]:
import geopandas as gpd

import calendar
from pandas import DataFrame
import glob

import os
import warnings

# Silence every warning category for the whole notebook.
# NOTE(review): this blanket filter also hides library deprecation warnings — confirm it is intended.
warnings.filterwarnings('ignore')

# Field names: name of the label column shared by the cells below
ped_level = 'ped_level'

# Parent directory of the current working directory; the input files are read from beneath it
general_path = os.path.dirname(os.path.abspath(os.curdir))

### get time features and ped level from moovit data

In [2]:
# Columns kept from every Moovit shapefile: period label, weekday/weekend flag,
# segment ID and one ped-level column per day part
cols_to_use = ['Period', 'W', 'ID', 'morning', 'noon', 'afternoon', 'night']
# glob returns paths in arbitrary (OS-dependent) order; sorting keeps the order of the
# loaded tables, and therefore of every row derived from them, reproducible
moovit_paths = sorted(glob.glob(f'{general_path}/output/moovit/*.shp'))
# .copy() gives each table its own data, because later cells add columns to these frames in place
moovit_data = [gpd.read_file(path)[cols_to_use].copy() for path in moovit_paths]
moovit_data

[     Period        W    ID  morning  noon  afternoon  night
 0     APR21  WeekDay     0        1     0          2      0
 1     APR21  WeekDay     1        4     4          4      3
 2     APR21  WeekDay     2        4     4          4      3
 3     APR21  WeekDay     3        1     9          0      2
 4     APR21  WeekDay     4        0     0          9      1
 ...     ...      ...   ...      ...   ...        ...    ...
 8536  APR21  WeekDay  8760        3     2          1      0
 8537  APR21  WeekDay  8761        0     0          0      0
 8538  APR21  WeekDay  8762        4     4          4      3
 8539  APR21  WeekDay  8763        2     3          2      3
 8540  APR21  WeekDay  8764        3     4          2      3
 
 [8541 rows x 7 columns],
      Period        W    ID  morning  noon  afternoon  night
 0     APR21  WeekEnd     0        0     9          9      9
 1     APR21  WeekEnd     1        3     2          3      3
 2     APR21  WeekEnd     2        2     1          1    

In [3]:
# Add the 'season' and 'day' predictors to every Moovit table (in place).
# month_name[i] is the abbreviation of month i; index 0 is the empty string.
month_name = list(calendar.month_abbr)
# Season codes: 1 = Dec-Feb, 2 = Mar-May, 3 = Jun-Aug; any other month falls back to 0
season_by_month = {12: 1, 1: 1, 2: 1,
                   3: 2, 4: 2, 5: 2,
                   6: 3, 7: 3, 8: 3}
for moovit_file in moovit_data:
    # The first three characters of the first row's 'Period' (e.g. 'APR21') name the month;
    # that single value is applied to the whole table
    period_prefix = moovit_file['Period'][0][:3]
    month = month_name.index(period_prefix.lower().capitalize())
    season = season_by_month.get(month, 0)
    moovit_file['season'] = season
    # Encode the W column from its first row: 0 for 'WeekDay', 1 for anything else
    moovit_file['day'] = int(moovit_file['W'][0] != 'WeekDay')


In [4]:
# Flatten the data so that the ped level of each day part gets a row of its own
def rearrange_data(row, sink=None):
    r"""
    Append one record per day part of ``row`` to the output list.

    Every record has the form ``[ID, day part, season, day, ped level]`` where
    day part is 0 (morning), 1 (noon), 2 (afternoon) or 3 (night).
    No filtering happens here; all four records are always appended.

    :param row: Series (or mapping) providing the keys 'ID', 'season', 'day',
        'morning', 'noon', 'afternoon' and 'night'
    :param sink: list to append the records to; when omitted the module-level
        ``data_for_ml`` list is used
    :return: None
    """
    day_part_columns = ('morning', 'noon', 'afternoon', 'night')
    target = data_for_ml if sink is None else sink
    for day_part, column in enumerate(day_part_columns):
        # Look the value up by column label: integer keys on a string-labelled Series rely on a
        # deprecated positional fallback and silently depend on the column order
        target.append([row['ID'], day_part, row['season'], row['day'], row[column]])

# Collect the flattened records of every Moovit table, then build the modelling frame
data_for_ml = []
for moovit_file in moovit_data:
    # rearrange_data appends to data_for_ml as a side effect; the result of apply is not needed
    moovit_file.apply(rearrange_data, axis=1)
ml_data = DataFrame(data_for_ml, columns=['ID', 'day part', 'season', 'day', ped_level])
# Keep only the rows whose ped level is below 9
# NOTE(review): 9 looks like a "no data" code — confirm against the Moovit export
ml_data = ml_data[ml_data[ped_level] < 9]
ml_data

Unnamed: 0,ID,day part,season,day,ped_level
0,0,0,2,0,1
1,0,1,2,0,0
2,0,2,2,0,2
3,0,3,2,0,0
4,1,0,2,0,4
...,...,...,...,...,...
532538,8763,2,0,1,0
532539,8763,3,0,1,1
532540,8764,0,0,1,0
532541,8764,1,0,1,2


In [14]:
# Mean and standard deviation of the ped level within each group of every time feature
calc_col = "ped_level"
for col in ('day part', 'season', 'day'):
    per_group = ml_data.groupby(col)[calc_col]
    print(f'mean:{per_group.mean().round(2)}')
    print(f'std:{per_group.std().round(2)}')


mean:day part
0    1.78
1    1.99
2    1.98
3    1.88
Name: ped_level, dtype: float64
std:day part
0    1.48
1    1.41
2    1.43
3    1.42
Name: ped_level, dtype: float64
mean:season
0    1.96
1    1.88
2    1.86
3    1.90
Name: ped_level, dtype: float64
std:season
0    1.44
1    1.43
2    1.44
3    1.44
Name: ped_level, dtype: float64
mean:day
0    2.32
1    1.41
Name: ped_level, dtype: float64
std:day
0    1.42
1    1.30
Name: ped_level, dtype: float64


In [15]:
# Join the street attributes to the rest of the data (label and time features)
network_data = gpd.read_file(f'{general_path}/output/streets_elements/streets_elements_2.shp')

# Inner join of the 'ID' index of ml_data against the row index of network_data;
# IDs without a matching network row are dropped.
# NOTE(review): assumes the row index of network_data corresponds to the Moovit 'ID' — confirm
result = ml_data.set_index('ID').join(network_data, how='inner')
ml_result = result.drop(columns=['oidrechov','length'])
# Move the label column to the end of the dataframe; the column name comes from the
# ped_level constant for both the pop and the re-insert so the two cannot diverge
col_to_move = ml_result.pop(ped_level)
ml_df = ml_result.assign(**{ped_level: col_to_move})
# NOTE(review): this path is relative to the working directory, while the inputs are read
# from under general_path (its parent) — confirm this is the intended location
ml_df.to_csv('output/model_data_final.csv')
ml_df

Unnamed: 0,day part,season,day,buildings,businesses,educationa,Health_ser,Leisure_am,Playground,Sport_faci,...,bike_trail,parks,density,SEleve1_10,closeness,betweennes,node_start,node_end,geometry,ped_level
0,0,2,0,16.036667,0.0,8,1,0,7,3,...,0,12,3.3,7,0.014734,0.004886,0,1,"LINESTRING (184322.705 668574.483, 184351.736 ...",1
0,1,2,0,16.036667,0.0,8,1,0,7,3,...,0,12,3.3,7,0.014734,0.004886,0,1,"LINESTRING (184322.705 668574.483, 184351.736 ...",0
0,2,2,0,16.036667,0.0,8,1,0,7,3,...,0,12,3.3,7,0.014734,0.004886,0,1,"LINESTRING (184322.705 668574.483, 184351.736 ...",2
0,3,2,0,16.036667,0.0,8,1,0,7,3,...,0,12,3.3,7,0.014734,0.004886,0,1,"LINESTRING (184322.705 668574.483, 184351.736 ...",0
0,0,2,1,16.036667,0.0,8,1,0,7,3,...,0,12,3.3,7,0.014734,0.004886,0,1,"LINESTRING (184322.705 668574.483, 184351.736 ...",0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8750,3,0,0,19.660000,0.0,9,3,4,2,2,...,0,6,22.9,6,0.024888,0.001354,6432,6433,"LINESTRING (178804.172 665057.617, 178807.785 ...",4
8750,0,0,1,19.660000,0.0,9,3,4,2,2,...,0,6,22.9,6,0.024888,0.001354,6432,6433,"LINESTRING (178804.172 665057.617, 178807.785 ...",4
8750,1,0,1,19.660000,0.0,9,3,4,2,2,...,0,6,22.9,6,0.024888,0.001354,6432,6433,"LINESTRING (178804.172 665057.617, 178807.785 ...",4
8750,2,0,1,19.660000,0.0,9,3,4,2,2,...,0,6,22.9,6,0.024888,0.001354,6432,6433,"LINESTRING (178804.172 665057.617, 178807.785 ...",4
