In [None]:
import sys
# add parent directory and its parent to sys.path so that python finds the modules
sys.path.append('..')
sys.path.append('../..')

from matplotlib import pyplot as plt
import numpy as np
import pandas as pd

from datetime import datetime
from cluster_utils import return_cluster_results_and_plot_path
from sklearn.linear_model import LogisticRegression

In [None]:
# Cut-off timestamp: only data up to this point is meant to be used.
# NOTE(review): `end_date` / `files_to_exclude` are not referenced by any other cell
# in this notebook -- confirm where they are consumed.
end_date_str = '2023-01-01 00:00:00'
end_date = datetime.strptime(end_date_str, '%Y-%m-%d %H:%M:%S')

# files you want to exclude, e.g. because they use the pedestrian way
# NOTE(review): 'VM2_421371629' used to be listed twice; the duplicate was dropped.
# Verify that no other file id was intended in its place.
files_to_exclude = [
    'VM2_-2112701535',
    'VM2_-217686115',
    'VM2_-1247665811',
    'VM2_-104300786',
    'VM2_330973206',
    'VM2_1100569031',
    'VM2_421371629',
    'VM2_1476499235',
    'VM2_-1451152685',
    'VM2_-1523872256',
]

In [None]:
# Turn definitions: one dict per (intersection, turn direction).
# Keys of each dict:
#   intersection_name - label of the intersection; also used as lookup key in the manual review cell
#   direction         - the turn, written as "<from> to <to>" compass directions
#   start_rect_coords - 4-tuple of floats; the values look like (lon_min, lat_min, lon_max, lat_max)
#                       in degrees -- TODO confirm against cluster_utils
#   end_rect_coords   - same layout as start_rect_coords
#   exclude_coords    - optional, same layout; presumably an area that disqualifies a ride -- verify in cluster_utils
#   n_input_lanes     - lane count, used as regression feature further down
#   n_output_lanes    - lane count, used as regression feature further down
intersections = [
    # Alexanderstr./Karl-Marx-Allee/Ottobraun-Str.
    {
        "intersection_name": "Alexanderstr./Karl-Marx-Allee/Ottobraun-Str.",
        "direction": "north to east",
        "start_rect_coords": (13.416448,52.522311,13.416889,52.522671),
        "end_rect_coords":   (13.416743,52.521404,13.417205,52.521852),
        "n_input_lanes": 3,
        "n_output_lanes": 2
    },
    {
        "intersection_name": "Alexanderstr./Karl-Marx-Allee/Ottobraun-Str.",
        "direction": 'east to south',
        "start_rect_coords": (13.41673,52.5219,13.417469,52.522088),
        "end_rect_coords": (13.416021,52.521813,13.416321,52.52203),
        "exclude_coords": (13.416364,52.521767,13.416761,52.521984),
        "n_input_lanes": 5,
        "n_output_lanes": 3 
    },
    {
        "intersection_name": "Alexanderstr./Karl-Marx-Allee/Ottobraun-Str.",
        "direction": 'south to west',
        "start_rect_coords": (13.416169,52.521508,13.416513,52.521832),
        "end_rect_coords": (13.41591,52.522275,13.416421,52.522569),
        "exclude_coords": (13.415467,52.521919,13.416219,52.522107),
        "n_input_lanes": 5,
        "n_output_lanes": 3 
    },
    {
        "intersection_name": "Alexanderstr./Karl-Marx-Allee/Ottobraun-Str.",
        "direction": 'west to north',
        "start_rect_coords": (13.415638,52.522086,13.415982,52.522299),
        "end_rect_coords": (13.41685,52.52219,13.417194,52.522403),
        "exclude_coords": (13.416228,52.522279,13.416572,52.522492),
        "n_input_lanes": 4,
        "n_output_lanes": 2
    },
    # 'Leibnizstr./Bismarckstr.'
    {
        "intersection_name": "Leibnizstr./Bismarckstr.",
        "direction": "north to east",
        "start_rect_coords": (13.314892,52.511974,13.315527,52.512162),
        "end_rect_coords": (13.31457,52.512248,13.314861,52.512583),
        "n_input_lanes": 3,
        "n_output_lanes": 5
    },
    {
        "intersection_name": "Leibnizstr./Bismarckstr.",
        "direction": "east to south",
        "start_rect_coords": (13.314913,52.512157,13.315644,52.512375),
        "end_rect_coords": (13.314462,52.511618,13.314807,52.512022),
        "n_input_lanes": 5,
        "n_output_lanes": 2
    },
    {
        "intersection_name": "Leibnizstr./Bismarckstr.",
        "direction": "south to west",
        "start_rect_coords": (13.314699,52.51167,13.31498,52.512028),
        "end_rect_coords": (13.314082,52.512114,13.314669,52.512289),
        "n_input_lanes": 3,
        "n_output_lanes": 4
    },
    {
        "intersection_name": "Leibnizstr./Bismarckstr.",
        "direction": "west to north",
        "start_rect_coords": (13.314023,52.511924,13.314636,52.512129),
        "end_rect_coords": (13.314715,52.512248,13.315043,52.512619),
        "n_input_lanes": 5,
        "n_output_lanes": 2
    },
    # 'Petersburger Str./Frankfurter Allee/Warschauer Str./Karl-Marx-Allee'
    {
        "intersection_name": "Petersburger Str./Frankfurter Allee/Warschauer Str./Karl-Marx-Allee",
        "direction": "north to east",
        "start_rect_coords": (13.453274,52.516103,13.45385,52.516485),
        "end_rect_coords": (13.454497,52.515534,13.455293,52.515691),
        "exclude_coords": (13.454095,52.515812,13.454569,52.516028),
        "n_input_lanes": 4,
        "n_output_lanes": 3
    },
    {
        "intersection_name": "Petersburger Str./Frankfurter Allee/Warschauer Str./Karl-Marx-Allee",
        "direction": "east to south",
        "start_rect_coords": (13.454486,52.515753,13.455282,52.51591),
        "end_rect_coords": (13.453429,52.515179,13.453849,52.515643),
        "exclude_coords": (13.454154,52.515476,13.454628,52.515692),
        "n_input_lanes": 5,
        "n_output_lanes": 2
    },
    {
        "intersection_name": "Petersburger Str./Frankfurter Allee/Warschauer Str./Karl-Marx-Allee",
        "direction": "south to west",
        "start_rect_coords": (13.454073,52.515495,13.454289,52.51564),
        "end_rect_coords": (13.453616,52.515832,13.453918,52.515993),
        "n_input_lanes": 3,
        "n_output_lanes": 3
    },
    {
        "intersection_name": "Petersburger Str./Frankfurter Allee/Warschauer Str./Karl-Marx-Allee",
        "direction": "west to north",
        "start_rect_coords": (13.453209,52.515697,13.453629,52.515865),
        "end_rect_coords": (13.453981,52.515955,13.454401,52.516123),
        "n_input_lanes": 4,
        "n_output_lanes": 2
    },
    # 'Mehringdamm/Gneisenaustr./Yorckstr.'
    {
        "intersection_name": "Mehringdamm/Gneisenaustr./Yorckstr.",
        "direction": "north to east",
        "start_rect_coords": (13.387586,52.493157,13.387898,52.493592),
        "end_rect_coords": (13.38801,52.492482,13.388858,52.492737),
        "exclude_coords": (13.387999,52.492964,13.388343,52.493187),
        "n_input_lanes": 4,
        "n_output_lanes": 3
    },
    {
        "intersection_name": "Mehringdamm/Gneisenaustr./Yorckstr.",
        "direction": "east to south",
        "start_rect_coords": (13.388074,52.492817,13.388525,52.49302),
        "end_rect_coords": (13.387153,52.492475,13.387508,52.492772),
        "exclude_coords": (13.387752,52.492474,13.388096,52.492697),
        "n_input_lanes": 4,
        "n_output_lanes": 3
    },
    {
        "intersection_name": "Mehringdamm/Gneisenaustr./Yorckstr.",
        "direction": "south to west",
        "start_rect_coords": (13.387469,52.492345,13.387899,52.492652),
        "end_rect_coords": (13.387001,52.493046,13.387592,52.4933),
        "exclude_coords": (13.387103,52.492634,13.387447,52.492857),
        "n_input_lanes": 4,
        "n_output_lanes": 3
    },
    {
        "intersection_name": "Mehringdamm/Gneisenaustr./Yorckstr.",
        "direction": "west to north",
        "start_rect_coords": (13.38668,52.492775,13.387303,52.493003),
        "end_rect_coords": (13.387999,52.493079,13.388305,52.493431),
        "exclude_coords": (13.387366,52.493039,13.387785,52.493347),
        "n_input_lanes": 4,
        "n_output_lanes": 3
    },
    # 'Potsdamer Str./Goebenstr./Pallasstr.'
    {
        "intersection_name": "Potsdamer Str./Goebenstr./Pallasstr.",
        "direction": "north to east",
        "start_rect_coords": (13.360731,52.494571,13.361093,52.494912),
        "end_rect_coords": (13.361284,52.494088,13.361861,52.494275),
        "n_input_lanes": 3,
        "n_output_lanes": 3
    },
    {
        "intersection_name": "Potsdamer Str./Goebenstr./Pallasstr.",
        "direction": "east to south",
        "start_rect_coords": (13.361359,52.494297,13.361936,52.494484),
        "end_rect_coords": (13.360619,52.49371,13.36104,52.494106),
        "n_input_lanes": 3,
        "n_output_lanes": 3
    },
    {
        "intersection_name": "Potsdamer Str./Goebenstr./Pallasstr.",
        "direction": "south to west",
        "start_rect_coords": (13.360984,52.4937,13.361405,52.494096),
        "end_rect_coords": (13.360276,52.494376,13.360906,52.494609),
        "n_input_lanes": 3,
        "n_output_lanes": 2
    },
    {
        "intersection_name": "Potsdamer Str./Goebenstr./Pallasstr.",
        "direction": "west to north",
        "start_rect_coords": (13.360228,52.494141,13.360858,52.494374),
        "end_rect_coords": (13.361081,52.49453,13.361529,52.494949),
        "n_input_lanes": 2,
        "n_output_lanes": 2
    },
    # 'Warschauer Str./Stralauer Allee/Oberbaumbruecke/Muehlenstr.'
    {
        "intersection_name": "Warschauer Str./Stralauer Allee/Oberbaumbruecke/Muehlenstr.",
        "direction": "north to east",
        "start_rect_coords": (13.446738,52.502935,13.447095,52.503201),
        "end_rect_coords": (13.44714,52.502334,13.447626,52.502544),
        "n_input_lanes": 4,
        "n_output_lanes": 2
    },
    {
        "intersection_name": "Warschauer Str./Stralauer Allee/Oberbaumbruecke/Muehlenstr.",
        "direction": "east to south",
        "start_rect_coords": (13.447414,52.502533,13.4479,52.502743), 
        "end_rect_coords": (13.445976,52.502347,13.446457,52.502619),
        "exclude_coords": (13.446695,52.502422,13.446966,52.502655),
        "n_input_lanes": 4,
        "n_output_lanes": 2
    },
    {
        "intersection_name": "Warschauer Str./Stralauer Allee/Oberbaumbruecke/Muehlenstr.",
        "direction": "south to west",
        "start_rect_coords": (13.446502,52.502252,13.446983,52.502524),
        "end_rect_coords": (13.446282,52.50285,13.446704,52.50307),
        "n_input_lanes": 3,
        "n_output_lanes": 2
    },
    {
        "intersection_name": "Warschauer Str./Stralauer Allee/Oberbaumbruecke/Muehlenstr.",
        "direction": "west to north",
        "start_rect_coords": (13.445933,52.502634,13.446355,52.502854),
        "end_rect_coords": (13.447006,52.502761,13.447262,52.502936),
        "n_input_lanes": 4,
        "n_output_lanes": 2
    }

]

In [None]:
# One row per (intersection, turn direction).
# The result columns are declared up front so the frame has a stable schema before
# the clustering loop and the manual review fill them in. (The previous `reindex`
# call discarded its return value and therefore added nothing; `exclude_coords`
# already comes from the dicts, with NaN for turns that do not define it.)
df = pd.DataFrame(intersections).assign(
    n_rides=np.nan,               # filled by the clustering loop
    share_orange_cluster=np.nan,  # filled by the clustering loop
    valid_results=None,           # object column; set to True/False in the manual review
    share_direct_turn=np.nan,     # set in the manual review, NaN for unusable turns
)


In [None]:
# df

In [None]:
# Run the clustering for every turn, log which turn is being processed and store
# the two values returned by the helper in the matching row of `df`.
for row_label, turn in df.iterrows():
    print(f'intersection:         {turn.intersection_name}')
    print(turn.direction)
    print(f'start coords: {turn.start_rect_coords} \n end coords: {turn.end_rect_coords}')

    orange_share, ride_count = return_cluster_results_and_plot_path(turn)
    df.loc[row_label, 'share_orange_cluster'] = orange_share
    df.loc[row_label, 'n_rides'] = ride_count

    print('\n')


In [None]:
# Inspect the frame after the clustering loop has filled `share_orange_cluster` and `n_rides`.
df

In [None]:
# check from plots if data is usable + explicitly set "share direct turn" depending on which cluster is direct turn
#
# Layout: {intersection_name: {direction: (valid_results, share_direct_turn)}}
# `share_direct_turn` is NaN for every turn that is marked as not usable.
manual_turn_review = {
    "Alexanderstr./Karl-Marx-Allee/Ottobraun-Str.": {
        'north to east': (False, np.nan),  # n rides too low
        'east to south': (True, 0.55),
        'south to west': (True, 0.42),
        'west to north': (True, 0),
    },
    "Leibnizstr./Bismarckstr.": {
        'north to east': (False, np.nan),
        'east to south': (True, 0),
        'south to west': (False, np.nan),
        'west to north': (False, np.nan),
    },
    "Petersburger Str./Frankfurter Allee/Warschauer Str./Karl-Marx-Allee": {
        'north to east': (True, 0),
        'east to south': (True, 0),
        'south to west': (True, 0.29),
        'west to north': (False, np.nan),  # n rides too low
    },
    "Mehringdamm/Gneisenaustr./Yorckstr.": {
        'north to east': (True, 0),
        'east to south': (True, 2/13),  # manually counted
        'south to west': (True, 0),
        'west to north': (False, np.nan),  # unclear if direct or not
    },
    "Potsdamer Str./Goebenstr./Pallasstr.": {
        'north to east': (True, 0),
        'east to south': (False, np.nan),  # unclear if direct or not
        'south to west': (False, np.nan),
        'west to north': (True, 0.33),
    },
    "Warschauer Str./Stralauer Allee/Oberbaumbruecke/Muehlenstr.": {
        'north to east': (True, 0.33),
        'east to south': (True, 0.42),
        'south to west': (True, 0),
        'west to north': (True, 0),
    },
}

for intersection_name, turns in manual_turn_review.items():
    for direction, (is_valid, direct_share) in turns.items():
        turn_rows = (df['intersection_name'] == intersection_name) & (df['direction'] == direction)
        # A misspelled name/direction would otherwise be ignored silently.
        if not turn_rows.any():
            raise KeyError(f'no row in df for {intersection_name!r} / {direction!r}')
        df.loc[turn_rows, 'valid_results'] = is_valid
        df.loc[turn_rows, 'share_direct_turn'] = direct_share


In [None]:
# Overview of the columns that matter for the regression below.
overview_columns = [
    'intersection_name',
    'direction',
    'n_input_lanes',
    'n_output_lanes',
    'n_rides',
    'share_direct_turn',
]
df[overview_columns]

#### Logistic regression

In [None]:
# Features: lane counts of each turn as float columns, stacked to shape (n_turns, 2).
x1, x2 = (df[lane_col].to_numpy(dtype='float') for lane_col in ('n_input_lanes', 'n_output_lanes'))
X_0 = np.column_stack((x1, x2))

# Target: manually assigned share of direct turns (NaN for unusable turns).
Y_0 = df['share_direct_turn'].to_numpy()

# Ride counts per turn.
# NOTE(review): `weights` is not passed to the model fit further down.
weights = df['n_rides'].to_numpy(dtype='int')

In [None]:
# Drop every turn that has no share_direct_turn (NaN) from features, target and weights.
mask = df['share_direct_turn'].isna()
keep = ~mask
X_0, Y_0, weights = X_0[keep, :], Y_0[keep], weights[keep]

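Create n = 100 binary samples for each intersection/turn, split according to its direct-turn share: `share_direct_turn * n` samples get label 1 (direct turn) and the remaining samples get label 0.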

Example for one intersection:
* n_input_lanes = 2
* n_output_lanes = 3
* share_direct_turn = 0.2

20 × features `[2, 3]` with label `[1]` (direct turn)

80 × features `[2, 3]` with label `[0]` (not a direct turn)

In [None]:

# add noise for plot
# A seeded generator keeps the jitter identical on every "Restart & Run All".
# NOTE(review): X_0 / Y_0 are overwritten here and are also the arrays the training
# samples are built from further down -- confirm that fitting on jittered values is intended.
JITTER_STD = 0.015
rng = np.random.default_rng(0)
X_noise = rng.normal(0, JITTER_STD, X_0.shape)
Y_noise = rng.normal(0, JITTER_STD, Y_0.shape)
X_0 = X_0 + X_noise
Y_0 = Y_0 + Y_noise

# keep the shares inside [0, 1]: negative values can appear due to the noise, and a
# value above 1 would lead to a negative repeat count when the samples are created
Y_0 = np.clip(Y_0, 0, 1)


In [None]:
# (Jittered) direct-turn share of each turn against its number of input lanes.
plt.scatter(X_0[:,0], Y_0, s=3)
plt.xlabel('n_input_lanes')
plt.ylabel('share_direct_turn')

In [None]:
# (Jittered) direct-turn share of each turn against its number of output lanes.
plt.scatter(X_0[:,1], Y_0, s=5)
plt.xlabel('n_output_lanes')
plt.ylabel('share_direct_turn')

In [None]:
# Number of synthetic samples per turn.
n = 100
# Number of positive (label 1) samples per turn.
# Round to the nearest integer instead of truncating (0.29 * 100 evaluates to
# 28.999... and would otherwise become 28), and keep the count inside [0, n] so
# that neither it nor `n - Y_0_scaled` can be negative when used as repeat count.
Y_0_scaled = np.clip(np.rint(Y_0 * n), 0, n).astype(int)

In [None]:
# Inspect the per-turn number of positive (label 1) samples out of n.
Y_0_scaled

In [None]:
# Every turn contributes exactly n feature rows, i.e. all turns are weighted equally:
# `Y_0_scaled` copies for the positive class and the remaining `n - Y_0_scaled`
# copies for the negative class.
positive_counts = Y_0_scaled
negative_counts = n - Y_0_scaled
X1 = np.repeat(X_0, positive_counts, axis=0)
X2 = np.repeat(X_0, negative_counts, axis=0)


In [None]:
# Column vectors of labels: 1 for every row of X1, 0 for every row of X2.
Y1 = np.ones((len(X1), 1), dtype='int')
Y2 = np.zeros((len(X2), 1), dtype='int')

In [None]:
# Training set: positive rows first, then negative rows; labels flattened to 1-D.
X = np.concatenate((X1, X2), axis=0)
Y = np.concatenate((Y1, Y2), axis=0).ravel()

In [None]:
# Convenience aliases for the two feature columns and the labels.
x, y = X[:, 0], X[:, 1]
z = Y

# Regular 25 x 25 grid over 1..5 lanes on both axes; each grid point becomes one
# (n_input_lanes, n_output_lanes) row of `model_viz`.
x_pred = np.linspace(1, 5, 25)
y_pred = np.linspace(1, 5, 25)
xx_pred, yy_pred = np.meshgrid(x_pred, y_pred)
model_viz = np.column_stack((xx_pred.ravel(), yy_pred.ravel()))

In [None]:
# Fit a logistic regression (default settings) on the expanded 0/1 samples.
logreg = LogisticRegression()
model = logreg.fit(X, Y)

# Column 1 of predict_proba is the probability of the second class, i.e. label 1 here.
grid_proba = model.predict_proba(model_viz)
predicted = grid_proba[:, 1].ravel()

# Predicted probability of label 1 for every turn in X_0.
predict_prob_direct = model.predict_proba(X_0)[:, 1]
predict_prob_direct

In [None]:
# LogisticRegression.score returns the mean classification accuracy on (X, Y),
# not a coefficient of determination.
accuracy = model.score(X, Y)
r2 = accuracy  # legacy name, kept so existing references keep working

In [None]:
# Three 3D views of the same data: the per-turn samples (black crosses) and the
# predicted probability on the lane grid (blue dots).
fig = plt.figure(figsize=(12, 4))

ax1 = fig.add_subplot(131, projection='3d')
ax2 = fig.add_subplot(132, projection='3d')
ax3 = fig.add_subplot(133, projection='3d')

axes = [ax1, ax2, ax3]

for ax in axes:
    ax.plot(X_0[:,0], X_0[:,1], Y_0, color='k', linestyle='none', marker='x', alpha=0.8, label='visualization samples')
    ax.scatter(xx_pred.flatten(), yy_pred.flatten(), predicted, facecolor=(0,0,0,0), s=2, edgecolor='#70b3f0', label='predicted probabilities')
    ax.set_xlabel('n input lanes', fontsize=12)
    ax.set_ylabel('n output lanes', fontsize=12)
    ax.set_zlabel('share direct turn', fontsize=12)
    # limit the number of ticks per axis (the second call used to target 'x' again,
    # which overrode the first call and left the y axis untouched)
    ax.locator_params(nbins=4, axis='x')
    ax.locator_params(nbins=5, axis='y')

# same scene from the front, from the side and from an elevated angle
ax1.view_init(elev=5, azim=270)
ax2.view_init(elev=5, azim=0)
ax3.view_init(elev=30, azim=130)

# plt.legend() acts on the current axes, i.e. the last subplot created
plt.legend()
fig.tight_layout()

In [None]:
# for i,j,k in zip(X_0[:,0],X_0[:,1],predict_prob_direct):
#     print(i,j,round(k,2))



In [None]:
# Predicted probability of label 1 for each turn against its number of input lanes.
input_lane_values = X_0[:, 0]
plt.scatter(input_lane_values, predict_prob_direct, s=7)
plt.xlabel('n_input_lanes')
plt.ylabel('predicted probability')

In [None]:
# Predicted probability of label 1 for each turn against its number of output lanes.
output_lane_values = X_0[:, 1]
plt.scatter(output_lane_values, predict_prob_direct, s=7)
plt.xlabel('n_output_lanes')
plt.ylabel('predicted probability')

In [None]:
# Show the fitted parameters: intercept first, then the coefficients
# (one per feature, in the order n_input_lanes, n_output_lanes).
display(model.intercept_, model.coef_)