In [None]:
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.io as pio
# Global plotting setup: pandas' DataFrame.plot(...) is routed through plotly,
# and every plotly figure created afterwards defaults to the white template.
pd.set_option("plotting.backend", "plotly")
pio.templates.default = "plotly_white"
#--#
# Load the NYC taxi trips and derive the feature columns the figures below use.
# Two .assign passes are required: the second one reads columns (dist and the
# parsed datetimes) that only exist once the first pass has finished.
df = pd.read_csv("https://raw.githubusercontent.com/guebin/DV2023/main/posts/NYCTaxi.csv").assign(
    log_trip_duration = lambda d: np.log(d.trip_duration),
    # Parse each timestamp column in a single vectorised call;
    # Series.apply(pd.to_datetime) would invoke the parser once per row.
    pickup_datetime = lambda d: pd.to_datetime(d.pickup_datetime),
    dropoff_datetime = lambda d: pd.to_datetime(d.dropoff_datetime),
    # Straight-line distance computed directly on the latitude/longitude
    # values (no conversion to a physical length is applied).
    dist = lambda d: np.sqrt((d.pickup_latitude-d.dropoff_latitude)**2 + (d.pickup_longitude-d.dropoff_longitude)**2),
    #---#
    # Replace the numeric vendor codes with letter labels.
    vendor_id = lambda d: d.vendor_id.map({1:'A',2:'B'})
).assign(
    # dist divided by trip_duration, i.e. coordinate units per duration unit.
    speed = lambda d: d.dist / d.trip_duration,
    pickup_hour = lambda d: d.pickup_datetime.dt.hour,
    dropoff_hour = lambda d: d.dropoff_datetime.dt.hour,
    # Integer weekday code of the pickup time (0 = Monday ... 6 = Sunday).
    dayofweek = lambda d: d.pickup_datetime.dt.dayofweek
)
# Every 100th trip, re-indexed from 0: a light subsample for the map figure.
df_small = df[::100].reset_index(drop=True)

# 기말고사1-(1),(2)

## Row {Height=100%}


In [None]:
# Mean speed for each (pickup hour, weekday) pair; the integer weekday codes
# are swapped for the Korean labels that appear on the y axis.
tidydata = (
    df.groupby(['pickup_hour', 'dayofweek'])
    .speed.mean()
    .reset_index()
    .assign(
        dayofweek=lambda t: t['dayofweek'].map(
            {0:'월', 1:'화', 2:'수', 3:'목', 4:'금', 5:'토', 6:'일'}
        )
    )
)
#--#
# Each (hour, weekday) pair occurs once in tidydata, so the 24 x 7 grid shows
# the grouped means themselves.
fig = px.density_heatmap(
    tidydata,
    x='pickup_hour', y='dayofweek', z='speed',
    nbinsx=24, nbinsy=7,
    height=400,
)
fig.update_layout(
    xaxis={'title': 'pickup 시간'},
    yaxis={'title': '요일'},
    coloraxis={'colorbar': {'title': '평균 속력'}},
)
fig.show()

## Row {Height=100%}


In [None]:
# Mean trip distance per (pickup hour, weekday) pair, with the weekday codes
# replaced by their Korean labels for display.
mean_dist = df.groupby(['pickup_hour', 'dayofweek']).dist.mean()
tidydata_dist = mean_dist.reset_index().assign(
    dayofweek=lambda t: t['dayofweek'].map(
        {0:'월', 1:'화', 2:'수', 3:'목', 4:'금', 5:'토', 6:'일'}
    )
)
#--#
# One row per (hour, weekday) pair feeds a 24 x 7 heatmap of those means.
fig_dist = px.density_heatmap(
    tidydata_dist,
    x='pickup_hour', y='dayofweek', z='dist',
    nbinsx=24, nbinsy=7,
    height=350,
)
fig_dist.update_layout(
    xaxis={'title': '시간'},
    yaxis={'title': '요일'},
    coloraxis={'colorbar': {'title': '평균 이동 거리'}},
)
fig_dist.show()

# 기말고사1-(3)

## Column {width=100%}


In [None]:
import pandas as pd
import plotly.express as px
# Source column names for each end of a trip. Both lists follow the same
# order (datetime, longitude, latitude, hour) so they can be renamed to one
# shared schema inside transform().
pcol = [
    'pickup_datetime',
    'pickup_longitude',
    'pickup_latitude',
    'pickup_hour',
]
dcol = [
    'dropoff_datetime',
    'dropoff_longitude',
    'dropoff_latitude',
    'dropoff_hour',
]
#--#
def transform(df):
    """Stack the pickup and dropoff columns of ``df`` into long format.

    Args:
        df: DataFrame containing an ``id`` column plus every column named in
            the module-level ``pcol`` and ``dcol`` lists.

    Returns:
        DataFrame with columns ``id, datetime, longitude, latitude, hour,
        type``: all pickup rows (``type == 'pickup'``) followed by all dropoff
        rows (``type == 'dropoff'``). Each half keeps the index of ``df``, so
        index labels repeat in the result.
    """
    shared_names = ['id', 'datetime', 'longitude', 'latitude', 'hour']
    stacked = []
    for endpoint, source_cols in (('pickup', pcol), ('dropoff', dcol)):
        # Select this endpoint's columns and rename them to the shared schema.
        part = df.loc[:, ['id'] + source_cols].set_axis(shared_names, axis=1)
        stacked.append(part.assign(type=endpoint))
    return pd.concat(stacked, axis=0)
# Trip-level attributes only: drop the pickup/dropoff-specific columns, which
# come back in long form through df_right.
df_left = df_small.drop(pcol+dcol, axis=1)
# Long format: one pickup row and one dropoff row per trip. transform() already
# works on whole columns, so a single call replaces the per-id groupby loop
# that built and concatenated one tiny frame per trip. Within each id the
# pickup row still precedes the dropoff row, which is all the merge relies on.
df_right = transform(df_small).reset_index(drop=True)
# Joins on the columns the two frames share (expected to be just 'id' --
# TODO confirm against the CSV schema), giving two rows per trip.
df_small2 = df_left.merge(df_right)
# Bucket trips into speed quartiles; sorting by the bucket groups the rows by
# category before they are handed to plotly.
tidydata = df_small2.assign(speed_cut=pd.qcut(df_small2['speed'], 4, labels=['매우느림','조금느림','조금빠름','매우빠름'])).sort_values('speed_cut')
# One line per trip (line_group='id') joining its two endpoints, coloured by
# speed quartile.
fig = px.line_mapbox(
    data_frame=tidydata,
    lat='latitude',
    lon='longitude',
    line_group='id',
    color='speed_cut',
    center={'lat': 40.7322, 'lon': -73.9052},
    mapbox_style='carto-positron',
    zoom=10,
    width=750,
    height=600
)
# Endpoint markers sized by passenger count; only the traces are kept so they
# can be layered onto the line figure above.
scatter_data = px.scatter_mapbox(
    data_frame=tidydata,
    lat='latitude',
    lon='longitude',
    size='passenger_count',
    size_max=10,
    color='speed_cut',
    mapbox_style='carto-positron',
    zoom=10,
    width=750,
    height=600
).data
for sd in scatter_data:
    fig.add_trace(sd)

# Applied to every trace in the figure (lines and markers alike).
fig.update_traces(
    line={'width': 1},
    opacity=0.8
)
# Scroll-wheel zoom is disabled for the rendered map.
fig.show(config={'scrollZoom': False})