# ApartmentSearch: Analysis

In [2]:
import pandas as pd
import re
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import datetime  
from datetime import date 
import calendar

## Data Ingestion & Stats

In [5]:
df = pd.read_csv("travel_data.txt", names=['Date', 'Time', 'Location', 'Destination', 'Distance (mi)', 'Duration (min)', 
                                           'Duration in Traffic (min)', 'Summary'])
df.head()

Unnamed: 0,Date,Time,Location,Destination,Distance (mi),Duration (min),Duration in Traffic (min),Summary
0,2020-2-5,7:30,Norwalk,KIMLEY-HORN,5.1 mi,8 mins,8 mins,TX-1 Loop S
1,2020-2-5,7:30,The Richardson,KIMLEY-HORN,6.5 mi,11 mins,11 mins,TX-1 Loop S
2,2020-2-5,7:30,Westerly 360,KIMLEY-HORN,1.9 mi,5 mins,6 mins,TX-360 Loop S and Frontage Rd
3,2020-2-5,7:30,Woodlawn,KIMLEY-HORN,5.0 mi,9 mins,9 mins,TX-1 Loop S
4,2020-2-5,7:30,Gables Central Park,KIMLEY-HORN,7.6 mi,13 mins,13 mins,TX-1 Loop S


In [22]:
# Cleanse data
data = []
with open('travel_data.txt', 'r') as f:
    raw_data = f.readlines()

for line in raw_data:
    # Strip off min from distance/duration/traffic
    row = line.strip().split(',')
    m = re.match(r"[0-9]+", row[4])
    n = re.match(r"[0-9]+", row[5])
    p = re.match(r"[0-9]+", row[6])
    row[4] = int(m.group(0))
    row[5] = int(n.group(0))
    row[6] = int(p.group(0))
    
    # Make a new column for day of the week (Weekday)
    date = row[0]
    year, month, day = (int(i) for i in date.split('-'))     
    number_day = datetime.date(year, month, day) 
    row.append(number_day.strftime("%A"))
    
    # Take out weekend data because not part of weekly commute
    if ((row[8] == "Saturday") or (row[8] == "Sunday")):
        continue
    else:
        data.append(row)

df = pd.DataFrame(data, columns = ['Date', 'Time', 'Location', 'Destination', 'Distance (mi)', 'Duration (min)', 
                                           'Duration in Traffic (min)', 'Summary', 'Weekday']) 
df.head()

Unnamed: 0,Date,Time,Location,Destination,Distance (mi),Duration (min),Duration in Traffic (min),Summary,Weekday
0,2020-2-5,7:30,Norwalk,KIMLEY-HORN,5,8,8,TX-1 Loop S,Wednesday
1,2020-2-5,7:30,The Richardson,KIMLEY-HORN,6,11,11,TX-1 Loop S,Wednesday
2,2020-2-5,7:30,Westerly 360,KIMLEY-HORN,1,5,6,TX-360 Loop S and Frontage Rd,Wednesday
3,2020-2-5,7:30,Woodlawn,KIMLEY-HORN,5,9,9,TX-1 Loop S,Wednesday
4,2020-2-5,7:30,Gables Central Park,KIMLEY-HORN,7,13,13,TX-1 Loop S,Wednesday


In [23]:
# Quick stats
df.describe()

Unnamed: 0,Distance (mi),Duration (min),Duration in Traffic (min)
count,1224.0,1224.0,1224.0
mean,5.237745,12.459967,15.627451
std,2.517935,5.997549,8.975852
min,0.0,4.0,4.0
25%,3.0,8.0,8.0
50%,5.0,9.0,13.0
75%,7.0,17.0,22.0
max,12.0,32.0,46.0


In [24]:
# Average distances and durations for weekdays by location
# NOTE: Kimley-Horn is skewed because when it is LOCATION (not DESTINATION) it goes to each apartment complex
group_by_Location_morning = df.loc[df['Destination'] == 'KIMLEY-HORN', ['Location', 'Distance (mi)', 'Duration in Traffic (min)']].groupby('Location')
group_by_Location_evening = df.loc[df['Destination'] != 'KIMLEY-HORN', ['Destination', 'Distance (mi)', 'Duration in Traffic (min)']].groupby('Destination')
avgs_morning = group_by_Location_morning.mean()
avgs_evening = group_by_Location_evening.mean()
# Average Distance differs morning/evening because of the location of the office on the Mopac Frontage Road (adds ~2.5 mi)
print("MORNING COMMUTE AVERAGES:")
print(avgs_morning)
print()
print("EVENING COMMUTE AVERAGES:")
print(avgs_evening)

MORNING COMMUTE AVERAGES:
                            Distance (mi)  Duration in Traffic (min)
Location                                                            
AMLI                            10.250000                  24.211538
Barton Creek Landing             3.000000                   7.057692
Burnet                           7.000000                  13.826923
Echo Apartments                  8.269231                  15.884615
Gables Central Park              7.000000                  13.673077
Koenig Flats                     9.288462                  19.653846
Norwalk                          5.000000                   8.557692
Pressler                         4.000000                   7.961538
The Boulevard at Town Lake       4.000000                   8.000000
The Richardson                   6.000000                  10.057692
Westerly 360                     1.134615                   6.000000
Woodlawn                         5.000000                   9.096154

EVENING

In [29]:
# Do the durations in traffic really change much Monday - Friday? Look at std. deviation
stds_morning = group_by_Location_morning.std()
stds_evening = group_by_Location_evening.std()
# Duration in evening for BCL never changes because it is so close to office location
# The farther an apartment complex is from the office, the larger the stddev in duration because different routes some days
print("MORNING COMMUTE STANDARD DEV:")
print(stds_morning)
print()
print("EVENING COMMUTE STANDARD DEV:")
print(stds_evening)

MORNING COMMUTE STANDARD DEV:
                            Distance (mi)  Duration in Traffic (min)
Location                                                            
AMLI                             0.479992                   1.718829
Barton Creek Landing             0.000000                   0.235435
Burnet                           0.000000                   0.922936
Echo Apartments                  0.447888                   0.921504
Gables Central Park              0.000000                   0.984610
Koenig Flats                     0.457467                   1.045712
Norwalk                          0.000000                   0.501506
Pressler                         0.000000                   0.441101
The Boulevard at Town Lake       0.000000                   0.000000
The Richardson                   0.000000                   0.777463
Westerly 360                     0.344642                   0.594089
Woodlawn                         0.000000                   1.014785

EVE

In [30]:
# What Locations do we want to look at?
labels = ['Norwalk','The Richardson', "Westerly 360", "Woodlawn", "Gables Central Park", "The Boulevard at Town Lake",
    "Barton Creek Landing", "Pressler", "Koenig Flats", "Burnet", "Echo Apartments", "Amli"]

## Data Visualization

### Weekly Averages

In [37]:
# Average week Morning commute times
weekdays = df.loc[(df['Weekday']=='Monday')|(df['Weekday']=='Tuesday')|(df['Weekday']=='Wednesday')|
                  (df['Weekday']=='Thursday')|(df['Weekday']=='Friday')]

fig = go.Figure(data=[go.Bar(x=labels, y=weekdays.loc[weekdays['Time'] == "7:30",'Duration in Traffic (min)'],
          marker_color="rgb(255,128,0)", width=0.2, name="7:30"),
                      go.Bar(x=labels, y=weekdays.loc[weekdays['Time'] == "8:0",'Duration in Traffic (min)'],
          marker_color="rgb(153,76,0)", width=0.2, name="8:00")])
                      
fig.update_layout(title="Weekly Average Morning Commute",
    xaxis_title="Starting Location",
    yaxis_title="Duration in Traffic (min)",
    barmode='group',
    bargap=0.5,
    bargroupgap=0.1,
    xaxis_tickangle=-45
)

fig.show()

In [38]:
# Average week Evening commute times
weekdays = df.loc[(df['Weekday']=='Monday')|(df['Weekday']=='Tuesday')|(df['Weekday']=='Wednesday')|
                  (df['Weekday']=='Thursday')|(df['Weekday']=='Friday')]

fig = go.Figure(data=[go.Bar(x=labels, y=weekdays.loc[weekdays['Time'] == "16:30",'Duration in Traffic (min)'],
          marker_color="rgb(0,128,255)", width=0.2, name="4:30"),
                      go.Bar(x=labels, y=weekdays.loc[weekdays['Time'] == "17:0",'Duration in Traffic (min)'],
          marker_color="rgb(0,76,153)", width=0.2, name="5:00")])
                      
fig.update_layout(title="Weekly Average Evening Commute",
    xaxis_title="Starting Location",
    yaxis_title="Duration in Traffic (min)",
    barmode='group',
    bargap=0.5,
    bargroupgap=0.1,
    xaxis_tickangle=-45
)

fig.show()

### Daily Averages

In [39]:
# Monday Morning
monday = df.loc[(df['Weekday']=='Monday')]
fig = go.Figure(data=[go.Bar(x=labels, y=monday.loc[monday['Time'] == "7:30",'Duration in Traffic (min)'],
          marker_color="rgb(128,255,0)", width=0.2, name="7:30"),
                      go.Bar(x=labels, y=monday.loc[monday['Time'] == "8:0",'Duration in Traffic (min)'],
          marker_color="rgb(75,153,0)", width=0.2, name="8:00")])
                      
fig.update_layout(title="Monday Morning Commute",
    xaxis_title="Starting Location",
    yaxis_title="Duration in Traffic (min)",
    barmode='group',
    bargap=0.5,
    bargroupgap=0.1,
    xaxis_tickangle=-45
)

fig.show()

In [40]:
# Monday Evening
fig = go.Figure(data=[go.Bar(x=labels, y=monday.loc[monday['Time'] == "16:30",'Duration in Traffic (min)'],
          marker_color="rgb(127,0,255)", width=0.2, name="4:30"),
                      go.Bar(x=labels, y=monday.loc[monday['Time'] == "17:0",'Duration in Traffic (min)'],
          marker_color="rgb(76,0,153)", width=0.2, name="5:00")])
                      
fig.update_layout(title="Monday Evening Commute",
    xaxis_title="Starting Location",
    yaxis_title="Duration in Traffic (min)",
    barmode='group',
    bargap=0.5,
    bargroupgap=0.1,
    xaxis_tickangle=-45
)

fig.show()

In [41]:
# Friday Evening
friday = df.loc[(df['Weekday']=='Friday')]
fig = go.Figure(data=[go.Bar(x=labels, y=friday.loc[friday['Time'] == "16:30",'Duration in Traffic (min)'],
          marker_color="rgb(255,0,255)", width=0.2, name="4:30"),
                      go.Bar(x=labels, y=friday.loc[friday['Time'] == "17:0",'Duration in Traffic (min)'],
          marker_color="rgb(153,0,153)", width=0.2, name="5:00")])
                      
fig.update_layout(title="Friday Evening Commute",
    xaxis_title="Starting Location",
    yaxis_title="Duration in Traffic (min)",
    barmode='group',
    bargap=0.5,
    bargroupgap=0.1,
    xaxis_tickangle=-45
)

fig.show()