In [2]:
import warnings
warnings.filterwarnings('ignore')

import random

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import datetime

%matplotlib inline

In [3]:
chicago = pd.read_csv("prepared_data.csv", parse_dates=["start_time","end_time"])

In [4]:
chicago.head()

Unnamed: 0.1,Unnamed: 0,start_time,end_time,start_station_id,end_station_id,start_station_name,end_station_name,bike_id,user_type,duration
0,0,2016-03-31 23:53:00,2016-04-01 00:07:00,344,458,Ravenswood Ave & Lawrence Ave,Broadway & Thorndale Ave,155,Subscriber,14.0
1,1,2016-03-31 23:46:00,2016-03-31 23:57:00,128,213,Damen Ave & Chicago Ave,Leavitt St & North Ave,4831,Subscriber,11.0
2,2,2016-03-31 23:42:00,2016-03-31 23:46:00,350,210,Ashland Ave & Chicago Ave,Ashland Ave & Division St,4232,Subscriber,4.0
3,3,2016-03-31 23:37:00,2016-03-31 23:55:00,303,458,Broadway & Cornelia Ave,Broadway & Thorndale Ave,3464,Subscriber,18.0
4,4,2016-03-31 23:33:00,2016-03-31 23:37:00,334,329,Lake Shore Dr & Belmont Ave,Lake Shore Dr & Diversey Pkwy,1750,Subscriber,4.0


In [5]:
print(chicago['start_time'].size)

3586330


In [6]:
# hinzufügen der Stunde des Starts
chicago['start_hour']= chicago["start_time"].apply(lambda x: x.hour)
# hinzufügen des Wochentags
chicago['weekday']= chicago["start_time"].apply(lambda x: x.weekday())
# hinzufügen des Monats
chicago['month']= chicago["start_time"].apply(lambda x: x.month)
#. honzufügen der Dauer der Fahrt
chicago["duration"] = (chicago["end_time"] - chicago["start_time"]).astype("timedelta64[m]")
chicago.head()

Unnamed: 0.1,Unnamed: 0,start_time,end_time,start_station_id,end_station_id,start_station_name,end_station_name,bike_id,user_type,duration,start_hour,weekday,month
0,0,2016-03-31 23:53:00,2016-04-01 00:07:00,344,458,Ravenswood Ave & Lawrence Ave,Broadway & Thorndale Ave,155,Subscriber,14.0,23,3,3
1,1,2016-03-31 23:46:00,2016-03-31 23:57:00,128,213,Damen Ave & Chicago Ave,Leavitt St & North Ave,4831,Subscriber,11.0,23,3,3
2,2,2016-03-31 23:42:00,2016-03-31 23:46:00,350,210,Ashland Ave & Chicago Ave,Ashland Ave & Division St,4232,Subscriber,4.0,23,3,3
3,3,2016-03-31 23:37:00,2016-03-31 23:55:00,303,458,Broadway & Cornelia Ave,Broadway & Thorndale Ave,3464,Subscriber,18.0,23,3,3
4,4,2016-03-31 23:33:00,2016-03-31 23:37:00,334,329,Lake Shore Dr & Belmont Ave,Lake Shore Dr & Diversey Pkwy,1750,Subscriber,4.0,23,3,3


In [7]:
# hinzufügen der Wetterdaten
weather = pd.read_csv("weather_hourly_chicago.csv", parse_dates = ["date_time"])
weather = weather.dropna()
weather["hour"] = weather["date_time"].apply(lambda x:x.hour)
weather["day"] = weather["date_time"].apply(lambda x:x.day)
weather.sort_values(by="date_time")
weather.head()

Unnamed: 0,date_time,max_temp,min_temp,precip,hour,day
0,2015-01-02 01:00:00,-1.7,-1.7,0.0,1,2
1,2015-01-02 02:00:00,-2.2,-2.2,0.0,2,2
2,2015-01-02 03:00:00,-2.8,-2.8,0.0,3,2
3,2015-01-02 04:00:00,-3.3,-3.3,0.0,4,2
4,2015-01-02 05:00:00,-4.4,-4.4,0.0,5,2


In [8]:
# verbinden mit den Fahrraddaten
chicago["day"] = chicago["start_time"].apply(lambda x: x.day)
chicago.head()

Unnamed: 0.1,Unnamed: 0,start_time,end_time,start_station_id,end_station_id,start_station_name,end_station_name,bike_id,user_type,duration,start_hour,weekday,month,day
0,0,2016-03-31 23:53:00,2016-04-01 00:07:00,344,458,Ravenswood Ave & Lawrence Ave,Broadway & Thorndale Ave,155,Subscriber,14.0,23,3,3,31
1,1,2016-03-31 23:46:00,2016-03-31 23:57:00,128,213,Damen Ave & Chicago Ave,Leavitt St & North Ave,4831,Subscriber,11.0,23,3,3,31
2,2,2016-03-31 23:42:00,2016-03-31 23:46:00,350,210,Ashland Ave & Chicago Ave,Ashland Ave & Division St,4232,Subscriber,4.0,23,3,3,31
3,3,2016-03-31 23:37:00,2016-03-31 23:55:00,303,458,Broadway & Cornelia Ave,Broadway & Thorndale Ave,3464,Subscriber,18.0,23,3,3,31
4,4,2016-03-31 23:33:00,2016-03-31 23:37:00,334,329,Lake Shore Dr & Belmont Ave,Lake Shore Dr & Diversey Pkwy,1750,Subscriber,4.0,23,3,3,31


In [9]:
chicago = chicago.merge(weather[["max_temp","min_temp","precip","hour","day"]],left_on=["day","start_hour"],right_on=["day","hour"])
chicago.head()

Unnamed: 0.1,Unnamed: 0,start_time,end_time,start_station_id,end_station_id,start_station_name,end_station_name,bike_id,user_type,duration,start_hour,weekday,month,day,max_temp,min_temp,precip,hour
0,0,2016-03-31 23:53:00,2016-04-01 00:07:00,344,458,Ravenswood Ave & Lawrence Ave,Broadway & Thorndale Ave,155,Subscriber,14.0,23,3,3,31,1.7,1.7,0.0,23
1,0,2016-03-31 23:53:00,2016-04-01 00:07:00,344,458,Ravenswood Ave & Lawrence Ave,Broadway & Thorndale Ave,155,Subscriber,14.0,23,3,3,31,13.9,13.9,0.0,23
2,0,2016-03-31 23:53:00,2016-04-01 00:07:00,344,458,Ravenswood Ave & Lawrence Ave,Broadway & Thorndale Ave,155,Subscriber,14.0,23,3,3,31,11.7,11.7,0.0,23
3,0,2016-03-31 23:53:00,2016-04-01 00:07:00,344,458,Ravenswood Ave & Lawrence Ave,Broadway & Thorndale Ave,155,Subscriber,14.0,23,3,3,31,28.9,28.9,0.0,23
4,0,2016-03-31 23:53:00,2016-04-01 00:07:00,344,458,Ravenswood Ave & Lawrence Ave,Broadway & Thorndale Ave,155,Subscriber,14.0,23,3,3,31,28.9,28.9,0.0,23


In [None]:
# aggregate to trips
feature_data = chicago.resample("H",on="start_time").agg({"start_station_id":"mean","user_type":"mean","weekday":"mean","month":"mean","duration":"mean","time_of_day":"mean","hour":"mean","day":"mean","season":"mean","max_temp":"mean","precip":"mean","bike_id":"count"})
feature_data.rename(columns={"bike_id":"trips"},inplace=True)

In [None]:
fig, ax = plt.subplots(figsize = (8,6))
# Plotting the data
ax.scatter(max_temp, trips, marker='x',)
ax.set_xlabel("Trips")
ax.set_ylabel("Max Temperature(°C)")

plt.show()