In [1]:
import pandas as pd

In [2]:
df=pd.read_csv('bikedata.csv')

In [3]:
df

Unnamed: 0,stationid,date,in_bike,out_bike
0,2494.0,2016-06-01,12.0,20.0
1,2494.0,2016-06-02,15.0,16.0
2,2494.0,2016-06-03,30.0,29.0
3,2494.0,2016-06-04,44.0,46.0
4,2494.0,2016-06-05,35.0,41.0
...,...,...,...,...
13201,3513.0,2017-07-27,8.0,8.0
13202,3513.0,2017-07-28,13.0,16.0
13203,3513.0,2017-07-29,7.0,12.0
13204,3513.0,2017-07-30,6.0,11.0


In [4]:
import holidays

df['date'] = pd.to_datetime(df['date'])

df['is_weekday'] = df['date'].dt.dayofweek < 5
us_holidays = holidays.UnitedStates()
df['is_holiday'] = df['date'].isin(us_holidays)

print(df)

       stationid       date  in_bike  out_bike  is_weekday  is_holiday
0         2494.0 2016-06-01     12.0      20.0        True       False
1         2494.0 2016-06-02     15.0      16.0        True       False
2         2494.0 2016-06-03     30.0      29.0        True       False
3         2494.0 2016-06-04     44.0      46.0       False       False
4         2494.0 2016-06-05     35.0      41.0       False       False
...          ...        ...      ...       ...         ...         ...
13201     3513.0 2017-07-27      8.0       8.0        True       False
13202     3513.0 2017-07-28     13.0      16.0        True       False
13203     3513.0 2017-07-29      7.0      12.0       False       False
13204     3513.0 2017-07-30      6.0      11.0       False       False
13205     3513.0 2017-07-31      6.0      11.0        True       False

[13206 rows x 6 columns]


In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler


df['day_of_week'] = df['date'].dt.dayofweek
df['month'] = df['date'].dt.month

train_data = df[df['date'] < '2017-07-01']
test_data = df[df['date'] >= '2017-07-01']

features = ['stationid', 'is_weekday', 'is_holiday', 'month', 'day_of_week']  
target_in_bike = 'in_bike'  
target_out_bike = 'out_bike'  

X_train, X_test = train_data[features], test_data[features]
y_train_in_bike, y_test_in_bike = train_data[target_in_bike], test_data[target_in_bike]
y_train_out_bike, y_test_out_bike = train_data[target_out_bike], test_data[target_out_bike]


scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


model_decision_tree_in_bike = DecisionTreeRegressor()
model_decision_tree_in_bike.fit(X_train_scaled, y_train_in_bike)

model_decision_tree_out_bike = DecisionTreeRegressor()
model_decision_tree_out_bike.fit(X_train_scaled, y_train_out_bike)

# Random Forest
model_random_forest_in_bike = RandomForestRegressor()
model_random_forest_in_bike.fit(X_train_scaled, y_train_in_bike)

model_random_forest_out_bike = RandomForestRegressor()
model_random_forest_out_bike.fit(X_train_scaled, y_train_out_bike)

# Gradient Boosting
model_gradient_boosting_in_bike = GradientBoostingRegressor()
model_gradient_boosting_in_bike.fit(X_train_scaled, y_train_in_bike)

model_gradient_boosting_out_bike = GradientBoostingRegressor()
model_gradient_boosting_out_bike.fit(X_train_scaled, y_train_out_bike)

# K-Nearest Neighbors (KNN)
model_knn_in_bike = KNeighborsRegressor()
model_knn_in_bike.fit(X_train_scaled, y_train_in_bike)

model_knn_out_bike = KNeighborsRegressor()
model_knn_out_bike.fit(X_train_scaled, y_train_out_bike)

# Make predictions for July 2017
predictions_decision_tree_in_bike = model_decision_tree_in_bike.predict(X_test_scaled)
predictions_decision_tree_out_bike = model_decision_tree_out_bike.predict(X_test_scaled)

predictions_random_forest_in_bike = model_random_forest_in_bike.predict(X_test_scaled)
predictions_random_forest_out_bike = model_random_forest_out_bike.predict(X_test_scaled)

predictions_gradient_boosting_in_bike = model_gradient_boosting_in_bike.predict(X_test_scaled)
predictions_gradient_boosting_out_bike = model_gradient_boosting_out_bike.predict(X_test_scaled)

predictions_knn_in_bike = model_knn_in_bike.predict(X_test_scaled)
predictions_knn_out_bike = model_knn_out_bike.predict(X_test_scaled)

# Evaluate the models
mse_decision_tree_in_bike = mean_squared_error(y_test_in_bike, predictions_decision_tree_in_bike)
mse_decision_tree_out_bike = mean_squared_error(y_test_out_bike, predictions_decision_tree_out_bike)

mse_random_forest_in_bike = mean_squared_error(y_test_in_bike, predictions_random_forest_in_bike)
mse_random_forest_out_bike = mean_squared_error(y_test_out_bike, predictions_random_forest_out_bike)

mse_gradient_boosting_in_bike = mean_squared_error(y_test_in_bike, predictions_gradient_boosting_in_bike)
mse_gradient_boosting_out_bike = mean_squared_error(y_test_out_bike, predictions_gradient_boosting_out_bike)

mse_knn_in_bike = mean_squared_error(y_test_in_bike, predictions_knn_in_bike)
mse_knn_out_bike = mean_squared_error(y_test_out_bike, predictions_knn_out_bike)

print('Mean Squared Error (Decision Tree):')
print(f'in_bike: {mse_decision_tree_in_bike}, out_bike: {mse_decision_tree_out_bike}')

print('\nMean Squared Error (Random Forest):')
print(f'in_bike: {mse_random_forest_in_bike}, out_bike: {mse_random_forest_out_bike}')

print('\nMean Squared Error (Gradient Boosting):')
print(f'in_bike: {mse_gradient_boosting_in_bike}, out_bike: {mse_gradient_boosting_out_bike}')

print('\nMean Squared Error (K-Nearest Neighbors):')
print(f'in_bike: {mse_knn_in_bike}, out_bike: {mse_knn_out_bike}')


Mean Squared Error (Decision Tree):
in_bike: 53.30876500543146, out_bike: 58.04960088812526

Mean Squared Error (Random Forest):
in_bike: 53.53804821954278, out_bike: 58.31173757045329

Mean Squared Error (Gradient Boosting):
in_bike: 48.70006568757061, out_bike: 52.27308179933144

Mean Squared Error (K-Nearest Neighbors):
in_bike: 54.50014877625384, out_bike: 59.34901188725539


In [6]:
df

Unnamed: 0,stationid,date,in_bike,out_bike,is_weekday,is_holiday,day_of_week,month
0,2494.0,2016-06-01,12.0,20.0,True,False,2,6
1,2494.0,2016-06-02,15.0,16.0,True,False,3,6
2,2494.0,2016-06-03,30.0,29.0,True,False,4,6
3,2494.0,2016-06-04,44.0,46.0,False,False,5,6
4,2494.0,2016-06-05,35.0,41.0,False,False,6,6
...,...,...,...,...,...,...,...,...
13201,3513.0,2017-07-27,8.0,8.0,True,False,3,7
13202,3513.0,2017-07-28,13.0,16.0,True,False,4,7
13203,3513.0,2017-07-29,7.0,12.0,False,False,5,7
13204,3513.0,2017-07-30,6.0,11.0,False,False,6,7


In [8]:
# df.to_csv('bike infor.csv')