In [None]:
import os

import ipywidgets as widgets
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn import datasets, linear_model
from sklearn import preprocessing
from sklearn.feature_selection import RFE
from sklearn.model_selection import train_test_split
%matplotlib inline

In [None]:
# Location of the bike-demand CSV file.
# Set the BIKEDEMAND_CSV environment variable to load the data from another
# location; when it is unset, the original machine-specific path is used.
BIKE_DATA_CSV = os.environ.get('BIKEDEMAND_CSV', 'C:\\Users\\joshm\\Documents\\bikedemand.csv')
bikeData = pd.read_csv(BIKE_DATA_CSV)

# Bike Demand Application:
The purpose of this application is to predict the number of bikes that will be rented based on the outside temperature (in degrees Fahrenheit). The user can enter a custom temperature to get a predicted number of bike rentals.

# Data:
Let's examine a sample of the data we'll be using. There are a number of different interesting variables, but this application is mainly concerned with the actual temperature and the number of bike rentals.

In [None]:
# Wrap the loaded data in a DataFrame and preview its first five rows.
frame = pd.DataFrame(data=bikeData)
frame.head(5)

# Bike Rental Histogram
The histogram below shows the distribution of bike rentals across the data. Rental counts most commonly fall in roughly the 350-500 range.

In [None]:
# Histogram of the 'rentals' column, drawn on a 10x5-inch figure with a legend.
plt.figure(figsize=(10, 5))
frame['rentals'].plot.hist()
plt.legend()

# Season Pie Chart
The chart below shows the distribution of bike rentals by season. Bikes appear to be rented most frequently in summer, which might be a sign that the number of bike rentals is positively correlated with the temperature.

In [None]:
# Per-season values for the pie chart. These are hard-coded here, not computed
# from the loaded data.
# They are stored under their own name instead of `y`, because `y` is assigned
# the regression target in a later cell and reusing it here invites confusion.
season_shares = np.array([32, 28, 26, 14])
seasons = ["Summer", "Spring", "Fall", "Winter"]
colors = ["yellow", "lightgreen", "orange", "lightblue"]
# One wedge per season, labelled with the season name.
plt.pie(season_shares, labels=seasons, shadow=True, colors=colors)
plt.show()

In [None]:
# Feature matrix: every column of the frame except the target.
# `columns=` replaces the positional axis argument of `drop('rentals', 1)`, which
# pandas deprecated in 1.3 and rejects from 2.0 onwards.
x = frame.drop(columns='rentals')
# Regression target: the 'rentals' column.
y = frame['rentals']

# Heatmap
The heatmap below shows the correlation between the number of bike rentals and the other attributes in the data. As we can see, temperature and temperature feel appear to have the highest correlation with rentals. We can better visualize this with a scatterplot.

In [None]:
# Pairwise correlations between the frame's columns, drawn as an annotated
# heatmap on a 9x8-inch figure.
correlation = frame.corr()
plt.figure(figsize=(9, 8))
sns.heatmap(data=correlation, annot=True, cmap=plt.cm.RdYlBu)
plt.show()

In [None]:
# Reduce the data to the predictor column(s) by removing the target and the
# attributes the model does not use.
# The columns are dropped from `bikeData` (the loaded data that `frame` was built
# from) rather than from `frame` itself, so re-running this cell gives the same
# result instead of raising a KeyError for columns that are already gone.
frame = bikeData.drop(columns=["season", "temp feel", "humidity", "windspeed", "rentals"])

In [None]:
# Hold out 25% of the rows for evaluation.
# random_state pins the shuffle, so the split and everything computed from it is
# the same on every run; change the value to see how the results vary with the
# rows that end up in the training set.
x_train, x_test, y_train, y_test = train_test_split(
    frame, y, test_size=0.25, random_state=42
)

In [None]:
# Fit a linear regression on the training split, then predict the held-out rows.
lrm = linear_model.LinearRegression()
lrm_model = lrm.fit(X=x_train, y=y_train)
predictions = lrm.predict(X=x_test)

# Scatterplots
The first scatterplot shows the predicted rentals using our linear regression model vs the historical rentals from the data. The second shows the original data and the correlation between the temperature and amount of bike rentals with a regression line.

In [None]:
# Plot each held-out observation against the model's prediction for it.
plt.title("Historical vs Predicted Bike Rentals")
plt.scatter(x=y_test, y=predictions)
plt.xlabel('Historical rentals')
plt.ylabel('Predicted rentals')

In [None]:
# Observations in blue, with the model's predictions over the same temperatures
# drawn as a red line.
plt.scatter(frame['temp'], y, color="blue")
plt.xlabel('Temperature')
plt.ylabel('Bike Rentals')
plt.plot(frame['temp'], lrm.predict(frame[['temp']]), color='red')

# Linear Regression Prediction
The score below is the R² (coefficient of determination) of our linear regression model on the held-out test data, followed by some examples of actual vs. predicted rentals. The score may vary depending on which rows are selected to train the model.

In [None]:
# Report the fitted model's score on the held-out split.
print('Score:', lrm_model.score(X=x_test, y=y_test))

In [None]:
# Side-by-side table of the held-out features, the observed rentals and the
# model's predictions.
# assign() returns a new frame, so x_test itself is left untouched and can still
# be passed to the model. Adding columns to a pd.DataFrame(x_test) wrapper can,
# depending on the pandas version, write through to x_test and warn about
# setting values on a slice.
resultsFrame = x_test.assign(**{
    'Historical Rentals': y_test,
    'Rental Predictions': predictions,
})
resultsFrame.head(n=5)

# Let's predict bike rentals!
Below the user can calculate their own bike rental prediction using our linear regression model.

In [None]:
# Integer temperature slider that drives the interactive prediction.
slider = widgets.IntSlider(
    min=5,
    max=100,
    description='Temperature',
    disabled=False,
    continuous_update=True,
    orientation='horizontal',
    readout=True,
    readout_format='d',
)


def bikerentals(rental):
    """Return the model's predicted number of rentals for one temperature.

    Args:
        rental: The temperature selected on the slider. The parameter name is
            kept as-is because ``widgets.interact`` binds the slider to it by
            keyword.

    Returns:
        The prediction truncated to an ``int``.
    """
    # Pass the value as a one-row frame with a 'temp' column, the same shape of
    # input the model receives when the regression line is plotted.
    features = pd.DataFrame({'temp': [rental]})
    # predict() returns an array with one element; index it explicitly, because
    # calling int() directly on the array is deprecated in NumPy 1.25+.
    return int(lrm.predict(features)[0])


print("Predicted bike rentals:")
widgets.interact(bikerentals, rental=slider)
print("bikes")