In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from scipy.stats import pearsonr, spearmanr

In [None]:
# Read the daily bike-sharing CSV; the file's first column becomes the row index
bikesharing_data = pd.read_csv(
    'datasets/bike_sharing_daily.csv',
    index_col=0,
)

# Preview the first five rows
bikesharing_data.head(n=5)

In [None]:
# Dimensions of the loaded frame as a (rows, columns) tuple
bikesharing_data.shape

In [None]:
# Print the per-column dtype / non-null summary of the frame
bikesharing_data.info()

In [None]:
# Column labels of the frame (the names used for the lookups in later cells)
bikesharing_data.columns

In [None]:
# Convert the dteday column to datetime values.
# pd.to_datetime is the documented conversion helper: it returns a Series that
# keeps the frame's index, so the assignment stays label-aligned instead of
# relying on positional assignment of a bare DatetimeIndex.
bikesharing_data['dteday'] = pd.to_datetime(bikesharing_data['dteday'])

In [None]:
# NumPy route: corrcoef yields the correlation matrix for the temp / cnt pair
np.corrcoef(
    x=bikesharing_data['temp'],
    y=bikesharing_data['cnt'],
)

In [None]:
# pandas route: Series.corr, with its default method (Pearson) spelled out
bikesharing_data['temp'].corr(bikesharing_data['cnt'], method='pearson')

In [None]:
# SciPy Pearson test: strength of the linear relationship between temp and cnt,
# returned together with its p-value
pearsonr(
    x=bikesharing_data['temp'],
    y=bikesharing_data['cnt'],
)

In [None]:
# SciPy Spearman test: rank-based correlation (the usual choice for ordinal
# data) between temp and cnt, returned together with its p-value
spearmanr(
    a=bikesharing_data['temp'],
    b=bikesharing_data['cnt'],
)

In [None]:
# Scatter of cnt against temp, drawn through the explicit figure/axes interface
fig, ax = plt.subplots(figsize=(12, 8))

ax.scatter(bikesharing_data['temp'], bikesharing_data['cnt'], color='m')
ax.set_title('Bike Sharing Daily')
ax.set_xlabel('Temperature')
ax.set_ylabel('Count')

plt.show()

In [None]:
# NumPy route: corrcoef yields the correlation matrix for the
# workingday / registered pair
np.corrcoef(
    x=bikesharing_data['workingday'],
    y=bikesharing_data['registered'],
)

In [None]:
# pandas route: Series.corr, with its default method (Pearson) spelled out
bikesharing_data['workingday'].corr(
    bikesharing_data['registered'], method='pearson')

In [None]:
# SciPy Pearson test: linear correlation between workingday and registered,
# returned together with its p-value
pearsonr(
    x=bikesharing_data['workingday'],
    y=bikesharing_data['registered'],
)

In [None]:
# SciPy Spearman test: rank-based correlation (the usual choice for ordinal
# data) between workingday and registered, returned together with its p-value
spearmanr(
    a=bikesharing_data['workingday'],
    b=bikesharing_data['registered'],
)

In [None]:
# Largest 'registered' value observed within each workingday group.
# The column is selected *before* aggregating so max() is not computed over
# every other column (dteday included) just to be discarded.
max_registered = bikesharing_data.groupby('workingday')['registered'].max()

# Create the figure at its final size up front and hand the axes to pandas
fig, ax = plt.subplots(figsize=(12, 8))
max_registered.plot(kind='bar', ax=ax, color=['r', 'c'])

ax.set_title('Registered Users')
ax.set_ylabel('Count of Registered Users')
plt.show()

In [None]:
# Largest 'casual' value observed within each workingday group.
# The column is selected *before* aggregating so max() is not computed over
# every other column (dteday included) just to be discarded.
max_casual = bikesharing_data.groupby('workingday')['casual'].max()

# Create the figure at its final size up front and hand the axes to pandas
fig, ax = plt.subplots(figsize=(12, 8))
max_casual.plot(kind='bar', ax=ax, color=['b', 'y'])

ax.set_title('Casual Users')
ax.set_ylabel('Count of Casual Users')
plt.show()

In [None]:
# NumPy route: corrcoef yields the correlation matrix for the
# windspeed / cnt pair
np.corrcoef(
    x=bikesharing_data['windspeed'],
    y=bikesharing_data['cnt'],
)

In [None]:
# pandas route: Series.corr, with its default method (Pearson) spelled out
bikesharing_data['windspeed'].corr(bikesharing_data['cnt'], method='pearson')

In [None]:
# SciPy Pearson test: strength of the linear relationship between windspeed
# and cnt, returned together with its p-value
pearsonr(
    x=bikesharing_data['windspeed'],
    y=bikesharing_data['cnt'],
)

In [None]:
# SciPy Spearman test: rank-based correlation (the usual choice for ordinal
# data) between windspeed and cnt, returned together with its p-value
spearmanr(
    a=bikesharing_data['windspeed'],
    b=bikesharing_data['cnt'],
)

In [None]:
# Scatter of cnt against windspeed, drawn through the explicit figure/axes
# interface
fig, ax = plt.subplots(figsize=(12, 8))

ax.scatter(
    bikesharing_data['windspeed'],
    bikesharing_data['cnt'],
    color='limegreen',
)
ax.set_title('Bike Sharing Daily')
ax.set_xlabel('Windspeed')
ax.set_ylabel('Count')

plt.show()

In [None]:
# Pairwise correlations between the numeric columns.
# The frame also holds the non-numeric dteday column, and DataFrame.corr()'s
# default handling of such columns changed in pandas 2.0 (numeric_only now
# defaults to False). Selecting the numeric columns explicitly keeps the
# result the same across pandas versions.
bikesharing_data.select_dtypes(include='number').corr()

In [None]:
# Compute the correlation matrix once, over numeric columns only (dteday is
# not numeric, and how DataFrame.corr() treats it depends on the pandas
# version).
corr_matrix = bikesharing_data.select_dtypes(include='number').corr()

plt.figure(figsize=(12, 8))

# fignum=False draws into the figure created above instead of opening a new one
plt.matshow(corr_matrix, fignum=False, aspect='equal')

# Tick labels must come from the matrix itself: labelling with
# bikesharing_data.columns shifts every label whenever a column (e.g. dteday)
# is absent from the correlation matrix.
columns = len(corr_matrix.columns)

plt.xticks(range(columns), corr_matrix.columns, rotation=90)
plt.yticks(range(columns), corr_matrix.columns)

plt.colorbar()
# Raise the title so it clears the rotated x tick labels drawn on top
plt.title('Correlations', y=1.2)

plt.show()

In [None]:
!pip install yellowbrick

In [None]:
# Response variable: the cnt column
target = bikesharing_data['cnt']

# Predictors: every column except casual, registered, cnt and dteday
features = bikesharing_data.drop(
    columns=['casual', 'registered', 'cnt', 'dteday'])

In [None]:
from yellowbrick.target import FeatureCorrelation

# Plain Python list of the predictor column labels, echoed as the cell output
feature_names = features.columns.tolist()
feature_names

In [None]:
# Fit Yellowbrick's FeatureCorrelation visualizer on the predictors and the
# target, labelling the output with the predictor column names.
visualizer = FeatureCorrelation(labels=feature_names)
visualizer.fit(features, target)

# show() is the current rendering call; poof() was deprecated in Yellowbrick 1.0
visualizer.show()