In [10]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [2]:
# Load the two input CSV files: the enriched 2018 rentals table and the
# 2018 weather table. Paths are relative to the notebook's directory.
rentals_csv = '../../dane/wyp2018_enriched.csv'
weather_csv = '../../dane/pogoda_2018.csv'

wyp2018 = pd.read_csv(rentals_csv)
pog2018 = pd.read_csv(weather_csv)

In [3]:
# Missing-value handling for the weather table.

# Count, per column, how many cells hold the 999.9 placeholder value.
# NOTE(review): only this one placeholder is checked here; confirm that no
# column encodes missing data with a different value.
(pog2018 == 999.9).sum()

daynumber     0
dayofweek     0
meantemp      0
visib        40
wdsp          0
fog           0
prcp          0
dtype: int64

In [4]:
# Fill missing visibility readings with the mean of the valid readings.
VISIB_MISSING = 999.9   # placeholder value that marks a missing reading

# Boolean mask of the rows whose visibility is the placeholder.
is_missing = pog2018['visib'].eq(VISIB_MISSING)

# Mean over the valid rows only, computed once instead of once per row.
visib_mean = pog2018.loc[~is_missing, 'visib'].mean()

# A single .loc assignment writes into pog2018 itself. Chained indexing
# (frame[col][row] = value) may write to a temporary copy instead, which is
# what SettingWithCopyWarning reports, and it does nothing at all when pandas
# copy-on-write is enabled.
pog2018.loc[is_missing, 'visib'] = visib_mean

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """


In [5]:
# Convert the weather columns from imperial to metric units, in place.
# NOTE: this cell is not idempotent. Re-running it without reloading pog2018
# applies every conversion a second time.
INCH_TO_MM = 25.4          # 1 inch = 25.4 mm (exact)
KNOT_TO_MS = 1852 / 3600   # 1 knot = 1852 m per hour ~= 0.5144 m/s
MILE_TO_M = 1609.344       # 1 international statute mile in meters (exact)

pog2018['meantemp'] = (pog2018['meantemp'] - 32) * 5 / 9   # Fahrenheit -> Celsius
pog2018['prcp'] = pog2018['prcp'] * INCH_TO_MM             # inches -> mm (1 mm == 1 l/m^2)
pog2018['wdsp'] = pog2018['wdsp'] * KNOT_TO_MS             # knots -> m/s
pog2018['visib'] = pog2018['visib'] * MILE_TO_M            # miles -> meters

In [6]:
# Attach the weather columns to every rental row by matching on the shared
# 'daynumber' column. Inner join (the pandas default): rows without a match
# on the other side are dropped.
join_key = 'daynumber'
df = wyp2018.merge(pog2018, how='inner', left_on=join_key, right_on=join_key)

In [7]:
# Is there any correlation between the number of rentals and the weather?
# Build a small frame with one row per day, then correlate its columns.

# Group once and reuse the grouper. Every aggregate below is indexed by the
# same sorted 'month_day' keys, so the positional .values arrays line up
# with rd.index when the frame is assembled.
daily = df.groupby('month_day')

# Rentals per day: number of rows with a non-null bike_num.
rd = daily['bike_num'].count()

# Per-day mean of each weather column (collapses the per-rental rows of a
# day into a single value).
avg_temp = daily['meantemp'].mean().values   # average temperature
prp = daily['prcp'].mean().values            # precipitation
ws = daily['wdsp'].mean().values             # wind speed
vis = daily['visib'].mean().values           # visibility
fog = daily['fog'].mean().values             # fog

data = pd.DataFrame(data={
    'date': rd.index,
    'rentals_number': rd.values,
    'avg_temp': avg_temp,
    'precipitation': prp,
    'wind_speed': ws,
    'visibility': vis,
    'fog': fog,
})

# Correlate the numeric columns only. 'date' is a label, not a measurement.
# Older pandas dropped such columns silently; pandas >= 2.0 raises when
# DataFrame.corr() meets a non-numeric column.
corr = data.select_dtypes(include='number').corr()
corr.style.background_gradient(cmap='coolwarm')

Unnamed: 0,rentals_number,avg_temp,precipitation,wind_speed,visibility,fog
rentals_number,1.0,0.784425,-0.170979,-0.30756,0.338949,-0.144224
avg_temp,0.784425,1.0,0.0208911,-0.245463,0.353246,-0.149057
precipitation,-0.170979,0.0208911,1.0,0.14965,-0.0388333,-0.0266742
wind_speed,-0.30756,-0.245463,0.14965,1.0,0.252169,-0.331455
visibility,0.338949,0.353246,-0.0388333,0.252169,1.0,-0.507237
fog,-0.144224,-0.149057,-0.0266742,-0.331455,-0.507237,1.0


In [12]:
# Plot daily rentals (left y-axis) against average temperature (right
# y-axis) over a shared date axis. The two series have different scales,
# hence the secondary axis. make_subplots is imported in the notebook's
# import cell.
fig = make_subplots(specs=[[{"secondary_y": True}]])

# (column, goes on the secondary axis?) - the trace name is the column name.
for column, on_secondary in (("rentals_number", False), ("avg_temp", True)):
    fig.add_trace(
        go.Scatter(x=data['date'], y=data[column], name=column),
        secondary_y=on_secondary,
    )

# Figure title and axis titles.
fig.update_layout(title_text="Number of rentals vs average temperature")
fig.update_xaxes(title_text="date")
fig.update_yaxes(title_text="<b>primary</b> rentals number", secondary_y=False)
fig.update_yaxes(title_text="<b>secondary</b> avg temp", secondary_y=True)

fig.show()