In [6]:
import pandas as pd
from matplotlib import pyplot as plt
from datetime import datetime as dt
import numpy as np

In [7]:
# Load only the trip identifier, rider type, date and temperature columns
# from the combined weather/trips export.
wanted_columns = ['ride_id', 'member_casual', 'DATE', 'TMIN', 'TAVG', 'TMAX']
df = pd.read_csv('weather_trips_2022.csv', usecols=wanted_columns)

In [8]:
# Preview the first rows to confirm the selected columns were loaded.
df.head()

Unnamed: 0,ride_id,member_casual,DATE,TMIN,TAVG,TMAX
0,DF208007EE5F9D79,member,2022-08-26,78,81,88
1,48A796B4996BACB0,member,2022-08-03,74,82,92
2,AE459F435C0B1D5B,member,2022-08-11,74,80,91
3,CA8B4A94620E74C0,member,2022-08-10,75,81,86
4,EB85C6F0148BC348,member,2022-08-31,72,78,85


In [9]:
# Convert the DATE column to pandas datetimes so it can be compared,
# grouped and used as a time-series index.
parsed_dates = pd.to_datetime(df['DATE'])
df['DATE'] = parsed_dates

In [10]:
# Verify the column types after the DATE conversion.
df.dtypes

ride_id                  object
member_casual            object
DATE             datetime64[ns]
TMIN                      int64
TAVG                      int64
TMAX                      int64
dtype: object

In [11]:
# Plot the 2022 temperatures as a time-series line chart from the weather dataframe.
# For this example, use the pandas plotting API.

In [12]:
# Build a DATE-indexed copy of the frame; df itself keeps DATE as a column.
df_temp = df.set_index(keys='DATE')

In [13]:
# Preview the DATE-indexed frame.
df_temp.head()

Unnamed: 0_level_0,ride_id,member_casual,TMIN,TAVG,TMAX
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2022-08-26,DF208007EE5F9D79,member,78,81,88
2022-08-03,48A796B4996BACB0,member,74,82,92
2022-08-11,AE459F435C0B1D5B,member,74,80,91
2022-08-10,CA8B4A94620E74C0,member,75,81,86
2022-08-31,EB85C6F0148BC348,member,72,78,85


In [14]:
# Create a column with the count of trips per day and merge it with the weather dataframe

In [15]:
# Count the rides recorded on each date, then turn the DATE index back into
# a regular column so the result can be merged on DATE.
daily_counts = df.groupby(['DATE'])['ride_id'].count()
df_group = pd.DataFrame(daily_counts).reset_index()

In [16]:
# The counted ride_id column holds the number of rides per day; name it accordingly.
df_group = df_group.rename(columns={'ride_id': 'count_of_rides_daily'})

In [17]:
# Attach the per-day ride count to every trip row.
#
# This cell overwrites df, so re-running it would find count_of_rides_daily and
# _merge already present, and the existing _merge column makes the indicator
# merge fail. Dropping any previously derived columns first keeps the cell
# re-runnable; on the first run nothing is dropped.
derived_cols = [c for c in ('count_of_rides_daily', '_merge') if c in df.columns]
df = (df.drop(columns=derived_cols) if derived_cols else df).merge(
    df_group,
    on='DATE',
    how='outer',
    indicator=True,            # adds the _merge column used to verify the join
    validate='many_to_one',    # each DATE must appear once in df_group, or rows would be duplicated
)

In [18]:
# Tally the merge indicator: rows matched on both sides vs. left-only / right-only.
merge_status_counts = df['_merge'].value_counts(dropna=False)
print(merge_status_counts)

both          30689921
left_only            0
right_only           0
Name: _merge, dtype: int64


In [19]:
# Spot-check the merged frame: select the rows for two individual days and
# report how many rows and columns each selection has.
jan_first_rows = df[df['DATE'] == '2022-01-01']
jan_second_rows = df[df['DATE'] == '2022-01-02']
print("Shape of January 1st is", jan_first_rows.shape)
print("Shape of January 2nd is", jan_second_rows.shape)

Shape of January 1st is (20895, 8)
Shape of January 2nd is (44234, 8)


In [20]:
# Inspect the merged frame: each row should now carry count_of_rides_daily and _merge.
df.head()

Unnamed: 0,ride_id,member_casual,DATE,TMIN,TAVG,TMAX,count_of_rides_daily,_merge
0,DF208007EE5F9D79,member,2022-08-26,78,81,88,111955,both
1,4BF322F1E0D74152,member,2022-08-26,78,81,88,111955,both
2,301E3D811B0D5219,member,2022-08-26,78,81,88,111955,both
3,7D4E20D0A43FCE1F,casual,2022-08-26,78,81,88,111955,both
4,BB560C341D35EB7D,member,2022-08-26,78,81,88,111955,both


In [21]:
# NOTE: df_temp was created from df before the merge, so at this point it does
# not contain count_of_rides_daily or _merge; it is rebuilt from the merged df
# in a later cell.
df_temp.head()

Unnamed: 0_level_0,ride_id,member_casual,TMIN,TAVG,TMAX
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2022-08-26,DF208007EE5F9D79,member,78,81,88
2022-08-03,48A796B4996BACB0,member,74,82,92
2022-08-11,AE459F435C0B1D5B,member,74,80,91
2022-08-10,CA8B4A94620E74C0,member,75,81,86
2022-08-31,EB85C6F0148BC348,member,72,78,85


In [22]:
# Create a line chart of bike trip counts and temperatures plotted on a dual axis

In [23]:
# Rebuild the DATE-indexed frame from the merged df so it includes the
# daily ride counts needed for the dual-axis chart.
df_temp = df.set_index(keys='DATE')

In [24]:
# Confirm the rebuilt DATE-indexed frame now includes count_of_rides_daily and _merge.
df_temp.head()

Unnamed: 0_level_0,ride_id,member_casual,TMIN,TAVG,TMAX,count_of_rides_daily,_merge
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2022-08-26,DF208007EE5F9D79,member,78,81,88,111955,both
2022-08-26,4BF322F1E0D74152,member,78,81,88,111955,both
2022-08-26,301E3D811B0D5219,member,78,81,88,111955,both
2022-08-26,7D4E20D0A43FCE1F,casual,78,81,88,111955,both
2022-08-26,BB560C341D35EB7D,member,78,81,88,111955,both


In [44]:
# Save the merged, DATE-indexed frame for later use.
# DATE is the index of df_temp, so the index has to be written out:
# with index=False the date would be silently dropped from the CSV.
df_temp.to_csv('weather_trips_version2.csv', index=True)