In [1]:
import pandas as pd
import sqlite3
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
db_path = '../data/clean/cyclistic.db'


In [3]:
conn = sqlite3.connect(db_path)
print("Connected to database successfully!")

Connected to database successfully!


In [4]:
query1 = """
SELECT
    member_casual AS user_type,
    AVG(ride_length_min) AS avg_ride_length_min,
    COUNT(*) AS total_ride_length_min
FROM cyclistic_trips
GROUP BY member_casual
"""

In [5]:
avg_duration_df = pd.read_sql(query1, conn)
print(avg_duration_df)

avg_duration_df.to_csv('../outputs/tables/avg_ride_duration_by_type.csv', index=False)

  user_type  avg_ride_length_min  total_ride_length_min
0    casual            20.926092                2145244
1    member            12.195021                3707005


In [15]:
query2 = ("""
SELECT
    day_of_week,
    member_casual AS user_type,
    COUNT(*) AS number_of_rides
FROM cyclistic_trips
GROUP BY day_of_week, user_type
ORDER BY
    CASE day_of_week
        WHEN 'Monday' THEN 1
        WHEN 'Tuesday' THEN 2
        WHEN 'Wednesday' THEN 3
        WHEN 'Thursday' THEN 4
        WHEN 'Friday' THEN 5
        WHEN 'Saturday' THEN 6
        WHEN 'Sunday' THEN 7
    END""")

In [16]:
rides_by_day_df = pd.read_sql_query(query2, conn)
print(rides_by_day_df)
rides_by_day_df.to_csv('../outputs/tables/rides_by_day_of_week.csv', index=False)
print("Query 2 results saved")

   day_of_week user_type  number_of_rides
0       Monday    casual           252900
1       Monday    member           534344
2      Tuesday    casual           231817
3      Tuesday    member           570387
4    Wednesday    casual           268632
5    Wednesday    member           609954
6     Thursday    casual           264401
7     Thursday    member           570300
8       Friday    casual           314918
9       Friday    member           525583
10    Saturday    casual           444005
11    Saturday    member           479440
12      Sunday    casual           368571
13      Sunday    member           416997
Query 2 results saved


In [17]:
query3 = """
SELECT
    hour,
    member_casual AS user_type,
    COUNT(*) AS number_of_rides
FROM cyclistic_trips
GROUP BY hour, user_type
ORDER BY hour;
"""

In [18]:
rides_by_hour_df = pd.read_sql_query(query3, conn)
print(rides_by_hour_df)
rides_by_hour_df.to_csv('../outputs/tables/rides_by_hour.csv', index=False)
print("Query 3 results saved!")

    hour user_type  number_of_rides
0      0    casual            35260
1      0    member            32743
2      1    casual            23577
3      1    member            20060
4      2    casual            14638
5      2    member            11383
6      3    casual             8136
7      3    member             7819
8      4    casual             6402
9      4    member             8711
10     5    casual            12105
11     5    member            34593
12     6    casual            28710
13     6    member           104009
14     7    casual            53421
15     7    member           200308
16     8    casual            75633
17     8    member           255232
18     9    casual            76438
19     9    member           173796
20    10    casual            94210
21    10    member           153199
22    11    casual           120004
23    11    member           178917
24    12    casual           142007
25    12    member           204666
26    13    casual          

In [19]:
query4 = """
SELECT
    start_station_name,
    COUNT(*) AS number_of_casual_rides
FROM cyclistic_trips
WHERE member_casual = 'casual'
    AND start_station_name IS NOT NULL
GROUP BY start_station_name
ORDER BY number_of_casual_rides DESC
LIMIT 10;
"""

In [20]:
top_casual_stations_df = pd.read_sql_query(query4, conn)
print(top_casual_stations_df)
top_casual_stations_df.to_csv('../outputs/tables/top_10_casual_stations.csv', index=False)
print("Query 4 results saved!")

                   start_station_name  number_of_casual_rides
0             Streeter Dr & Grand Ave                   50903
1   DuSable Lake Shore Dr & Monroe St                   33965
2               Michigan Ave & Oak St                   25058
3  DuSable Lake Shore Dr & North Blvd                   23033
4                     Millennium Park                   22498
5                      Shedd Aquarium                   21020
6                      Dusable Harbor                   18390
7                 Theater on the Lake                   16773
8               Michigan Ave & 8th St                   13456
9                   Adler Planetarium                   12879
Query 4 results saved!


In [21]:
query5 = """
SELECT
    rideable_type,
    member_casual AS user_type,
    COUNT(*) AS number_of_rides
FROM cyclistic_trips
GROUP BY rideable_type, user_type;
"""

In [22]:
bike_type_df = pd.read_sql_query(query5, conn)
print(bike_type_df)
bike_type_df.to_csv('../outputs/tables/bike_type_preference.csv', index=False)
print("Query 5 results saved!")

      rideable_type user_type  number_of_rides
0      classic_bike    casual           968778
1      classic_bike    member          1759184
2     electric_bike    casual          1091251
3     electric_bike    member          1888699
4  electric_scooter    casual            85215
5  electric_scooter    member            59122
Query 5 results saved!


In [23]:
# !! ALWAYS CLOSE THE CONNECTION !!
conn.close()
print("Database connection closed. Analysis complete!")

Database connection closed. Analysis complete!
