In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from datetime import datetime

## We scrape the daylight data from the site https://www.timeanddate.com/sun/canada/vancouver.

In [2]:
# Getting the sunlight data for a given month and year
def data_from(month, year, timeout=30):
    """Scrape one month of Vancouver sun data from timeanddate.com.

    Parameters
    ----------
    month : int
        Calendar month (1-12) placed in the ``month`` query parameter.
    year : int
        Calendar year placed in the ``year`` query parameter.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30).

    Returns
    -------
    pandas.DataFrame
        One row per data row of the page's ``as-monthsun`` table. The first
        column, ``Date``, is a ``datetime`` built from ``year``, ``month`` and
        the row's position; the remaining columns hold the stripped cell text.

    Raises
    ------
    requests.RequestException
        If the request fails, times out, or returns an HTTP error status.
    ValueError
        If the page has no table with id ``as-monthsun``, or if more data
        rows are found than the month has days (raised by ``datetime``).
    """
    base_url = "https://www.timeanddate.com/sun/canada/vancouver"
    url = f"{base_url}?month={month}&year={year}"
    print(url)

    # Scraping the webpage. The timeout keeps a stalled connection from hanging
    # the notebook, and raise_for_status() stops an error page from being parsed.
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    # Find the table that stores the data. Matching on the id alone avoids
    # depending on the exact order and spelling of the table's CSS classes.
    table = soup.find('table', id='as-monthsun')
    if table is None:
        raise ValueError(f"No table with id 'as-monthsun' found at {url}")

    # (Manually) create the headers and extract the rows
    headers = ['Date','Sunrise','Sunset','Daylength','Difference (min)','Astronomical Twilight Start','Astronomical Twilight End',
          'Nautical Twilight Start', 'Nautical Twilight End', 'Civil Twilight Start','Civil Twilight End', 'Solar Noon Time', 'Solar Noon Mil. mi']

    rows = table.find_all('tr')

    # Iterate through each row and extract data
    data = []
    day = 0

    # Ignore the last row "All times are local times for Vancouver..."
    for row in rows[:-1]:
        # In the March data, there's a row "Note: hours shift because clocks change forward 1 hour".
        if row.text.strip().startswith('Note'):
            continue

        # Header rows contain no <td> cells and are skipped.
        cells = row.find_all('td')
        if not cells:
            continue

        # Create the datetime: the n-th data row is taken to be day n of the month.
        day += 1
        dt = datetime(year, month, day)

        # Extract the cell data and insert the datetime in front
        row_data = [cell.text.strip() for cell in cells]
        row_data.insert(0, dt)

        data.append(row_data)

    # Create the pandas data frame
    df = pd.DataFrame(data, columns=headers)

    return df

In [3]:
# Data from May 2024: show five random rows.
# random_state is fixed so the same rows are displayed on every re-run.
data_from(5,2024).sample(5, random_state=42)

https://www.timeanddate.com/sun/canada/vancouver?month=5&year=2024


Unnamed: 0,Date,Sunrise,Sunset,Daylength,Difference (min),Astronomical Twilight Start,Astronomical Twilight End,Nautical Twilight Start,Nautical Twilight End,Civil Twilight Start,Civil Twilight End,Solar Noon Time,Solar Noon Mil. mi
15,2024-05-16,5:27 am ↑ (58°),8:51 pm ↑ (302°),15:24:35,+2:38,2:48 am,11:32 pm,3:56 am,10:22 pm,4:48 am,9:30 pm,1:08 pm (60.1°),94.0
23,2024-05-24,5:17 am ↑ (56°),9:01 pm ↑ (305°),15:43:50,+2:12,2:21 am,-,3:42 am,10:37 pm,4:37 am,9:42 pm,1:09 pm (61.7°),94.144
27,2024-05-28,5:14 am ↑ (54°),9:06 pm ↑ (306°),15:52:01,+1:56,-2:07 am,12:11 am-,3:36 am,10:44 pm,4:33 am,9:47 pm,1:09 pm (62.4°),94.21
30,2024-05-31,5:11 am ↑ (54°),9:09 pm ↑ (306°),15:57:26,+1:44,-1:55 am,12:24 am-,3:32 am,10:49 pm,4:30 am,9:51 pm,1:10 pm (62.8°),94.257
26,2024-05-27,5:15 am ↑ (55°),9:05 pm ↑ (305°),15:50:04,+2:00,-2:11 am,12:07 am-,3:37 am,10:42 pm,4:34 am,9:46 pm,1:09 pm (62.2°),94.194


In [4]:
# Scrape every month from Jan 2017 to Dec 2024 (one request per month),
# collecting the monthly frames in chronological order.

months = list(range(1, 13))
years = list(range(2017, 2025))
df_list = []

for year in years:
    for month in months:
        df = data_from(month, year)
        df_list.append(df)

        # Optional: uncomment the next line to also save each month as its own csv file
        #df.to_csv(f'vancouver_sunrise_sunset_{year}_{month}.csv', index=False)

https://www.timeanddate.com/sun/canada/vancouver?month=1&year=2017
https://www.timeanddate.com/sun/canada/vancouver?month=2&year=2017
https://www.timeanddate.com/sun/canada/vancouver?month=3&year=2017
https://www.timeanddate.com/sun/canada/vancouver?month=4&year=2017
https://www.timeanddate.com/sun/canada/vancouver?month=5&year=2017
https://www.timeanddate.com/sun/canada/vancouver?month=6&year=2017
https://www.timeanddate.com/sun/canada/vancouver?month=7&year=2017
https://www.timeanddate.com/sun/canada/vancouver?month=8&year=2017
https://www.timeanddate.com/sun/canada/vancouver?month=9&year=2017
https://www.timeanddate.com/sun/canada/vancouver?month=10&year=2017
https://www.timeanddate.com/sun/canada/vancouver?month=11&year=2017
https://www.timeanddate.com/sun/canada/vancouver?month=12&year=2017
https://www.timeanddate.com/sun/canada/vancouver?month=1&year=2018
https://www.timeanddate.com/sun/canada/vancouver?month=2&year=2018
https://www.timeanddate.com/sun/canada/vancouver?month=3&ye

In [5]:
# Combine all data from Jan 2017 to May 2024.
# The cut-off is applied on the Date column instead of slicing df_list by
# position, so it stays correct even if the scraped range of months changes.
last_date = pd.Timestamp('2024-05-31')
all_days_df = pd.concat(df_list, ignore_index=True)
combined_df = all_days_df[all_days_df['Date'] <= last_date].reset_index(drop=True)
combined_df

Unnamed: 0,Date,Sunrise,Sunset,Daylength,Difference (min),Astronomical Twilight Start,Astronomical Twilight End,Nautical Twilight Start,Nautical Twilight End,Civil Twilight Start,Civil Twilight End,Solar Noon Time,Solar Noon Mil. mi
0,2017-01-01,8:07 am ↑ (125°),4:25 pm ↑ (235°),8:17:20,+1:03,6:11 am,6:21 pm,6:49 am,5:43 pm,7:30 am,5:02 pm,12:16 pm (17.8°),91.406
1,2017-01-02,8:07 am ↑ (125°),4:26 pm ↑ (235°),8:18:28,+1:08,6:11 am,6:22 pm,6:49 am,5:43 pm,7:30 am,5:03 pm,12:16 pm (17.9°),91.405
2,2017-01-03,8:07 am ↑ (125°),4:27 pm ↑ (235°),8:19:42,+1:13,6:11 am,6:23 pm,6:49 am,5:44 pm,7:30 am,5:04 pm,12:17 pm (18.0°),91.404
3,2017-01-04,8:07 am ↑ (125°),4:28 pm ↑ (235°),8:21:00,+1:18,6:11 am,6:24 pm,6:49 am,5:45 pm,7:30 am,5:05 pm,12:17 pm (18.1°),91.404
4,2017-01-05,8:07 am ↑ (125°),4:29 pm ↑ (235°),8:22:24,+1:23,6:11 am,6:25 pm,6:49 am,5:46 pm,7:29 am,5:06 pm,12:18 pm (18.3°),91.405
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2703,2024-05-27,5:15 am ↑ (55°),9:05 pm ↑ (305°),15:50:04,+2:00,-2:11 am,12:07 am-,3:37 am,10:42 pm,4:34 am,9:46 pm,1:09 pm (62.2°),94.194
2704,2024-05-28,5:14 am ↑ (54°),9:06 pm ↑ (306°),15:52:01,+1:56,-2:07 am,12:11 am-,3:36 am,10:44 pm,4:33 am,9:47 pm,1:09 pm (62.4°),94.210
2705,2024-05-29,5:13 am ↑ (54°),9:07 pm ↑ (306°),15:53:53,+1:52,-2:03 am,12:15 am-,3:35 am,10:46 pm,4:32 am,9:48 pm,1:10 pm (62.5°),94.226
2706,2024-05-30,5:12 am ↑ (54°),9:08 pm ↑ (306°),15:55:42,+1:48,-1:59 am,12:20 am-,3:33 am,10:47 pm,4:31 am,9:49 pm,1:10 pm (62.7°),94.242


In [6]:
# Getting the daylight data
day_light = combined_df[['Date','Daylength']]
day_light

Unnamed: 0,Date,Daylength
0,2017-01-01,8:17:20
1,2017-01-02,8:18:28
2,2017-01-03,8:19:42
3,2017-01-04,8:21:00
4,2017-01-05,8:22:24
...,...,...
2703,2024-05-27,15:50:04
2704,2024-05-28,15:52:01
2705,2024-05-29,15:53:53
2706,2024-05-30,15:55:42


In [7]:
# Print the column dtypes and non-null counts of the daylight frame
day_light.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2708 entries, 0 to 2707
Data columns (total 2 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   Date       2708 non-null   datetime64[ns]
 1   Daylength  2708 non-null   object        
dtypes: datetime64[ns](1), object(1)
memory usage: 42.4+ KB
