In [1]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time

In [2]:
# Impor the OpenWeatherMap API key
from api_keys import weather_api_key

In [3]:
# List of cities
cities = ["St. John's", 'Halifax', 'Québec', 'Sherbrooke', 'Montréal',
       'Ottawa', 'Gatineau', 'Oshawa', 'Toronto',
       'Hamilton', 'St. Catharines',
       'Kitchener', 'London', 'Windsor', 'Barrie',
       'Winnipeg', 'Regina', 'Saskatoon', 'Calgary', 'Edmonton',
       'Kelowna', 'Abbotsford', 'Vancouver', 'Victoria']

In [4]:
#["St. John's", 'Halifax', 'Québec', 'Sherbrooke', 'Montréal','Ottawa - Gatineau', 'Ottawa - Gatineau (Quebec part)','Ottawa - Gatineau (Ontario part)', 'Oshawa', 'Toronto','Hamilton', 'St. Catharines - Niagara','Kitchener - Cambridge - Waterloo', 'London', 'Windsor', 'Barrie','Winnipeg', 'Regina', 'Saskatoon', 'Calgary', 'Edmonton','Kelowna', 'Abbotsford - Mission', 'Vancouver', 'Victoria']

In [5]:
# Set the API base URL
url = "http://api.openweathermap.org/data/2.5/weather?" 

# Define an empty list to fetch the weather data for each city
city_data = []

# Print to logger
print("Beginning Data Retrieval     ")
print("-----------------------------")

# Create counters
record_count = 1
set_count = 1

# Loop through all the cities in our list to fetch weather data
for i, city in enumerate(cities):
        
    # Group cities in sets of 50 for logging purposes
    if (i % 50 == 0 and i >= 50):
        set_count += 1
        record_count = 0

    # Create endpoint URL with each city
    city_url = f"{url}appid={weather_api_key}&q={city}&units=metric"
       
    # Log the url, record, and set numbers
    print("Processing Record %s of Set %s | %s" % (record_count, set_count, city))

    # Add 1 to the record count
    record_count += 1

    # Run an API request for each of the cities
    try:
        # Parse the JSON and retrieve data
        response_json = requests.get(city_url).json()
        #json.dumps(response_json, indent=4, sort_keys=True)
        #city_weather = response_json["weather"]
        
        # Parse out latitude, longitude, max temp, humidity, cloudiness, wind speed, country, and date
        city_lat = response_json["coord"]["lat"]
        city_lng = response_json["coord"]["lon"]
        city_max_temp = response_json["main"]["temp_max"]
        city_humidity = response_json["main"]["humidity"]
        city_clouds = response_json["clouds"]["all"]
        city_wind = response_json["wind"]["speed"]
        city_country = response_json["sys"]["country"]
        city_date = response_json["dt"]
      

        # Append the City information into city_data list
        city_data.append({"City": city, 
                          "Lat": city_lat, 
                          "Lng": city_lng, 
                          "Max Temp": city_max_temp,
                          "Humidity": city_humidity,
                          "Cloudiness": city_clouds,
                          "Wind Speed": city_wind,
                          "Country": city_country,
                          "Date": city_date})

    # If an error is experienced, skip the city
    except:
        print("City not found. Skipping...")
        pass
              
# Indicate that Data Loading is complete 
print("-----------------------------")
print("Data Retrieval Complete      ")
print("-----------------------------")

Beginning Data Retrieval     
-----------------------------
Processing Record 1 of Set 1 | St. John's
Processing Record 2 of Set 1 | Halifax
Processing Record 3 of Set 1 | Québec
Processing Record 4 of Set 1 | Sherbrooke
Processing Record 5 of Set 1 | Montréal
Processing Record 6 of Set 1 | Ottawa
Processing Record 7 of Set 1 | Gatineau
Processing Record 8 of Set 1 | Oshawa
Processing Record 9 of Set 1 | Toronto
Processing Record 10 of Set 1 | Hamilton
Processing Record 11 of Set 1 | St. Catharines
Processing Record 12 of Set 1 | Kitchener
Processing Record 13 of Set 1 | London
Processing Record 14 of Set 1 | Windsor
Processing Record 15 of Set 1 | Barrie
Processing Record 16 of Set 1 | Winnipeg
Processing Record 17 of Set 1 | Regina
Processing Record 18 of Set 1 | Saskatoon
Processing Record 19 of Set 1 | Calgary
Processing Record 20 of Set 1 | Edmonton
Processing Record 21 of Set 1 | Kelowna
Processing Record 22 of Set 1 | Abbotsford
Processing Record 23 of Set 1 | Vancouver
Processi

In [8]:
# Find out how many cities were found
len(city_data)

24

In [9]:
# Convert the cities weather data into a Pandas DataFrame
city_data_df = pd.DataFrame(city_data)

# Show Record Count
city_data_df.count()

City          24
Lat           24
Lng           24
Max Temp      24
Humidity      24
Cloudiness    24
Wind Speed    24
Country       24
Date          24
dtype: int64

In [10]:
# Display sample data
city_data_df

Unnamed: 0,City,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed,Country,Date
0,St. John's,47.5649,-52.7093,-0.57,82,75,9.26,CA,1675960609
1,Halifax,44.6453,-63.5724,0.25,38,20,8.23,CA,1675960073
2,Québec,46.8123,-71.2145,-1.0,68,75,2.57,CA,1675960610
3,Sherbrooke,45.4001,-71.8991,0.32,72,0,2.06,CA,1675960395
4,Montréal,45.5088,-73.5878,1.86,82,100,3.09,CA,1675960427
5,Ottawa,45.4112,-75.6981,-0.74,91,100,6.17,CA,1675959755
6,Gatineau,45.4772,-75.7016,-0.43,89,100,6.17,CA,1675960566
7,Oshawa,43.9001,-78.8496,2.6,99,100,6.17,CA,1675960317
8,Toronto,43.7001,-79.4163,2.95,94,100,10.29,CA,1675960399
9,Hamilton,39.1834,-84.5333,19.21,71,100,9.77,US,1675960527


In [11]:
city_data_df.index.name = 'City_IDWeather'
city_data_df

Unnamed: 0_level_0,City,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed,Country,Date
City_IDWeather,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0,St. John's,47.5649,-52.7093,-0.57,82,75,9.26,CA,1675960609
1,Halifax,44.6453,-63.5724,0.25,38,20,8.23,CA,1675960073
2,Québec,46.8123,-71.2145,-1.0,68,75,2.57,CA,1675960610
3,Sherbrooke,45.4001,-71.8991,0.32,72,0,2.06,CA,1675960395
4,Montréal,45.5088,-73.5878,1.86,82,100,3.09,CA,1675960427
5,Ottawa,45.4112,-75.6981,-0.74,91,100,6.17,CA,1675959755
6,Gatineau,45.4772,-75.7016,-0.43,89,100,6.17,CA,1675960566
7,Oshawa,43.9001,-78.8496,2.6,99,100,6.17,CA,1675960317
8,Toronto,43.7001,-79.4163,2.95,94,100,10.29,CA,1675960399
9,Hamilton,39.1834,-84.5333,19.21,71,100,9.77,US,1675960527


In [12]:
cities_lookup_df = pd.read_csv('Resources/cities_lookup.csv')
#, encoding='ISO-8859-1'

cities_lookup_df

Unnamed: 0,City_ID,City,City_ID2,GEO,Province,City_IDWeather,Weather_City
0,0,St. John's,18372.0,St. John's,Newfoundland and Labrador [10001],0.0,St. John's
1,1,Halifax,18426.0,Halifax,Nova Scotia [12205],1.0,Halifax
2,2,Québec,18534.0,Québec,Quebec [24421],2.0,Québec
3,3,Sherbrooke,18552.0,Sherbrooke,Quebec [24433],3.0,Sherbrooke
4,4,Montréal,18588.0,Montréal,Quebec [24462],4.0,Montréal
5,5,Ottawa - Gatineau,18642.0,Ottawa-Gatineau,Ontario/Quebec [24505/35505],,
6,6,Ottawa - Gatineau (Quebec part),18606.0,Ottawa-Gatineau,Quebec part [24505],6.0,Gatineau
7,7,Ottawa - Gatineau (Ontario part),18660.0,Ottawa-Gatineau,Ontario part [35505],5.0,Ottawa
8,8,Oshawa,,,,7.0,Oshawa
9,9,Toronto,18732.0,Toronto,Ontario [35535],8.0,Toronto


In [13]:
columns_to_keep = ['City_IDWeather', 'GEO', 'City_ID2']
reduced_lookup_df = cities_lookup_df[columns_to_keep]
reduced_lookup_df

Unnamed: 0,City_IDWeather,GEO,City_ID2
0,0.0,St. John's,18372.0
1,1.0,Halifax,18426.0
2,2.0,Québec,18534.0
3,3.0,Sherbrooke,18552.0
4,4.0,Montréal,18588.0
5,,Ottawa-Gatineau,18642.0
6,6.0,Ottawa-Gatineau,18606.0
7,5.0,Ottawa-Gatineau,18660.0
8,7.0,,
9,8.0,Toronto,18732.0


In [14]:
cities_lookup_df1 = reduced_lookup_df.dropna()
cities_lookup_df1

Unnamed: 0,City_IDWeather,GEO,City_ID2
0,0.0,St. John's,18372.0
1,1.0,Halifax,18426.0
2,2.0,Québec,18534.0
3,3.0,Sherbrooke,18552.0
4,4.0,Montréal,18588.0
6,6.0,Ottawa-Gatineau,18606.0
7,5.0,Ottawa-Gatineau,18660.0
9,8.0,Toronto,18732.0
10,9.0,Hamilton,18750.0
11,10.0,St.Catharines-Niagara,18768.0


In [15]:
cities_lookup_df2 = cities_lookup_df1.astype({'City_ID2':'int', 'City_IDWeather':'int'})
cities_lookup_df2

Unnamed: 0,City_IDWeather,GEO,City_ID2
0,0,St. John's,18372
1,1,Halifax,18426
2,2,Québec,18534
3,3,Sherbrooke,18552
4,4,Montréal,18588
6,6,Ottawa-Gatineau,18606
7,5,Ottawa-Gatineau,18660
9,8,Toronto,18732
10,9,Hamilton,18750
11,10,St.Catharines-Niagara,18768


In [16]:
merged_df = city_data_df.merge(cities_lookup_df2, on='City_IDWeather', how='inner')
merged_df

Unnamed: 0,City_IDWeather,City,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed,Country,Date,GEO,City_ID2
0,0,St. John's,47.5649,-52.7093,-0.57,82,75,9.26,CA,1675960609,St. John's,18372
1,1,Halifax,44.6453,-63.5724,0.25,38,20,8.23,CA,1675960073,Halifax,18426
2,2,Québec,46.8123,-71.2145,-1.0,68,75,2.57,CA,1675960610,Québec,18534
3,3,Sherbrooke,45.4001,-71.8991,0.32,72,0,2.06,CA,1675960395,Sherbrooke,18552
4,4,Montréal,45.5088,-73.5878,1.86,82,100,3.09,CA,1675960427,Montréal,18588
5,5,Ottawa,45.4112,-75.6981,-0.74,91,100,6.17,CA,1675959755,Ottawa-Gatineau,18660
6,6,Gatineau,45.4772,-75.7016,-0.43,89,100,6.17,CA,1675960566,Ottawa-Gatineau,18606
7,8,Toronto,43.7001,-79.4163,2.95,94,100,10.29,CA,1675960399,Toronto,18732
8,9,Hamilton,39.1834,-84.5333,19.21,71,100,9.77,US,1675960527,Hamilton,18750
9,10,St. Catharines,43.1668,-79.2496,6.47,97,100,3.09,CA,1675960457,St.Catharines-Niagara,18768


In [17]:
len(merged_df)

23

In [18]:
# Export the merged_df into a csv

merged_df.to_csv("weather_data.csv", index_label="City_ID2")