In [67]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time

In [68]:
# Import the OpenWeatherMap API key
from api_keys import weather_api_key

In [69]:
# Cities whose current weather is requested from OpenWeatherMap
# (the request URL appends the ",CA" country code to each name)
cities = [
    'Halifax',
    'Québec',
    'Montréal',
    'Ottawa',
    'Gatineau',
    'Toronto',
    'Hamilton',
    'St. Catharines',
    'Kitchener',
    'London',
    'Windsor',
    'Winnipeg',
    'Calgary',
    'Edmonton',
    'Vancouver',
]

In [70]:
# Set the API base URL (HTTPS, so the API key in the query string is not sent in clear text)
url = "https://api.openweathermap.org/data/2.5/weather?q="

# Endpoint format:
# https://api.openweathermap.org/data/2.5/weather?q={city name},{country code}&appid={API key}

# No `units` parameter is sent, so temperatures are converted from Kelvin
# to Celsius by subtracting this offset
KELVIN_TO_CELSIUS_OFFSET = 273.15

# Seconds to wait for a response before giving up on a city, so one stalled
# request cannot hang the whole notebook
REQUEST_TIMEOUT = 10

# Define an empty list to collect the weather data for each city
city_data = []

# Print to logger
print("Beginning Data Retrieval     ")
print("-----------------------------")

# Create counters (both are 1-based and only used for the log lines)
record_count = 1
set_count = 1

# Loop through all the cities in our list to fetch weather data
for i, city in enumerate(cities):

    # Group cities in sets of 50 for logging purposes
    if (i % 50 == 0 and i >= 50):
        set_count += 1
        # Restart at 1 (not 0) so record numbering is 1-based in every set
        record_count = 1

    # Create endpoint URL with each city, restricted to Canada
    city_url = f"{url}{city},CA&appid={weather_api_key}"

    # Log the record and set numbers (never the URL: it contains the API key)
    print("Processing Record %s of Set %s | %s" % (record_count, set_count, city))

    # Add 1 to the record count
    record_count += 1

    # Run an API request for each of the cities
    try:
        # Parse the JSON and retrieve data
        response_json = requests.get(city_url, timeout=REQUEST_TIMEOUT).json()

        # Parse out latitude, longitude, max temp, humidity, cloudiness, wind speed, country, and date
        city_lat = response_json["coord"]["lat"]
        city_lng = response_json["coord"]["lon"]
        city_max_temp = response_json["main"]["temp_max"]
        city_humidity = response_json["main"]["humidity"]
        city_clouds = response_json["clouds"]["all"]
        city_wind = response_json["wind"]["speed"]
        city_country = response_json["sys"]["country"]
        city_date = response_json["dt"]

        # Append the City information into city_data list
        city_data.append({"City": city,
                          "Lat": city_lat,
                          "Lng": city_lng,
                          "Max Temp": city_max_temp - KELVIN_TO_CELSIUS_OFFSET,
                          "Humidity": city_humidity,
                          "Cloudiness": city_clouds,
                          "Wind Speed": city_wind,
                          "Country": city_country,
                          "Date": city_date})

    # Skip the city when the request fails, the body is not valid JSON, or the
    # payload lacks the expected keys (e.g. an error response). Only these
    # errors are caught, so KeyboardInterrupt and genuine programming errors
    # still surface instead of being silently swallowed.
    except (requests.RequestException, KeyError, TypeError, ValueError):
        print("City not found. Skipping...")

# Indicate that Data Loading is complete
print("-----------------------------")
print("Data Retrieval Complete      ")
print("-----------------------------")

Beginning Data Retrieval     
-----------------------------
Processing Record 1 of Set 1 | Halifax
Processing Record 2 of Set 1 | Québec
Processing Record 3 of Set 1 | Montréal
Processing Record 4 of Set 1 | Ottawa
Processing Record 5 of Set 1 | Gatineau
Processing Record 6 of Set 1 | Toronto
Processing Record 7 of Set 1 | Hamilton
Processing Record 8 of Set 1 | St. Catharines
Processing Record 9 of Set 1 | Kitchener
Processing Record 10 of Set 1 | London
Processing Record 11 of Set 1 | Windsor
Processing Record 12 of Set 1 | Winnipeg
Processing Record 13 of Set 1 | Calgary
Processing Record 14 of Set 1 | Edmonton
Processing Record 15 of Set 1 | Vancouver
-----------------------------
Data Retrieval Complete      
-----------------------------


In [71]:
# Find out how many cities were found, i.e. how many records the retrieval
# loop appended to city_data (skipped cities are not appended)
len(city_data)

15

In [72]:
# Build a DataFrame from the per-city records (one row per dict in city_data)
city_data_df = pd.DataFrame(data=city_data)

# Non-null count per column, to confirm every field was filled for every city
city_data_df.count()

City          15
Lat           15
Lng           15
Max Temp      15
Humidity      15
Cloudiness    15
Wind Speed    15
Country       15
Date          15
dtype: int64

In [73]:
# Display the weather DataFrame; as the cell's last expression it is rendered as a table
city_data_df

Unnamed: 0,City,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed,Country,Date
0,Halifax,44.6453,-63.5724,-0.44,57,100,2.06,CA,1676003402
1,Québec,46.8123,-71.2145,-2.11,93,100,11.32,CA,1676002865
2,Montréal,45.5088,-73.5878,3.47,95,100,6.17,CA,1676003421
3,Ottawa,45.4112,-75.6981,1.53,93,100,6.17,CA,1676003277
4,Gatineau,45.4772,-75.7016,1.67,92,100,6.17,CA,1676003447
5,Toronto,43.7001,-79.4163,6.71,95,100,6.17,CA,1676002792
6,Hamilton,43.2334,-79.9496,6.34,82,100,14.92,CA,1676003415
7,St. Catharines,43.1668,-79.2496,8.47,79,98,12.86,CA,1676003579
8,Kitchener,43.4254,-80.5112,4.87,89,100,6.17,CA,1676003297
9,London,42.9834,-81.233,4.18,82,100,8.75,CA,1676003466


In [74]:
# Name the row index 'City_IDWeather' so it can be matched against the
# lookup table's column of the same name when the two frames are merged
city_data_df.index = city_data_df.index.rename('City_IDWeather')
city_data_df

Unnamed: 0_level_0,City,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed,Country,Date
City_IDWeather,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0,Halifax,44.6453,-63.5724,-0.44,57,100,2.06,CA,1676003402
1,Québec,46.8123,-71.2145,-2.11,93,100,11.32,CA,1676002865
2,Montréal,45.5088,-73.5878,3.47,95,100,6.17,CA,1676003421
3,Ottawa,45.4112,-75.6981,1.53,93,100,6.17,CA,1676003277
4,Gatineau,45.4772,-75.7016,1.67,92,100,6.17,CA,1676003447
5,Toronto,43.7001,-79.4163,6.71,95,100,6.17,CA,1676002792
6,Hamilton,43.2334,-79.9496,6.34,82,100,14.92,CA,1676003415
7,St. Catharines,43.1668,-79.2496,8.47,79,98,12.86,CA,1676003579
8,Kitchener,43.4254,-80.5112,4.87,89,100,6.17,CA,1676003297
9,London,42.9834,-81.233,4.18,82,100,8.75,CA,1676003466


In [75]:
# Load the lookup table that links each weather city (City_IDWeather)
# to its GEO name and City_ID2 identifier
cities_lookup_df = pd.read_csv(filepath_or_buffer='Resources/cities_lookup.csv')

cities_lookup_df

Unnamed: 0,City_ID,City,City_ID2,GEO,Province,City_IDWeather,Weather_City
0,0,Halifax,18426,Halifax,Nova Scotia [12205],0,Halifax
1,1,Québec,18534,Québec,Quebec [24421],1,Québec
2,2,Montréal,18588,Montréal,Quebec [24462],2,Montréal
3,3,Ottawa - Gatineau (Quebec part),18606,Ottawa-Gatineau,Quebec part [24505],4,Gatineau
4,4,Ottawa - Gatineau (Ontario part),18660,Ottawa-Gatineau,Ontario part [35505],3,Ottawa
5,5,Toronto,18732,Toronto,Ontario [35535],5,Toronto
6,6,Hamilton,18750,Hamilton,Ontario [35537],6,Hamilton
7,7,St. Catharines - Niagara,18768,St.Catharines-Niagara,Ontario [35539],7,St. Catharines
8,8,Kitchener - Cambridge - Waterloo,18786,Kitchener-Cambridge-Waterloo,Ontario [35541],8,Kitchener
9,9,London,18840,London,Ontario [35555],9,London


In [76]:
# Keep only the merge key (City_IDWeather) and the two identifiers carried forward
columns_to_keep = ['City_IDWeather', 'GEO', 'City_ID2']
reduced_lookup_df = cities_lookup_df.loc[:, columns_to_keep]
reduced_lookup_df

Unnamed: 0,City_IDWeather,GEO,City_ID2
0,0,Halifax,18426
1,1,Québec,18534
2,2,Montréal,18588
3,4,Ottawa-Gatineau,18606
4,3,Ottawa-Gatineau,18660
5,5,Toronto,18732
6,6,Hamilton,18750
7,7,St.Catharines-Niagara,18768
8,8,Kitchener-Cambridge-Waterloo,18786
9,9,London,18840


In [77]:
# Discard every lookup row that has a missing value in any of the kept columns
cities_lookup_df1 = reduced_lookup_df.dropna(axis=0, how='any')
cities_lookup_df1

Unnamed: 0,City_IDWeather,GEO,City_ID2
0,0,Halifax,18426
1,1,Québec,18534
2,2,Montréal,18588
3,4,Ottawa-Gatineau,18606
4,3,Ottawa-Gatineau,18660
5,5,Toronto,18732
6,6,Hamilton,18750
7,7,St.Catharines-Niagara,18768
8,8,Kitchener-Cambridge-Waterloo,18786
9,9,London,18840


In [78]:
# Cast both identifier columns to integers
id_column_dtypes = {'City_ID2': 'int', 'City_IDWeather': 'int'}
cities_lookup_df2 = cities_lookup_df1.astype(dtype=id_column_dtypes)
cities_lookup_df2

Unnamed: 0,City_IDWeather,GEO,City_ID2
0,0,Halifax,18426
1,1,Québec,18534
2,2,Montréal,18588
3,4,Ottawa-Gatineau,18606
4,3,Ottawa-Gatineau,18660
5,5,Toronto,18732
6,6,Hamilton,18750
7,7,St.Catharines-Niagara,18768
8,8,Kitchener-Cambridge-Waterloo,18786
9,9,London,18840


In [84]:
# Attach the lookup identifiers (GEO, City_ID2) to each weather record.
# The key 'City_IDWeather' is the index name on the weather frame and a
# column on the lookup frame; an inner join keeps only keys present in both.
merged_df = pd.merge(
    left=city_data_df,
    right=cities_lookup_df2,
    how='inner',
    on='City_IDWeather',
)
merged_df

Unnamed: 0,City_IDWeather,City,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed,Country,Date,GEO,City_ID2
0,0,Halifax,44.6453,-63.5724,-0.44,57,100,2.06,CA,1676003402,Halifax,18426
1,1,Québec,46.8123,-71.2145,-2.11,93,100,11.32,CA,1676002865,Québec,18534
2,2,Montréal,45.5088,-73.5878,3.47,95,100,6.17,CA,1676003421,Montréal,18588
3,3,Ottawa,45.4112,-75.6981,1.53,93,100,6.17,CA,1676003277,Ottawa-Gatineau,18660
4,4,Gatineau,45.4772,-75.7016,1.67,92,100,6.17,CA,1676003447,Ottawa-Gatineau,18606
5,5,Toronto,43.7001,-79.4163,6.71,95,100,6.17,CA,1676002792,Toronto,18732
6,6,Hamilton,43.2334,-79.9496,6.34,82,100,14.92,CA,1676003415,Hamilton,18750
7,7,St. Catharines,43.1668,-79.2496,8.47,79,98,12.86,CA,1676003579,St.Catharines-Niagara,18768
8,8,Kitchener,43.4254,-80.5112,4.87,89,100,6.17,CA,1676003297,Kitchener-Cambridge-Waterloo,18786
9,9,London,42.9834,-81.233,4.18,82,100,8.75,CA,1676003466,London,18840


In [88]:
# Keep only the weather measurements plus the identifiers needed in the export
columns_to_keep1 = ['Wind Speed', 'Max Temp', 'City_ID2', 'City']
weather_df = merged_df.loc[:, columns_to_keep1]
weather_df

Unnamed: 0,Wind Speed,Max Temp,City_ID2,City
0,2.06,-0.44,18426,Halifax
1,11.32,-2.11,18534,Québec
2,6.17,3.47,18588,Montréal
3,6.17,1.53,18660,Ottawa
4,6.17,1.67,18606,Gatineau
5,6.17,6.71,18732,Toronto
6,14.92,6.34,18750,Hamilton
7,12.86,8.47,18768,St. Catharines
8,6.17,4.87,18786,Kitchener
9,8.75,4.18,18840,London


In [89]:
# Row count of the reduced frame, to check how many cities remain after the merge
weather_df.shape[0]

15

In [90]:
# Export weather_df to a CSV keyed by City_ID2.
# The frame already has a 'City_ID2' column, so writing the default RangeIndex
# under index_label="City_ID2" produced two columns with the same header.
# Promoting the real column to the index writes a single City_ID2 column.
weather_df.set_index("City_ID2").to_csv("Output/weather_data.csv", index_label="City_ID2")