In [1]:
pip install citipy

Collecting citipy
  Using cached citipy-0.0.5-py3-none-any.whl
Collecting kdtree>=0.12
  Using cached kdtree-0.16-py2.py3-none-any.whl (7.7 kB)
Installing collected packages: kdtree, citipy
Successfully installed citipy-0.0.5 kdtree-0.16
Note: you may need to restart the kernel to use updated packages.


In [6]:


# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time

# Import API key
from config import weather_api_key

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

# Path of the CSV output file
output_data_file = "output_data/cities.csv"

# (low, high) bounds for latitude and longitude values
lat_range, lng_range = (-90, 90), (-180, 180)

In [7]:
# Build a list of unique city names by sampling random coordinates and asking
# citipy for the nearest city to each one.

# Draw 1500 random latitude / longitude values within the configured bounds
lats = np.random.uniform(low=lat_range[0], high=lat_range[1], size=1500)
lngs = np.random.uniform(low=lng_range[0], high=lng_range[1], size=1500)
lat_lngs = zip(lats, lngs)

# `cities` keeps first-seen order; `seen_cities` gives constant-time duplicate checks
cities = []
seen_cities = set()

# Identify the nearest city for each lat, lng combination
for lat, lng in lat_lngs:
    city = citipy.nearest_city(lat, lng).city_name

    # If the city is unique, then add it to our cities list
    if city not in seen_cities:
        seen_cities.add(city)
        cities.append(city)

# Show the city count to confirm a sufficient count
len(cities)

644

In [9]:
# Query the OpenWeatherMap current-weather endpoint once per city in `cities` and
# collect the selected fields into `weather_dict` (column name -> list of values).
url = "http://api.openweathermap.org/data/2.5/weather?"
units = "metric"
# Kept under its original name for any other cell that relies on it. The requests
# below pass the query parameters separately so that `requests` URL-encodes the
# city name (spaces and any other reserved characters).
query_url = f"{url}appid={weather_api_key}&units={units}&q="

# Counters for the "Record X of Set Y" progress log; a new set starts after 50 records
record_count = 1
set_count = 1

# Empty dictionary to collect info; every list must stay the same length so the
# dictionary can be turned into a DataFrame afterwards
weather_dict = {"Cities": [], "Country": [], "Latitude": [], "Date": [], "Max. Temperature": [], "Humidity": [],
                "Cloudiness": [], "Wind Speed": []}

print(" Beginning Data Retrieval \n-----------------------------")

for city in cities:
    # Log the request with the API key masked so the secret never ends up in saved output
    city_url = city.replace(" ", "%20")
    print(f"{url}appid=<hidden>&units={units}&q={city_url}")

    try:
        weather_response = requests.get(
            url,
            params={"appid": weather_api_key, "units": units, "q": city},
            timeout=10,  # do not let a single stalled request hang the whole run
        ).json()
    except (requests.exceptions.RequestException, ValueError) as error:
        # Only the exception type is printed: the message of a requests error can
        # contain the full URL, including the API key.
        print(f"Request failed for {city} ({type(error).__name__})..so skipping..")
        continue

    # The API reports an unknown city through a "cod" value of 404
    if str(weather_response.get("cod")) == "404":
        print("City not found. Skipping...")
        continue

    print(f"Processing Record {record_count} of Set {set_count} | {city}")
    try:
        # Read every field first: if any key is missing nothing has been appended
        # yet, so the lists in weather_dict cannot end up with different lengths.
        record = {
            "Cities": city,
            "Country": weather_response['sys']['country'],
            "Latitude": weather_response['coord']['lat'],
            # Keep only the date component of the timestamp.
            # NOTE(review): localtime() makes the date depend on this machine's time zone.
            "Date": time.strftime('%m-%d-%y', time.localtime(weather_response['dt'])),
            "Max. Temperature": weather_response['main']['temp_max'],
            "Humidity": weather_response['main']['humidity'],
            "Cloudiness": weather_response['clouds']['all'],
            "Wind Speed": weather_response['wind']['speed'],
        }
    except KeyError:
        print(f"Data not complete for {city}..so skipping..")
    else:
        for column, value in record.items():
            weather_dict[column].append(value)

    # Advance the progress counters (also for incomplete records, as before)
    if record_count % 50 == 0:
        set_count += 1
        record_count = 1
    else:
        record_count += 1

print("-----------------------------\n Data Retrieval Complete \n-----------------------------")

 Beginning Data Retrieval 
-----------------------------
http://api.openweathermap.org/data/2.5/weather?appid=<hidden>&units=metric&q=kisujszallas
Processing Record 1 of Set 1 | kisujszallas
http://api.openweathermap.org/data/2.5/weather?appid=<hidden>&units=metric&q=rockland
Processing Record 2 of Set 1 | rockland
http://api.openweathermap.org/data/2.5/weather?appid=<hidden>&units=metric&q=taulov
Processing Record 3 of Set 1 | taulov
http://api.openweathermap.org/data/2.5/weather?appid=<hidden>&units=metric&q=severo-kurilsk
Processing Record 4 of Set 1 | severo-kurilsk
http://api.openweathermap.org/data/2.5/weather?appid=<hidden>&units=metric&q=khatanga
Processing Record 5 of Set 1 | khatanga
http://api.openweathermap.org/data/2.5/weather?appid=<hidden>&units=metric&q=bambous%20virieux
Processing Record 6 of Set 1 | bambous virieux
http://api.

In [17]:
# Turn the collected columns into a DataFrame and keep only the rows whose
# humidity is at most 100 (a few weather reports carry erroneous humidity
# values of more than 100%).
weather_df = pd.DataFrame(weather_dict).loc[lambda frame: frame["Humidity"] <= 100]

# Write the cleaned table to a CSV file
weather_df.to_csv("Weather_data.csv")

In [18]:
# Select the weather data for a single date.
# value_counts() maps each date to its number of rows, and idxmax() returns the
# date with the highest count, i.e. the date most of the weather data belongs to.
# (max() over the date strings would instead pick the lexicographically largest
# "mm-dd-yy" label, regardless of how many rows it has.)
date_today = weather_df["Date"].value_counts().idxmax()

# Keep only the rows recorded on that date
weather_today = weather_df.loc[weather_df["Date"] == date_today, :]
weather_today.head(10)

Unnamed: 0,Cities,Country,Latitude,Date,Max. Temperature,Humidity,Cloudiness,Wind Speed
0,kisujszallas,HU,47.2167,12-12-21,2.01,97,100,4.21
1,rockland,US,41.1668,12-12-21,6.35,57,0,0.89
2,taulov,DK,55.5458,12-12-21,7.84,93,46,2.58
3,severo-kurilsk,RU,50.6789,12-12-21,1.17,81,27,2.61
4,khatanga,RU,71.9667,12-12-21,-44.46,99,82,2.42
5,bambous virieux,MU,-20.3428,12-12-21,24.14,78,40,2.57
6,sukhobezvodnoye,RU,57.0491,12-12-21,-10.5,91,80,2.77
7,atuona,PF,-9.8,12-12-21,26.1,80,83,7.79
8,tuktoyaktuk,CA,69.4541,12-12-21,-21.0,77,90,8.75
9,nikolskoye,RU,59.7035,12-12-21,-4.06,99,100,0.68
