In [1]:
# Dependencies and Setup
import time
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
from scipy.stats import linregress

# Import API key
from api_keys import weather_api_key

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

# Output file (CSV): path the weather DataFrame is written to with `to_csv`
output_data_file = "output_data/cities.csv"

# (min, max) bounds for latitude and longitude; the random coordinates used to
# pick cities are drawn uniformly between these bounds
lat_range = (-90, 90)
lng_range = (-180, 180)

## Generate Cities List

In [2]:
# Unique city names, kept in the order they were first seen
cities = []
# Same names as `cities`, held in a set so the uniqueness check is O(1)
# instead of rescanning the whole list for every sampled coordinate
seen_cities = set()

# Create a set of random lat and lng combinations
lats = np.random.uniform(lat_range[0], lat_range[1], size=1500)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=1500)
lat_lngs = zip(lats, lngs)

# Identify nearest city for each lat, lng combination
for lat_lng in lat_lngs:
    city = citipy.nearest_city(lat_lng[0], lat_lng[1]).city_name

    # Only the first occurrence of a city name is added to the list
    if city not in seen_cities:
        seen_cities.add(city)
        cities.append(city)

# Print the city count to confirm sufficient count
len(cities)

633

In [None]:
# NOTE(review): this cell is disabled. Everything below is wrapped in a
# triple-quoted string, so none of it runs. The quoted code would split
# `cities` into consecutive slices of `set_len` names and collect them in
# `city_set`.
"""set_len = 50
city_set = []

range(len(cities))

for x in range(0, len(cities), set_len):
    city_set.append(cities[x:x+set_len])

print(city_set[0])
"""

In [None]:
# NOTE(review): this cell is disabled. The code is wrapped in a triple-quoted
# string, so none of it runs. The quoted code would flatten `city_set` back
# into a single `city_name` list. In the visible notebook `city_set` is only
# assigned inside another quoted-out cell, so un-quoting this cell alone would
# presumably raise NameError.
"""city_name = []
for i in range(len(city_set)):
    for city in city_set[i]:
        city_name.append(city)

print(city_name)"""

In [4]:
# Imperial units are required so Max Temperature is reported in Fahrenheit
# and Wind Speed in mph
units = "imperial"

# Base url. HTTPS is used because the API key travels in the query string
# and would otherwise be sent in clear text.
url = "https://api.openweathermap.org/data/2.5/weather?"

# The per-city query url is built inside the request loop, where `city` is
# actually defined. Building it here relied on a `city` value left over from
# another cell, and the result was overwritten before it was ever used.

In [6]:
# Define lists to save JSON response (one entry per city that was found)
city_name = []
lat = []
lon = []
max_temp = []
humidity = []
cloudiness = []
wind_speed = []
country = []
city_id = []
date = []

# Define counters to print processing logs
record_count = 1
set_count = 1

# Number of cities per "set" in the processing log
SET_SIZE = 50
# Only 60 API calls can be made per minute, so pause one second after every
# call, whether or not the city was found
REQUEST_DELAY_SECONDS = 1.0
# Do not let a single hung connection stall the whole run
REQUEST_TIMEOUT_SECONDS = 10

# Call the API once per city and print a processing log.
# All fields of a response are read before any list is touched, so a response
# that is missing a key can never leave the lists with different lengths.
for i, city in enumerate(cities):
    # Start a new set (and restart the record numbering) every SET_SIZE cities
    if i % SET_SIZE == 0 and i >= SET_SIZE:
        set_count += 1
        record_count = 1

    query_url = f"{url}appid={weather_api_key}&units={units}&q={city}"
    print(f"Processing Record {record_count} of Set {set_count} | {city}")
    record_count += 1

    try:
        response = requests.get(query_url, timeout=REQUEST_TIMEOUT_SECONDS).json()
        record = (
            response["name"],
            response["coord"]["lat"],
            response["coord"]["lon"],
            response["main"]["temp_max"],
            response["main"]["humidity"],
            response["clouds"]["all"],
            response["wind"]["speed"],
            response["sys"]["country"],
            response["id"],
            response["dt"],
        )
    except KeyError:
        # The response lacks the expected weather fields
        print("City not found. Skipping...")
    except (requests.exceptions.RequestException, ValueError) as error:
        # Network failure or a body that is not valid JSON. Only the exception
        # type is printed because the full message can contain the request
        # url, which includes the API key.
        print(f"Request failed ({type(error).__name__}). Skipping...")
    else:
        city_name.append(record[0])
        lat.append(record[1])
        lon.append(record[2])
        max_temp.append(record[3])
        humidity.append(record[4])
        cloudiness.append(record[5])
        wind_speed.append(record[6])
        country.append(record[7])
        city_id.append(record[8])
        date.append(record[9])
    finally:
        time.sleep(REQUEST_DELAY_SECONDS)
    

Processing Record 1 of Set 1 | tsihombe
City not found. Skipping...
Processing Record 2 of Set 1 | sora
Processing Record 3 of Set 1 | castro
Processing Record 4 of Set 1 | busselton
Processing Record 5 of Set 1 | amderma
City not found. Skipping...
Processing Record 6 of Set 1 | santa maria
Processing Record 7 of Set 1 | cape town
Processing Record 8 of Set 1 | volovo
Processing Record 9 of Set 1 | new norfolk
Processing Record 10 of Set 1 | marcona
City not found. Skipping...
Processing Record 11 of Set 1 | rikitea
Processing Record 12 of Set 1 | hobart
Processing Record 13 of Set 1 | yellowknife
Processing Record 14 of Set 1 | henties bay
Processing Record 15 of Set 1 | ardakan
Processing Record 16 of Set 1 | nanortalik
Processing Record 17 of Set 1 | mingshui
Processing Record 18 of Set 1 | varaklani
City not found. Skipping...
Processing Record 19 of Set 1 | east london
Processing Record 20 of Set 1 | kamenskoye
City not found. Skipping...
Processing Record 21 of Set 1 | lavrentiy

Processing Record 38 of Set 4 | karabash
Processing Record 39 of Set 4 | talnakh
Processing Record 40 of Set 4 | avarua
Processing Record 41 of Set 4 | nikolskoye
Processing Record 42 of Set 4 | lenine
Processing Record 43 of Set 4 | tahta
Processing Record 44 of Set 4 | tual
Processing Record 45 of Set 4 | peniche
Processing Record 46 of Set 4 | westport
Processing Record 47 of Set 4 | kahului
Processing Record 48 of Set 4 | cabra
Processing Record 49 of Set 4 | ouesso
Processing Record 50 of Set 4 | pisco
Processing Record 1 of Set 5 | ifakara
Processing Record 2 of Set 5 | germenchuk
Processing Record 3 of Set 5 | gambela
Processing Record 4 of Set 5 | griffith
Processing Record 5 of Set 5 | halifax
Processing Record 6 of Set 5 | chuy
Processing Record 7 of Set 5 | keti bandar
Processing Record 8 of Set 5 | mount isa
Processing Record 9 of Set 5 | goure
Processing Record 10 of Set 5 | wad rawah
Processing Record 11 of Set 5 | butembo
Processing Record 12 of Set 5 | alihe
Processing 

Processing Record 30 of Set 8 | srandakan
Processing Record 31 of Set 8 | sri aman
Processing Record 32 of Set 8 | pestretsy
Processing Record 33 of Set 8 | dryden
Processing Record 34 of Set 8 | lazaro cardenas
Processing Record 35 of Set 8 | port keats
Processing Record 36 of Set 8 | imeni poliny osipenko
Processing Record 37 of Set 8 | misratah
Processing Record 38 of Set 8 | louisbourg
City not found. Skipping...
Processing Record 39 of Set 8 | saryshagan
City not found. Skipping...
Processing Record 40 of Set 8 | atherton
Processing Record 41 of Set 8 | boddam
Processing Record 42 of Set 8 | erdenet
Processing Record 43 of Set 8 | kabalo
Processing Record 44 of Set 8 | jujuy
City not found. Skipping...
Processing Record 45 of Set 8 | kupang
Processing Record 46 of Set 8 | tagusao
Processing Record 47 of Set 8 | college
Processing Record 48 of Set 8 | conde
Processing Record 49 of Set 8 | sao joao da barra
Processing Record 50 of Set 8 | bataipora
Processing Record 1 of Set 9 | str

Processing Record 16 of Set 12 | teruel
Processing Record 17 of Set 12 | metu
Processing Record 18 of Set 12 | karaton
Processing Record 19 of Set 12 | maromitsa
Processing Record 20 of Set 12 | kuche
City not found. Skipping...
Processing Record 21 of Set 12 | bikaner
Processing Record 22 of Set 12 | faya
Processing Record 23 of Set 12 | santa catarina de tepehuanes
Processing Record 24 of Set 12 | touros
Processing Record 25 of Set 12 | bodden town
Processing Record 26 of Set 12 | el tarra
Processing Record 27 of Set 12 | mporokoso
Processing Record 28 of Set 12 | aden
Processing Record 29 of Set 12 | contai
Processing Record 30 of Set 12 | san nicolas
Processing Record 31 of Set 12 | moju
Processing Record 32 of Set 12 | iralaya
Processing Record 33 of Set 12 | kilindoni
Processing Record 34 of Set 12 | tura
Processing Record 35 of Set 12 | ayr
Processing Record 36 of Set 12 | ahipara
Processing Record 37 of Set 12 | vila franca do campo
Processing Record 38 of Set 12 | petropavlovs

In [10]:
# Number of city names collected by the API loop
len(city_name)

581

In [11]:
# Assemble the per-city lists into one table, one column per measurement
column_names = ["City", "Lat", "Lng", "Max Temp", "Humidity",
                "Cloudiness", "Wind Speed", "Country", "Date"]
column_values = [city_name, lat, lon, max_temp, humidity,
                 cloudiness, wind_speed, country, date]

cities_weather_df = pd.DataFrame(dict(zip(column_names, column_values)))
cities_weather_df

Unnamed: 0,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed,Country,Date
0,41.7157,13.6141,88.79,21,62,3.56,IT,1627229229
1,-24.7911,-50.0119,70.20,39,1,9.69,BR,1627229230
2,-33.6500,115.3333,58.08,67,39,18.77,AU,1627229232
3,-29.6842,-53.8069,77.74,48,64,10.98,BR,1627229233
4,-33.9258,18.4232,64.09,64,100,1.99,ZA,1627229116
...,...,...,...,...,...,...,...,...
576,45.7625,106.2708,61.39,77,100,9.73,MN,1627229692
577,24.3667,92.1667,77.72,98,100,2.44,IN,1627229989
578,42.6177,-6.4155,82.71,29,33,5.01,ES,1627229990
579,-0.1934,-74.7819,81.79,64,94,1.66,CO,1627229992


In [None]:
# Save Dataframe as CSV file for future reference.
# `to_csv` does not create missing folders, so make sure the target directory
# exists before writing.
Path(output_data_file).parent.mkdir(parents=True, exist_ok=True)
cities_weather_df.to_csv(output_data_file)

In [None]:
# Summary statistics (as produced by `DataFrame.describe`) for the weather
# table; the result is kept in `cities_weather_stats` and displayed as the
# cell output
cities_weather_stats = cities_weather_df.describe()
cities_weather_stats

In [None]:
# Row labels of every city whose reported Humidity exceeds 100
humidity_too_high = cities_weather_df["Humidity"] > 100
humidity_outliers = cities_weather_df.loc[humidity_too_high].index
humidity_outliers

In [None]:
# Copy of the weather table without the humidity-outlier rows; the original
# frame is left unchanged
clean_city_data = cities_weather_df.drop(index=humidity_outliers)
clean_city_data

In [None]:
# Linear regression of Max Temperature against Latitude, using the table with
# humidity outliers removed
x_axis = clean_city_data["Lat"]
y_axis = clean_city_data["Max Temp"]

(slope, intercept, r_value, p_value, std_err) = linregress(x_axis, y_axis)

# Max temperature predicted by the fitted line at each city's latitude
regress_value = slope * x_axis + intercept

# Line equation to print on the scatter plot
line_eq = "y = " + str(round(slope, 2)) + "x + " + str(round(intercept, 2))

# Scatter plot of Max Temperature against Latitude
plt.scatter(x_axis, y_axis)

# Line plot for regression line
plt.plot(x_axis, regress_value, "r-")

# Display the regression equation on the plot
plt.annotate(line_eq, (20, 70), fontsize=14, color="red")

# Labels and Titles for the plot
plt.xlabel("Latitude", fontsize=12)
plt.ylabel("Max Temperature (F)", fontsize=12)
plt.title("City Latitude vs. Max Temperature", fontsize=14)

# Display plot
plt.show()

In [3]:
# Display the full list of generated city names (inspection only; nothing is
# assigned here)
cities

['tsihombe',
 'sora',
 'castro',
 'busselton',
 'amderma',
 'santa maria',
 'cape town',
 'volovo',
 'new norfolk',
 'marcona',
 'rikitea',
 'hobart',
 'yellowknife',
 'henties bay',
 'ardakan',
 'nanortalik',
 'mingshui',
 'varaklani',
 'east london',
 'kamenskoye',
 'lavrentiya',
 'namwala',
 'sao miguel do araguaia',
 'kapaa',
 'ushuaia',
 'mataura',
 'port hawkesbury',
 'leningradskiy',
 'moron',
 'cayenne',
 'san cristobal',
 'hermanus',
 'carnarvon',
 'saskylakh',
 'jamestown',
 'kabare',
 'najran',
 'vao',
 'provideniya',
 'upernavik',
 'barentsburg',
 'ribeira grande',
 'samusu',
 'arraial do cabo',
 'quatre cocos',
 'bom jesus',
 'katsuura',
 'waipawa',
 'longyearbyen',
 'flinders',
 'bluff',
 'manjacaze',
 'port macquarie',
 'vilyuysk',
 'havoysund',
 'makakilo city',
 'illoqqortoormiut',
 'victoria',
 'hithadhoo',
 'port alfred',
 'lagoa',
 'burnie',
 'high rock',
 'raga',
 'dillon',
 'atuona',
 'yulara',
 'fort-shevchenko',
 'faanui',
 'fortuna',
 'qaanaaq',
 'souillac',
 '

In [None]:
# This cell previously held an `except:` clause with no matching `try`, which
# is a SyntaxError when the cell is run. "City not found" handling lives in
# the API request loop, so nothing needs to execute here.