# Imports

In [16]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import seaborn as sns

#data science
import pandas as pd
import numpy as np

import requests
from pprint import pprint
import time


#regression
from scipy.stats import linregress
import statsmodels.api as sm
from sklearn.linear_model import LinearRegression

# Import API key
from api_keys import weather_api_key

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

# Output file (CSV) that the collected weather DataFrame is written to
output_data_file = "cities_new.csv"

# (min, max) bounds, in degrees, used when sampling random latitudes and longitudes
lat_range = (-90, 90)
lng_range = (-180, 180)

# Generate City List

In [17]:
# Build the list of unique city names nearest to 1500 random coordinates.
# NOTE(review): no random seed is set in the visible cells, so the sampled
# coordinates (and therefore the city list) differ on every run — confirm
# whether that is intended.

# Create a set of random lat and lng combinations
lats = np.random.uniform(lat_range[0], lat_range[1], size=1500)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=1500)

# Materialise the pairs as a list: a bare zip() is a one-shot iterator that
# would be left exhausted (silently empty) after the loop below.
lat_lngs = list(zip(lats, lngs))

# Unique city names, kept in first-seen order
cities = []
# Companion set gives O(1) membership tests instead of scanning the list
seen_cities = set()

# Identify nearest city for each lat, lng combination
for lat, lng in lat_lngs:
    city = citipy.nearest_city(lat, lng).city_name

    # If the city is unique, then add it to our cities list
    if city not in seen_cities:
        seen_cities.add(city)
        cities.append(city)

# Print the city count to confirm sufficient count
len(cities)

628

In [18]:
# Work on a shallow copy of the full city list so the original stays untouched
cities_sub = list(cities)
cities_sub

['punta arenas',
 'bengkulu',
 'severo-kurilsk',
 'lillooet',
 'tahoua',
 'new norfolk',
 'ushuaia',
 'barra patuca',
 'vardo',
 'illoqqortoormiut',
 'khatanga',
 'puerto ayora',
 'yellowknife',
 'barrow',
 'saleaula',
 'can tho',
 'kruisfontein',
 'grafton',
 'bolshaya sosnova',
 'san carlos de bariloche',
 'maumere',
 'butaritari',
 'avarua',
 'sibolga',
 'chokurdakh',
 'cherskiy',
 'hermanus',
 'idrinskoye',
 'kedgwick',
 'albany',
 'anadyr',
 'fujin',
 'cabinda',
 'lensk',
 'tonosho',
 'atuona',
 'campo largo',
 'mahebourg',
 'hamilton',
 'xining',
 'haines junction',
 'rikitea',
 'tomatlan',
 'akoupe',
 'port elizabeth',
 'jamestown',
 'karaul',
 'escanaba',
 'aklavik',
 'eibergen',
 'victoria',
 'taolanaro',
 'stokmarknes',
 'mataura',
 'havre de grace',
 'ancud',
 'bintulu',
 'cabedelo',
 'samarinda',
 'busselton',
 'quatre cocos',
 'mahina',
 'fevralsk',
 'ise',
 'mys shmidta',
 'mukhen',
 'wenling',
 'guerrero negro',
 'kurilsk',
 'wanning',
 'shahr-e kord',
 'longyearbyen',
 

# Make Requests

In [19]:
# Query the OpenWeatherMap current-weather endpoint once per city in
# `cities_sub`, collect the parsed fields into parallel lists, and assemble
# them into the `df` DataFrame. Cities that fail (network error, 404, other
# status, malformed payload) are reported and skipped.

#init lists to hold parsed data
lats = []
longs = []
temps = []
humids = []
clouds = []
speeds = []
city_names = []

# Request settings are identical for every city, so define them once.
# HTTPS keeps the API key out of clear-text traffic.
base_url = "https://api.openweathermap.org/data/2.5/weather"
units = "imperial"
# Seconds to wait on a single request; without a timeout one stalled
# connection would hang the whole run.
request_timeout = 10

for i, city in enumerate(cities_sub):
    # Pass the query through `params` so requests URL-encodes city names
    # containing spaces or non-ASCII characters.
    query = {"q": city, "units": units, "appid": weather_api_key}

    try:
        #make the request
        response = requests.get(base_url, params=query, timeout=request_timeout)
    except requests.RequestException as e:
        # A network failure for one city should not abort the remaining cities
        print(f"Request failed for city {city}: {e}")
    else:
        #error check
        if response.status_code == 200:
            try:
                #extract the data (all fields first, so the lists stay aligned
                #if any key is missing)
                data = response.json()
                lat = data["coord"]["lat"]
                long = data["coord"]["lon"]
                temp = data["main"]["temp"]
                humidity = data["main"]["humidity"]
                cloudiness = data["clouds"]["all"]
                speed = data["wind"]["speed"]
                city_name = data["name"]
            except (KeyError, TypeError, ValueError) as e:
                # Malformed or incomplete payload: report it and skip the city
                print(f"Through exception for city {city}: {e}")
            else:
                #save the data
                city_names.append(city_name)
                lats.append(lat)
                longs.append(long)
                temps.append(temp)
                humids.append(humidity)
                clouds.append(cloudiness)
                speeds.append(speed)

        elif response.status_code == 404:
            print (f"Missing data in OpenWeatherAPI for {city}")

        else:
            print(response.status_code)
            print("Oh gosh darnit. The API is broken. Sad Face.")

    #print every 5
    if i % 5 == 0:
        print (f"Got data for city index: {i} of {len(cities_sub)}")

    # Pause between calls to pace the requests
    time.sleep(1)

# make the dataframe
df = pd.DataFrame(
    {
        "City Name": city_names,
        "Latitude": lats,
        "Longitude": longs,
        "Temperature": temps,
        "Humidity": humids,
        "Cloudiness": clouds,
        "Wind Speed": speeds,
    }
)
df.head(10)

Got data for city index: 0 of 628
Got data for city index: 5 of 628
Missing data in OpenWeatherAPI for illoqqortoormiut
Got data for city index: 10 of 628
Missing data in OpenWeatherAPI for saleaula
Got data for city index: 15 of 628
Got data for city index: 20 of 628
Got data for city index: 25 of 628
Got data for city index: 30 of 628
Got data for city index: 35 of 628
Got data for city index: 40 of 628
Got data for city index: 45 of 628
Missing data in OpenWeatherAPI for karaul
Got data for city index: 50 of 628
Missing data in OpenWeatherAPI for taolanaro
Got data for city index: 55 of 628
Got data for city index: 60 of 628
Missing data in OpenWeatherAPI for fevralsk
Missing data in OpenWeatherAPI for mys shmidta
Got data for city index: 65 of 628
Got data for city index: 70 of 628
Got data for city index: 75 of 628
Got data for city index: 80 of 628
Got data for city index: 85 of 628
Got data for city index: 90 of 628
Missing data in OpenWeatherAPI for tsihombe
Missing data in Ope

Unnamed: 0,City Name,Latitude,Longitude,Temperature,Humidity,Cloudiness,Wind Speed
0,Punta Arenas,-53.15,-70.92,46.4,81,90,8.05
1,Bengkulu,-3.8,102.27,75.29,86,100,4.03
2,Severo-Kuril'sk,50.68,156.12,38.79,73,98,31.65
3,Lillooet,50.69,-121.94,50.85,93,90,8.05
4,Tahoua,14.89,5.27,82.4,23,0,4.7
5,New Norfolk,-42.78,147.06,46.83,63,1,1.01
6,Ushuaia,-54.8,-68.3,66.2,37,0,3.36
7,Barra Patuca,15.8,-84.28,77.72,91,100,13.42
8,Vardø,70.37,31.11,39.2,80,75,28.86
9,Khatanga,71.97,102.5,-6.59,90,13,8.1


In [1]:
# Write the collected weather table to the configured CSV path; index=False
# leaves the DataFrame's row index out of the file.
# NOTE(review): the execution count (In [1]) and the NameError below show this
# cell was run on a fresh kernel before `df` existed — run the request cell
# that builds `df` first.
df.to_csv(output_data_file, index=False)

NameError: name 'df' is not defined