In [7]:
import random
from citipy import citipy
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 

In [8]:
import requests
from config import weather_api_key
from datetime import datetime
import time
from scipy.stats import linregress

In [9]:
# generate 1500 random latitude and longitude data as a list of tuples
lats = np.random.uniform(-90.0,90.0,size=1500)
lngs = np.random.uniform(-180.0,180.0,size=1500)

In [10]:
lats_lngs = zip(lats, lngs)
coordinates = list(lats_lngs)
print(coordinates[:11])

[(-8.817231907038618, -50.7074924791703), (-78.86387052392087, 8.440568067936823), (3.493991777500341, 149.9639761780321), (25.177371107753785, 140.2411884230538), (23.623053469478407, 61.98957319161053), (75.03313527555224, -29.994906199081356), (46.371694529254086, -60.357796902833215), (35.833679311822934, 58.72192272645316), (14.202041534265675, -95.3990470414174), (-80.15048494549488, -24.110031622346696), (-44.16907601951362, 167.57845909901192)]


In [11]:
# use citipy module to get nearest city names
cities = list()
for coor in coordinates:
    cities_name = citipy.nearest_city(coor[0], coor[1]).city_name
    # ensure no any duplicate cities
    if cities_name not in cities:
        cities.append(cities_name)
print(cities[:10], 'Generate', len(cities))

['conceicao do araguaia', 'hermanus', 'kavieng', 'shingu', 'jiwani', 'illoqqortoormiut', 'sydney mines', 'neyshabur', 'xadani', 'ushuaia'] Generate 620


In [12]:
# use OpenWeather API to request, get, parse JSON to retrieve weather data for each city.

# initial counters for log and sets
record_count = 1 
set_count = 1

city_data = list()
basic_url = "http://api.openweathermap.org/data/2.5/weather?units=imperial&APPID=" + weather_api_key

print('Beginning Data Retrieval     ')
print("-----------------------------")

# use enumerate() method to loop index and item simutanously 
for i, item in enumerate(cities):
    if i % 50 == 0 and i != 0 :
        record_count = 1  # initialize at every beginning of set
        set_count +=1 # increment set count
    # build URL for API call
    url = basic_url + '&q=' + item
    # Log the URL, record, and set numbers and the city.
    print(f'Processing Record {record_count} of Set {set_count} | {item}')
    # increment record count
    record_count +=1

    try: 
        js = requests.get(url).json()
        city_name = item
        city_country = js['sys']['country']
        city_UTCdate = js['dt']
        city_date = datetime.utcfromtimestamp(city_UTCdate).strftime('%Y-%m-%d %H:%M:%S')
        city_lat = js['coord']['lat']
        city_lon = js['coord']['lon']
        city_max_temp = js['main']['temp_max']
        city_humidity = js['main']['humidity']
        city_cload = js['clouds']['all']
        city_wind_speed = js['wind']['speed']
        # append as a list of dictionaries
        city_data.append({'City':city_name, 'Coounty':city_country, 'Date':city_date, 
                        'Lat':city_lat, 'Lng': city_lon,'Max Temp':city_max_temp,
                        'Humidity':city_humidity,'Cloudiness': city_cload,'Wind Speed':city_wind_speed})
    except:
        print("City not found. Skipping...")
        pass

# Indicate that Data Loading is complete
print('-------------------------------')
print('Data Retrieval Complete        ')
print('-------------------------------')

print(len(city_data))

Beginning Data Retrieval     
-----------------------------
Processing Record 1 of Set 1 | conceicao do araguaia
Processing Record 2 of Set 1 | hermanus
Processing Record 3 of Set 1 | kavieng
Processing Record 4 of Set 1 | shingu
Processing Record 5 of Set 1 | jiwani
Processing Record 6 of Set 1 | illoqqortoormiut
City not found. Skipping...
Processing Record 7 of Set 1 | sydney mines
Processing Record 8 of Set 1 | neyshabur
Processing Record 9 of Set 1 | xadani
City not found. Skipping...
Processing Record 10 of Set 1 | ushuaia
Processing Record 11 of Set 1 | te anau
Processing Record 12 of Set 1 | tasiilaq
Processing Record 13 of Set 1 | moindou
Processing Record 14 of Set 1 | severo-kurilsk
Processing Record 15 of Set 1 | rikitea
Processing Record 16 of Set 1 | hay river
Processing Record 17 of Set 1 | praia da vitoria
Processing Record 18 of Set 1 | marsh harbour
Processing Record 19 of Set 1 | yumen
Processing Record 20 of Set 1 | east london
Processing Record 21 of Set 1 | jamest

Processing Record 33 of Set 4 | urdzhar
City not found. Skipping...
Processing Record 34 of Set 4 | sadao
Processing Record 35 of Set 4 | nuuk
Processing Record 36 of Set 4 | kungsbacka
Processing Record 37 of Set 4 | vao
Processing Record 38 of Set 4 | talakan
Processing Record 39 of Set 4 | kruisfontein
Processing Record 40 of Set 4 | pucallpa
Processing Record 41 of Set 4 | belushya guba
City not found. Skipping...
Processing Record 42 of Set 4 | nizhneyansk
City not found. Skipping...
Processing Record 43 of Set 4 | sechura
Processing Record 44 of Set 4 | auki
Processing Record 45 of Set 4 | belmonte
Processing Record 46 of Set 4 | komsomolskiy
Processing Record 47 of Set 4 | bam
Processing Record 48 of Set 4 | sitka
Processing Record 49 of Set 4 | pleasanton
Processing Record 50 of Set 4 | roma
Processing Record 1 of Set 5 | kayes
Processing Record 2 of Set 5 | guanare
Processing Record 3 of Set 5 | mar del plata
Processing Record 4 of Set 5 | saldanha
Processing Record 5 of Set 5

Processing Record 24 of Set 8 | sarankhola
Processing Record 25 of Set 8 | paradwip
City not found. Skipping...
Processing Record 26 of Set 8 | mys shmidta
City not found. Skipping...
Processing Record 27 of Set 8 | vardo
Processing Record 28 of Set 8 | esperance
Processing Record 29 of Set 8 | bakel
Processing Record 30 of Set 8 | lavrentiya
Processing Record 31 of Set 8 | vostok
Processing Record 32 of Set 8 | jiangyan
Processing Record 33 of Set 8 | dolbeau
City not found. Skipping...
Processing Record 34 of Set 8 | banamba
Processing Record 35 of Set 8 | longlac
City not found. Skipping...
Processing Record 36 of Set 8 | tarauaca
Processing Record 37 of Set 8 | ilulissat
Processing Record 38 of Set 8 | kijang
Processing Record 39 of Set 8 | nipawin
Processing Record 40 of Set 8 | tiznit
Processing Record 41 of Set 8 | bytow
Processing Record 42 of Set 8 | margate
Processing Record 43 of Set 8 | mumford
Processing Record 44 of Set 8 | shimoda
Processing Record 45 of Set 8 | castro
P

Processing Record 12 of Set 12 | marrakesh
Processing Record 13 of Set 12 | de-kastri
Processing Record 14 of Set 12 | tucurui
Processing Record 15 of Set 12 | la ligua
Processing Record 16 of Set 12 | bacungan
Processing Record 17 of Set 12 | mayo
Processing Record 18 of Set 12 | manggar
Processing Record 19 of Set 12 | tual
Processing Record 20 of Set 12 | primore
City not found. Skipping...
Processing Record 21 of Set 12 | odienne
Processing Record 22 of Set 12 | alice springs
Processing Record 23 of Set 12 | perth
Processing Record 24 of Set 12 | oistins
Processing Record 25 of Set 12 | makungu
Processing Record 26 of Set 12 | chulym
Processing Record 27 of Set 12 | san quintin
Processing Record 28 of Set 12 | lata
Processing Record 29 of Set 12 | babra
Processing Record 30 of Set 12 | sinnamary
Processing Record 31 of Set 12 | leek
Processing Record 32 of Set 12 | novyy urengoy
Processing Record 33 of Set 12 | port lincoln
Processing Record 34 of Set 12 | fort-shevchenko
Processin

In [16]:
# Convert the array of dictionaries to a Pandas DataFrame.
city_data_df = pd.DataFrame(city_data)
city_data_df.head(10)

Unnamed: 0,City,Coounty,Date,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed
0,conceicao do araguaia,BR,2022-07-04 03:31:53,-8.2578,-49.2647,77.54,40,59,3.98
1,hermanus,ZA,2022-07-04 03:26:46,-34.4187,19.2345,52.2,80,3,6.38
2,kavieng,PG,2022-07-04 03:25:29,-2.5744,150.7967,85.46,68,50,14.7
3,shingu,JP,2022-07-04 03:26:02,33.7333,135.9833,74.46,98,100,3.15
4,jiwani,PK,2022-07-04 03:31:54,25.05,61.7417,83.79,82,97,5.12
5,sydney mines,CA,2022-07-04 03:31:55,46.2367,-60.2177,63.81,87,75,7.0
6,neyshabur,IR,2022-07-04 03:31:55,36.2133,58.7958,74.75,36,0,1.99
7,ushuaia,AR,2022-07-04 03:31:37,-54.8,-68.3,33.46,80,75,3.44
8,te anau,NZ,2022-07-04 03:31:56,-45.4167,167.7167,48.2,79,64,7.4
9,tasiilaq,GL,2022-07-04 03:31:57,65.6145,-37.6368,35.58,84,45,3.44


In [17]:
# Create the output file (CSV).
output_data_file = "weather_data/cities.csv"
# Export the City_Data into a CSV.
city_data_df.to_csv(output_data_file, index_label="City_ID")

In [6]:


# %%
# 4 scatter plots, showcase weather parameter changing by latitude
# extract relevant fields(columns) as Series
lats_Series = city_data_df['Lat']
max_temp_Series = city_data_df['Max Temp']
humidity_Series = city_data_df['Humidity']
cloud_Series = city_data_df['Cloudiness']
wind_speed_Series = city_data_df['Wind Speed']

#show today's datetime in fig label
today = time.strftime('%x')
# %%
# build the 1st scatter plot for lat vs. Max Temp.
fig = plt.figure()
plt.scatter(lats_Series,max_temp_Series, 
            alpha=0.8, edgecolors='k', linewidths=1,marker='o', label='Cities' )
plt.title(f"City Latitude vs. Max Temperature " + today)
plt.xlabel("Latitude")
plt.ylabel('Max Temperature (F)')
plt.grid()
plt.savefig('weather_data/Fig1.png')
plt.show()

# %%
# build the 2nd scatter plot for lat vs. Humidity.
fig = plt.figure()
plt.scatter(lats_Series,humidity_Series, 
            alpha=0.8, edgecolors='k', linewidths=1,marker='o', label='Cities' )
plt.title(f"City Latitude vs. Humidity " + today)
plt.xlabel("Latitude")
plt.ylabel('Humidity (%)')
plt.grid()
plt.savefig('weather_data/Fig2.png')
#plt.legend()
plt.show()

# %%
# build the 3rd scatter plot for lat vs. Cloudiness.
fig = plt.figure()
plt.scatter(lats_Series,cloud_Series, 
            alpha=0.8, edgecolors='k', linewidths=1,marker='o', label='Cities' )
plt.title(f"City Latitude vs. Cloudiness (%) " + today)
plt.xlabel("Latitude")
plt.ylabel('Cloudiness (%)')
plt.grid()
plt.savefig('weather_data/Fig3.png')
plt.show()


# %%
# build the 4th scatter plot for lat vs. Wind Speed.
fig = plt.figure()
plt.scatter(lats_Series,wind_speed_Series, 
            alpha=0.8, edgecolors='k', linewidths=1,marker='o', label='Cities' )
plt.title(f"City Latitude vs. Wind Speed " + today)
plt.xlabel("Latitude")
plt.ylabel('Wind Speed (mph)')
plt.grid()
plt.savefig('weather_data/Fig4.png')
plt.show()

# %% [markdown]
# create a funtion with 5 variables, returns a combine plot of regree line and scatter
# Main process: 
# 1. retrieve 5 stats info by linregress() method 
# 2. use slope and alpha to get regression equation
# 3. list comprehension method to get each Y_expected value(in reg line) based by x_value
# 4. draw plot and scatter in same figure
# %%
def plot_linear_function(x_values, y_values,title, y_label, text_coordinates):
    (slope, intercept, r_value, p_value, std_err) = linregress(x_values,y_values)
    #step 2: Get the equation of the line. and R, P values
    line_eq_str = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
    correl_str = str(round(r_value,2))
    pvalue_str = str(p_value)
    #step 3: Calculate the regression line "y values" from the slope and intercept.
    regress_Y_values =[(x * slope + intercept) for x in x_values]
    #step 4:Create a scatter plot and plot the regression line.
    plt.scatter(x_values,y_values)
    plt.plot(x_values,regress_Y_values, color = 'r')
    plt.annotate(line_eq_str,xy= text_coordinates, fontsize =15, color = 'red')
    plt.xlabel('Latitude')
    plt.ylabel(y_label)
    plt.title(title)

    #plt.show()
    print(f'R_value is {correl_str}, and P_value is {pvalue_str}')
# %%
# seperate northern and southern hemisphere latitude
northern_hemi_df = city_data_df.loc[(city_data_df['Lat'] >= 0),:]

southern_hemi_df = city_data_df.loc[(city_data_df['Lat'] < 0),:]

northern_hemi_df.count()
southern_hemi_df.count()
# %%
# build the Northern hemisphere regression line and scatter plot for lat vs. Max Temp.
N_max_temp_x_Series = northern_hemi_df['Lat']
N_max_temp_y_Series = northern_hemi_df['Max Temp']
# call function: plot_linear_function 
plot_linear_function(N_max_temp_x_Series, 
                N_max_temp_y_Series, 
                'Linear Regression on the Northern Humisphere \n for Maximun Temperature',
                'Max Temp', (10,-40))
plt.savefig('weather_data/Regress_fig1.png')
plt.show()
# %%
# build the Southern hemisphere regression line and scatter plot for lat vs. Max Temp.
S_max_temp_x_Series = southern_hemi_df['Lat']
S_max_temp_y_Series = southern_hemi_df['Max Temp']
# call function: plot_linear_function 
plot_linear_function(S_max_temp_x_Series, 
                S_max_temp_y_Series, 
                '''Linear Regression on the Southern Humisphere 
                \n for Maximun Temperature''',
                'Max Temp', (-50,90))
plt.savefig('weather_data/Regress_fig2.png')
plt.show()
# %%
# build the Northern hemisphere regression line and scatter plot for lat vs. Humidity.
N_humidity_x_Series = northern_hemi_df['Lat']
N_humidity_y_Series = northern_hemi_df['Humidity']
# call function: plot_linear_function 
plot_linear_function(N_humidity_x_Series, 
                N_humidity_y_Series, 
                'Linear Regression on the Northern Humisphere \n for % Humidity',
                '% Humidity', (50,15))
plt.savefig('weather_data/Regress_fig3.png')
plt.show()

# %%
# build the Southern hemisphere regression line and scatter plot for lat vs. Humidity.
S_humidity_x_Series = southern_hemi_df['Lat']
S_humidity_y_Series = southern_hemi_df['Humidity']
# call function: plot_linear_function 
plot_linear_function(S_humidity_x_Series, 
                S_humidity_y_Series, 
                'Linear Regression on the Southern Humisphere \n for % Humidity',
                '% Humidity', (-55,10))
plt.savefig('weather_data/Regress_fig4.png')
plt.show()

# %%
# build the Northern hemisphere regression line and scatter plot for lat vs. Cloudiness.
N_Cloudiness_x_Series = northern_hemi_df['Lat']
N_Cloudiness_y_Series = northern_hemi_df['Cloudiness']
# call function: plot_linear_function 
plot_linear_function(N_Cloudiness_x_Series, 
                N_Cloudiness_y_Series, 
                'Linear Regression on the Northern Humisphere \n for % Cloudiness',
                '% Cloudiness', (10,50))
plt.savefig('weather_data/Regress_fig5.png')
plt.show()

# %%
# build the Southern hemisphere regression line and scatter plot for lat vs. Cloudiness.
S_Cloudiness_x_Series = southern_hemi_df['Lat']
S_Cloudiness_y_Series = southern_hemi_df['Cloudiness']
# call function: plot_linear_function 
plot_linear_function(S_Cloudiness_x_Series, 
                S_Cloudiness_y_Series, 
                'Linear Regression on the Southern Humisphere \n for % Cloudiness',
                '% Cloudiness', (-55,50))
plt.savefig('weather_data/Regress_fig6.png')
plt.show()

# %%
# build the Northern hemisphere regression line and scatter plot for lat vs. Wind Speed.
N_WindSpeed_x_Series = northern_hemi_df['Lat']
N_WindSpeed_y_Series = northern_hemi_df['Wind Speed']
# call function: plot_linear_function 
plot_linear_function(N_WindSpeed_x_Series, 
                N_WindSpeed_y_Series, 
                'Linear Regression on the Northern Humisphere \n for Wind Speed',
                'Wind Speed', (40,35))
plt.savefig('weather_data/Regress_fig7.png')
plt.show()

# %%
# build the Southern hemisphere regression line and scatter plot for lat vs. Wind Speed.
S_WindSpeed_x_Series = southern_hemi_df['Lat']
S_WindSpeed_y_Series = southern_hemi_df['Wind Speed']
# call function: plot_linear_function 
plot_linear_function(S_WindSpeed_x_Series, 
                S_WindSpeed_y_Series, 
                'Linear Regression on the Southern Humisphere \n for Wind Speed',
                'Wind Speed', (-50,20))
plt.savefig('weather_data/Regress_fig8.png')
plt.show()

# %%

Beginning Data Retrieval     
-----------------------------
Processing Record 1 of Set 1 | cockburn town
Processing Record 2 of Set 1 | saint-leu
Processing Record 3 of Set 1 | hobart
Processing Record 4 of Set 1 | vaini
Processing Record 5 of Set 1 | marovoay
Processing Record 6 of Set 1 | zhigansk
Processing Record 7 of Set 1 | lensk
Processing Record 8 of Set 1 | kantang
Processing Record 9 of Set 1 | ushuaia
Processing Record 10 of Set 1 | avarua
Processing Record 11 of Set 1 | busselton
Processing Record 12 of Set 1 | punta arenas
Processing Record 13 of Set 1 | dolbeau
City not found. Skipping...
Processing Record 14 of Set 1 | mount gambier
Processing Record 15 of Set 1 | camargo
Processing Record 16 of Set 1 | upernavik
Processing Record 17 of Set 1 | touros
Processing Record 18 of Set 1 | gravdal
Processing Record 19 of Set 1 | portland
Processing Record 20 of Set 1 | tawnat
City not found. Skipping...
Processing Record 21 of Set 1 | new norfolk
Processing Record 22 of Set 1 |

Processing Record 38 of Set 4 | ahipara
Processing Record 39 of Set 4 | sur
Processing Record 40 of Set 4 | amderma
City not found. Skipping...
Processing Record 41 of Set 4 | piotrkow trybunalski
Processing Record 42 of Set 4 | adelaide
Processing Record 43 of Set 4 | bhadrachalam
Processing Record 44 of Set 4 | ponta delgada
Processing Record 45 of Set 4 | egvekinot
Processing Record 46 of Set 4 | thinadhoo
Processing Record 47 of Set 4 | kruisfontein
Processing Record 48 of Set 4 | uusikaupunki
Processing Record 49 of Set 4 | sept-iles
Processing Record 50 of Set 4 | kasongo-lunda
Processing Record 1 of Set 5 | hermanus
Processing Record 2 of Set 5 | brae
Processing Record 3 of Set 5 | asau
Processing Record 4 of Set 5 | ormara
Processing Record 5 of Set 5 | mount isa
Processing Record 6 of Set 5 | belushya guba
City not found. Skipping...
Processing Record 7 of Set 5 | synya
Processing Record 8 of Set 5 | suao
City not found. Skipping...
Processing Record 9 of Set 5 | grindavik
Pro

Processing Record 26 of Set 8 | new baltimore
Processing Record 27 of Set 8 | siavonga
Processing Record 28 of Set 8 | sombrio
Processing Record 29 of Set 8 | finnsnes
Processing Record 30 of Set 8 | do gonbadan
Processing Record 31 of Set 8 | owando
Processing Record 32 of Set 8 | south lake tahoe
Processing Record 33 of Set 8 | moerai
Processing Record 34 of Set 8 | koscierzyna
Processing Record 35 of Set 8 | hecun
Processing Record 36 of Set 8 | atasu
Processing Record 37 of Set 8 | chuy
Processing Record 38 of Set 8 | esperance
Processing Record 39 of Set 8 | guanica
Processing Record 40 of Set 8 | ulladulla
Processing Record 41 of Set 8 | kroya
Processing Record 42 of Set 8 | provideniya
Processing Record 43 of Set 8 | vanavara
Processing Record 44 of Set 8 | sechura
Processing Record 45 of Set 8 | kisangani
Processing Record 46 of Set 8 | sumbawa
City not found. Skipping...
Processing Record 47 of Set 8 | bolu
Processing Record 48 of Set 8 | vao
Processing Record 49 of Set 8 | or

Processing Record 16 of Set 12 | kuala terengganu
Processing Record 17 of Set 12 | camacha
Processing Record 18 of Set 12 | umm kaddadah
Processing Record 19 of Set 12 | kihei
Processing Record 20 of Set 12 | uvalde
Processing Record 21 of Set 12 | otradnoye
Processing Record 22 of Set 12 | merauke
Processing Record 23 of Set 12 | placido de castro
Processing Record 24 of Set 12 | vestmannaeyjar
Processing Record 25 of Set 12 | barra dos coqueiros
Processing Record 26 of Set 12 | athens
Processing Record 27 of Set 12 | manzhouli
Processing Record 28 of Set 12 | gizycko
Processing Record 29 of Set 12 | weligama
Processing Record 30 of Set 12 | fallon
Processing Record 31 of Set 12 | obo
Processing Record 32 of Set 12 | tura
Processing Record 33 of Set 12 | zhuhai
Processing Record 34 of Set 12 | ancud
Processing Record 35 of Set 12 | zyryanka
Processing Record 36 of Set 12 | calama
Processing Record 37 of Set 12 | havelock
Processing Record 38 of Set 12 | san miguel
Processing Record 39

FileNotFoundError: [Errno 2] No such file or directory: 'weather_data/cities.csv'