It doesn't say anywhere that we have to use matplotlib.

Since that has never happened before, I used a visualization library I prefer over plt.

Dependencies and Setup

In [30]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import requests
from scipy.stats import linregress
from citipy import citipy  # Incorporated citipy to determine city based on latitude and longitude
# Import API key
from config import weather_api_key as api_key
from iso3166 import countries_by_alpha2  # pip install is03166


sns.set(rc={'figure.dpi': 300, 'savefig.dpi': 300})
sns.set_context('notebook')


def k_to_f(temp):
    temp = round((temp - 273.15) * 9 / 5 + 32, 2)
    return temp


def kmh_to_mph(speed):
    return speed * 0.621371


def north_or_south(latitude):
    return latitude < 0


# output file
output_file = "cities.csv"

Range of latitudes and longitudes

In [31]:
lat_range = (-90, 90)
lng_range = (-180, 180)

lats = [np.random.uniform(lat_range[0], lat_range[1]) for i in range(3000)]
lngs = [np.random.uniform(lng_range[0], lng_range[1]) for i in range(3000)]

lat_lngs = list(zip(lats, lngs))

cities = []

for lat_lng in lat_lngs:
    city = citipy.nearest_city(lat_lng[0], lat_lng[1]).city_name
    city = city.strip()
    # If the city is unique, then add it to a our cities list
    if city not in cities:
        cities.append(city)

print(len(cities))

1024


In [32]:
temp_dict = {}
# {city:[lat, max_temp, humidity, windspeed, clouds]}
i, j = 0, 0

# I didnt store all of the data needed for part 2 because I didnt realize Id need it
# when doing part 1. So I had to remake the df as cities_2

for city in cities:
    if i < 500:
        search_str = f'http://api.openweathermap.org/data/2.5/weather?q={city}&appid={api_key}'
        resp = requests.get(search_str)
        if resp.status_code == 200:
            resp = resp.json()
            vs = [
                    resp['coord']['lat'],
                    resp['coord']['lon'],
                    k_to_f(resp['main']['temp_max']),
                    resp['main']['humidity'],
                    kmh_to_mph(resp['wind']['speed']),
                    resp['clouds']['all'],
                    countries_by_alpha2[resp['sys']['country']][0],
                    resp['dt']
            ]
            temp_dict[city] = vs
            i += 1
            print(f'Request {j}: {city} request successful.')
        else:
            print(f'Request {j}: {city} request failed.')
        j += 1
    else:
        print(
                f'{i} successful requests made.\n'
                f'{round(i / j * 100, 2)}% of requests succeeded.'
        )
        break

Request 0: gushikawa request successful.
Request 1: punta arenas request successful.
Request 2: barawe request failed.
Request 3: west wendover request successful.
Request 4: cape town request successful.
Request 5: busselton request successful.
Request 6: ngunguru request successful.
Request 7: umm kaddadah request successful.
Request 8: mataura request successful.
Request 9: severo-kurilsk request successful.
Request 10: buckeye request successful.
Request 11: kavieng request successful.
Request 12: anadyr request successful.
Request 13: hervey bay request successful.
Request 14: nicoya request successful.
Request 15: irituia request successful.
Request 16: becerril request successful.
Request 17: comodoro rivadavia request successful.
Request 18: puerto ayora request successful.
Request 19: illoqqortoormiut request failed.
Request 20: kutum request successful.
Request 21: balikpapan request successful.
Request 22: katsuura request successful.
Request 23: faanui request successful.
R

In [37]:
dft = pd.DataFrame(temp_dict).T

# dft.columns = [
#         'Latitude',
#         'Max Temperature (F)',
#         'Humidity (%)',
#         'Wind Speed (mph)',
#         'Clouds (%)'
# ]

dft.columns = [
        'lat',
        'long',
        'max_temp',
        'humidity',
        'wind_speed',
        'clouds',
        'country',
        'dt'
]

dft['is_south'] = dft['lat'].apply(lambda x: 1 if x < 0 else 0)
print(dft.head())

dft.to_csv('cities_2.csv')

                   lat      long max_temp humidity wind_speed clouds  \
gushikawa      26.3544  127.8686    70.83       72   4.629214     34   
punta arenas    -53.15  -70.9167    42.82       70   7.350819     40   
west wendover  40.7391 -114.0733    60.94       50   2.970153    100   
cape town     -33.9258   18.4232    68.34       63   0.832637     13   
busselton       -33.65  115.3333    41.04       71   1.217887     73   

                                country          dt  is_south  
gushikawa                         Japan  1635184942         0  
punta arenas                      Chile  1635184753         1  
west wendover  United States of America  1635184887         0  
cape town                  South Africa  1635184933         1  
busselton                     Australia  1635184942         1  


In [None]:
df_north = dft[dft['is_south'] == 0].copy()
df_south = dft[dft['is_south'] == 1].copy()


def make_figures(df, name=None, x='Latitude', y=None, hue_by=None):
    sns.scatterplot(
            x=x,
            y=y,
            data=df,
            hue=hue_by,
            palette='magma',
            alpha=0.8,
            edgecolor='b',
            legend=False
    )

    if name is not None:
        title = f'{name.title()} - {y} vs {x}'
    else:
        title = f'{y} vs. {x}'

    plt.title(title)
    plt.savefig(f'{title}.png')
    plt.close()
    return


ys = ['Max Temperature (F)', 'Humidity (%)', 'Wind Speed (mph)', 'Clouds (%)']

for y in ys:
    make_figures(dft, y=y, hue_by=dft['is_south'])

- Max temperature vs Latitude
    - This chart demonstrates global temperature trends. It succinctly illustrates that temperatures
  greatly decrease as you move closer to the poles.
- Humidity vs Latitude
    - This chart analyzes trends in humidity as you move closer to the poles. There is no trend.
- Wind speed vs Latitude
    - I find this graph the most interesting. It demonstrates the chaoticness of weather at the poles,
  which are well known to be volatile regions regarding wind flow due to their [unique properties](https://nsidc.org/cryosphere/arctic-meteorology/weather_climate_patterns.html).
- Clouds vs Latitude
    - Again, another uninteresting chart that is essentially random.

In [None]:
# if only :)
# import plotly.express as px
#
# fig = px.scatter(
#         df_north,
#         x='Latitude',
#         y=ys[0],
#         trendline='ols'
# )
# fig.show()
#
# reg = px.get_trendline_results(fig)
# reg.query().px_fit_results.iloc[0].summary()

# def scatter_trend(df, name=None, x='Latitude', y=None, hue_by=None):

#     fig = px.scatter(
#             df,
#             x=x,
#             y=y,
#             trendline='ols',
#             color=hue_by
#     )
#
#     plt.savefig(f'{name}.png')
#     plt.show()
#     plt.close()
#     return

# fig = scatter_trend(df_north, name='north', y='Max Temperature (F)')
# plt.show()


In [None]:
def make_figures_reg(df, name, x='Latitude', y=None):
    reg_vals = linregress(df[x], df[y])

    sns.regplot(
            x=x,
            y=y,
            data=df,
            scatter=True,
            fit_reg=True,
            ci=None,
            # label='Max Temperature (F) vs. Latitude',
            scatter_kws={
                    's': 10,
                    'alpha': 0.8
            },
            line_kws={
                    'color': 'r',
                    'lw': 1,
                    'label': f'Regression: {reg_vals[2]}'
            }
    )

    title = f'{name.title()} - {y} vs {x}'

    plt.xlabel(x, fontsize=9)
    plt.xticks(fontsize=8)
    plt.ylabel(y, fontsize=9)
    plt.yticks(fontsize=8)


    plt.title(title)
    plt.legend([f'Regression: {round(reg_vals[2],3)}', y], fontsize='8')
    plt.tight_layout()
    plt.savefig(f'{title}.png')
    plt.show()
    plt.close()
    return


dfs = [df_north, df_south]
names = ['North', 'South']
ys = ['Max Temperature (F)', 'Humidity (%)', 'Wind Speed (mph)', 'Clouds (%)']

for frame, name in zip(dfs, names):
    for y in ys:
        make_figures_reg(frame, name, y=y)

- Max Temp vs Latitude
    - North and South pole data is predictably consistent, and the same as above. As you move away from the equator, temperatures decrease.
- Clouds vs Latitude
    - In the north pole, cloud coverage appears to be densely concentrated around lat 40-60, but otherwise seems random.
    - It would be interesting to have more data for the south pole to see if a similar trend emerges; however, with this data, it is not possible to determine that.
- Wind vs Latitude
    - We again see the trend from the single chart above hold. The trend, when displayed like this, is much weaker than it initially appeared.
- Humidity vs Latitude
    - Not much to see here. The dearth of data in the south relative to the north again complicates any straightforward comparison.

In [26]:
cntry = countries_by_alpha2[resp['sys']['country']][0]

'Brazil'