# Solar Wind Hydro
----

#### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [1]:
# Install citipy, which looks up the nearest city for a pair of coordinates
# pip install citipy

In [2]:
# Dependencies and Setup
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
import scipy.stats as st
from scipy.stats import linregress
import gmaps
import os
import json



# Import API key
from config import weather_api_key
from config import g_key

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

# Output File (CSV)
output_data_file = "resources/cities.csv"

# Coordinate bounds covering the whole earth.
# Latitude runs from -90 (South Pole) through 0 (Equator) to +90 (North Pole);
# longitude runs from -180 to +180 degrees east/west of the Greenwich meridian.
lat_range = (-90, 90)
lng_range = (-180, 180)

# SECURITY: API keys must never be printed or committed.
# Printing them stores the secrets in the notebook output, which gets pushed along with
# the notebook; committing config.py exposes them as well (both providers blocked the
# keys after they were pushed to GitHub). Keep config.py out of version control and do
# not echo the keys here.

[output redacted: API keys must not be stored in notebook output]


## Generate Cities List

In [3]:
# Containers for the sampled coordinates and the unique nearest cities
lat_lngs = []
cities = []

# Number of random coordinate pairs drawn per run.
# A small batch is drawn here; raise this value to collect more cities in a single run
# (every extra city costs one OpenWeatherMap request in the next section).
sample_size = 4

# Draw random latitude / longitude combinations within the earth-wide bounds
lats = np.random.uniform(lat_range[0], lat_range[1], size=sample_size)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=sample_size)
# Pair each latitude with its longitude
lat_lngs = zip(lats, lngs)

# Identify the nearest city for each coordinate pair with citipy.
# A set gives constant-time duplicate checks; the list keeps first-seen order.
seen_cities = set()
for lat, lng in lat_lngs:
    city = citipy.nearest_city(lat, lng).city_name

    # Only keep the first occurrence of each city name
    if city not in seen_cities:
        seen_cities.add(city)
        cities.append(city)

# Show the city count to confirm sufficient count
len(cities)

4

### Perform API Calls
* Perform a weather check on each city using a series of successive API calls.
* Include a print log of each city as it's being processed (with the city number and city name).


In [4]:
# OpenWeatherMap API Key
api_key = weather_api_key
# Base URL for the current-weather endpoint, including the API key.
# Data is requested in METRIC units (Celsius, metre/sec) instead of IMPERIAL.
# HTTPS is used so the API key is not sent in clear text.
url = "https://api.openweathermap.org/data/2.5/weather?units=Metric&APPID=" + api_key

# Parallel lists, one entry per successfully retrieved city
city_list = []
country_list = []
lat_list = []
lng_list = []
temperature_list = []
cloud_list = []
humidity_list = []
wind_list = []

# Number of cities retrieved so far
record_counter = 0

print('------------------------')
print('Beginning Weather Check')
print('------------------------')

for city in cities:
    # Not every city produced by citipy is known to OpenWeatherMap, so a lookup may
    # fail or return a payload without the expected fields.
    try:
        # Let requests URL-encode the city name (names can contain spaces or accents);
        # the timeout keeps one stalled request from hanging the whole run.
        response = requests.get(url, params={"q": city}, timeout=10).json()

        # Read every field BEFORE appending anything: if one is missing, nothing is
        # appended and the parallel lists stay the same length.
        record = (
            response["name"],
            response["sys"]["country"],
            response["coord"]["lat"],
            response["coord"]["lon"],
            response["main"]["temp_max"],
            response["clouds"]["all"],
            response["main"]["humidity"],
            response["wind"]["speed"],
        )
    # Only expected failures are skipped: network errors, a non-JSON body, or
    # missing fields. Anything else (e.g. KeyboardInterrupt) still propagates.
    except (requests.RequestException, ValueError, KeyError, TypeError):
        print('-------------------------')
        print("There are not enough weather information for {}. Skip record.".format(city))
        print('-------------------------')
        continue

    name, country, lat, lng, temp_max, clouds, humidity, wind_speed = record
    city_list.append(name)
    country_list.append(country)
    lat_list.append(lat)
    lng_list.append(lng)
    temperature_list.append(temp_max)
    cloud_list.append(clouds)
    humidity_list.append(humidity)
    wind_list.append(wind_speed)

    record_counter += 1
    print('Retrieving data from city of {} ==> Record {}'.format(city, record_counter))

# End of processing
print('-------------------------')
print('Finished weather check')
print('-------------------------')

------------------------
Beginning Weather Check
------------------------
Retrieving data from city of mataura ==> Record 1
-------------------------
There are not enough weather information{}. Skip record.
-------------------------
Retrieving data from city of hermanus ==> Record 2
Retrieving data from city of georgetown ==> Record 3
-------------------------
Finished weather check
-------------------------


### Convert Raw Data to DataFrame
* Export the city data into a .csv.
* Display the DataFrame

In [5]:
# Column order used when selecting the US subset
columns = ["City", "Lat", "Lng", "Country", "Temp", "Humidity", "Cloudiness", "Wind"]

# Gather the per-city lists into one mapping, then into a data frame
weather_data_dict = dict(
    City=city_list,
    Lat=lat_list,
    Lng=lng_list,
    Country=country_list,
    Temp=temperature_list,
    Humidity=humidity_list,
    Cloudiness=cloud_list,
    Wind=wind_list,
)
weather_data_df = pd.DataFrame(weather_data_dict)

# Keep only cities located in the USA and renumber the rows from 0
is_us_city = weather_data_df["Country"] == "US"
weather_data_US_df = weather_data_df.loc[is_us_city, columns].reset_index(drop=True)
weather_data_US_df



Unnamed: 0,City,Lat,Lng,Country,Max.Temp,Humidity,Cloudiness,Wind Speed


In [6]:
# Display count of weather data values 
# weather_data_df.count()

In [21]:
# Append this run's US cities to the accumulating CSV (repeat runs until enough US
# cities have been collected).
# The header row is written only when the file is created (or still empty), so the
# file always has exactly one header no matter how many times this cell is run.
us_csv_path = 'weather_data_US.csv'
needs_header = not os.path.exists(us_csv_path) or os.path.getsize(us_csv_path) == 0
weather_data_US_df.to_csv(us_csv_path, mode='a', header=needs_header)



In [42]:
# Load the accumulated US weather records from the CSV written by the export cell above.
# The index saved in that file comes back as a regular leading column, which the next cell drops.

working_weather_data_df = pd.read_csv("weather_data_US.csv")

working_weather_data_df


Unnamed: 0.1,Unnamed: 0,City,Lat,Lng,Country,Temp,Humidity,Cloudiness,Wind
0,0,Barrow,71.2906,-156.7887,US,-22.0,71,1,7.20
1,1,Palmer,42.1584,-72.3287,US,10.0,32,20,2.57
2,2,Laramie,41.3114,-105.5911,US,-1.0,80,90,2.57
3,3,Jamestown,42.0970,-79.2353,US,12.0,16,1,3.04
4,4,Saint Paul Harbor,57.7900,-152.4072,US,6.0,56,1,5.66
...,...,...,...,...,...,...,...,...,...
526,94,Warwick,41.7001,-71.4162,US,5.0,93,1,1.57
527,95,Bloomingdale,27.8936,-82.2404,US,15.0,88,90,2.57
528,96,Estelle,29.8458,-90.1067,US,16.0,100,1,2.72
529,97,Virginia Beach,36.8529,-75.9780,US,10.0,76,40,6.69


In [44]:
# Drop the index column that was saved into the CSV (read back as "Unnamed: 0").
# Dropping by name with errors="ignore" makes the cell safe to re-run: a positional
# drop of the first column would remove "City" on a second run.
working_weather_data_df = working_weather_data_df.drop(columns=["Unnamed: 0"], errors="ignore")
working_weather_data_df

Unnamed: 0,City,Lat,Lng,Country,Temp,Humidity,Cloudiness,Wind
0,Barrow,71.2906,-156.7887,US,-22.0,71,1,7.20
1,Palmer,42.1584,-72.3287,US,10.0,32,20,2.57
2,Laramie,41.3114,-105.5911,US,-1.0,80,90,2.57
3,Jamestown,42.0970,-79.2353,US,12.0,16,1,3.04
4,Saint Paul Harbor,57.7900,-152.4072,US,6.0,56,1,5.66
...,...,...,...,...,...,...,...,...
526,Warwick,41.7001,-71.4162,US,5.0,93,1,1.57
527,Bloomingdale,27.8936,-82.2404,US,15.0,88,90,2.57
528,Estelle,29.8458,-90.1067,US,16.0,100,1,2.72
529,Virginia Beach,36.8529,-75.9780,US,10.0,76,40,6.69


In [45]:
# NaN handling is currently disabled: the dropna() call below is commented out,
# so this cell only displays the frame unchanged.
# working_weather_data_df = working_weather_data_df.dropna()
working_weather_data_df

Unnamed: 0,City,Lat,Lng,Country,Temp,Humidity,Cloudiness,Wind
0,Barrow,71.2906,-156.7887,US,-22.0,71,1,7.20
1,Palmer,42.1584,-72.3287,US,10.0,32,20,2.57
2,Laramie,41.3114,-105.5911,US,-1.0,80,90,2.57
3,Jamestown,42.0970,-79.2353,US,12.0,16,1,3.04
4,Saint Paul Harbor,57.7900,-152.4072,US,6.0,56,1,5.66
...,...,...,...,...,...,...,...,...
526,Warwick,41.7001,-71.4162,US,5.0,93,1,1.57
527,Bloomingdale,27.8936,-82.2404,US,15.0,88,90,2.57
528,Estelle,29.8458,-90.1067,US,16.0,100,1,2.72
529,Virginia Beach,36.8529,-75.9780,US,10.0,76,40,6.69


In [47]:
# Keep only the first row for every city name; later repeats are discarded
is_repeated_city = working_weather_data_df.duplicated(subset='City', keep="first")
working_weather_data_df = working_weather_data_df[~is_repeated_city]
working_weather_data_df

Unnamed: 0,City,Lat,Lng,Country,Temp,Humidity,Cloudiness,Wind
0,Barrow,71.2906,-156.7887,US,-22.00,71,1,7.20
1,Palmer,42.1584,-72.3287,US,10.00,32,20,2.57
2,Laramie,41.3114,-105.5911,US,-1.00,80,90,2.57
3,Jamestown,42.0970,-79.2353,US,12.00,16,1,3.04
4,Saint Paul Harbor,57.7900,-152.4072,US,6.00,56,1,5.66
...,...,...,...,...,...,...,...,...
521,Morris,40.8334,-74.5329,US,5.00,86,1,0.44
523,South Lake Tahoe,38.9332,-119.9844,US,3.89,55,1,1.56
526,Warwick,41.7001,-71.4162,US,5.00,93,1,1.57
527,Bloomingdale,27.8936,-82.2404,US,15.00,88,90,2.57


In [48]:
# Renumber the rows from 0 after the duplicate removal.
# reset_index() without drop=True keeps the old row labels in a new leading "index" column.
working_weather_data_df = working_weather_data_df.reset_index()
working_weather_data_df



Unnamed: 0,index,City,Lat,Lng,Country,Temp,Humidity,Cloudiness,Wind
0,0,Barrow,71.2906,-156.7887,US,-22.00,71,1,7.20
1,1,Palmer,42.1584,-72.3287,US,10.00,32,20,2.57
2,2,Laramie,41.3114,-105.5911,US,-1.00,80,90,2.57
3,3,Jamestown,42.0970,-79.2353,US,12.00,16,1,3.04
4,4,Saint Paul Harbor,57.7900,-152.4072,US,6.00,56,1,5.66
...,...,...,...,...,...,...,...,...,...
278,521,Morris,40.8334,-74.5329,US,5.00,86,1,0.44
279,523,South Lake Tahoe,38.9332,-119.9844,US,3.89,55,1,1.56
280,526,Warwick,41.7001,-71.4162,US,5.00,93,1,1.57
281,527,Bloomingdale,27.8936,-82.2404,US,15.00,88,90,2.57


In [49]:
# Drop the "index" column left behind by reset_index().
# Dropping by name with errors="ignore" makes the cell safe to re-run: a positional
# drop of the first column would remove "City" on a second run.
working_weather_data_df = working_weather_data_df.drop(columns=["index"], errors="ignore")
working_weather_data_df

Unnamed: 0,City,Lat,Lng,Country,Temp,Humidity,Cloudiness,Wind
0,Barrow,71.2906,-156.7887,US,-22.00,71,1,7.20
1,Palmer,42.1584,-72.3287,US,10.00,32,20,2.57
2,Laramie,41.3114,-105.5911,US,-1.00,80,90,2.57
3,Jamestown,42.0970,-79.2353,US,12.00,16,1,3.04
4,Saint Paul Harbor,57.7900,-152.4072,US,6.00,56,1,5.66
...,...,...,...,...,...,...,...,...
278,Morris,40.8334,-74.5329,US,5.00,86,1,0.44
279,South Lake Tahoe,38.9332,-119.9844,US,3.89,55,1,1.56
280,Warwick,41.7001,-71.4162,US,5.00,93,1,1.57
281,Bloomingdale,27.8936,-82.2404,US,15.00,88,90,2.57


In [50]:
# Persist the de-duplicated US city data for later reuse.
# NOTE(review): the displayed frame ends at row label 281, i.e. 282 rows, not the 283 cities
# mentioned originally ("almost 10% of every city in USA > 500 inhabitants") — confirm the count.
working_weather_data_df.to_csv('working_weather_data_US.csv')

In [51]:
# Display the cleaned frame that was written to working_weather_data_US.csv
working_weather_data_df

Unnamed: 0,City,Lat,Lng,Country,Temp,Humidity,Cloudiness,Wind
0,Barrow,71.2906,-156.7887,US,-22.00,71,1,7.20
1,Palmer,42.1584,-72.3287,US,10.00,32,20,2.57
2,Laramie,41.3114,-105.5911,US,-1.00,80,90,2.57
3,Jamestown,42.0970,-79.2353,US,12.00,16,1,3.04
4,Saint Paul Harbor,57.7900,-152.4072,US,6.00,56,1,5.66
...,...,...,...,...,...,...,...,...
278,Morris,40.8334,-74.5329,US,5.00,86,1,0.44
279,South Lake Tahoe,38.9332,-119.9844,US,3.89,55,1,1.56
280,Warwick,41.7001,-71.4162,US,5.00,93,1,1.57
281,Bloomingdale,27.8936,-82.2404,US,15.00,88,90,2.57


In [52]:
# Authenticate with Google Maps; gmaps needs the API key configured before a figure
# is created, otherwise the base map is not rendered properly.
gmaps.configure(api_key=g_key)

# Latitude / longitude of every city, used to position the markers
locations = working_weather_data_df[["Lat", "Lng"]]

# HTML shown in the info box that opens when a marker is clicked
info_box_template = """
<dl>
<dt>City</dt><dd>{City}</dd>
<dt>Country</dt><dd>{Country}</dd>
</dl>
"""
# One info box per data frame row; each row supplies the {City} and {Country} fields
city_info = [info_box_template.format(**row) for index, row in working_weather_data_df.iterrows()]

# Size and frame of the map figure
figure_layout = {
    'width': '1000px',
    'height': '800px',
    'border': '1px solid black',
    'padding': '1px',
    'margin': '0 auto 0 auto'}
fig = gmaps.figure(layout=figure_layout)

# Marker layer with the info box content, added on top of the map
markers = gmaps.marker_layer(locations, info_box_content = city_info)
fig.add_layer(markers)

# Display Map
fig

Figure(layout=FigureLayout(border='1px solid black', height='800px', margin='0 auto 0 auto', padding='1px', wi…

In [54]:
# Heat-map weights: wind speed of every city, as floats
wind = working_weather_data_df["Wind"].astype(float)

# Size and frame of the map figure
figure_layout = dict(
    width='1000px',
    height='800px',
    border='1px solid black',
    padding='1px',
    margin='0 auto 0 auto',
)
fig = gmaps.figure(layout=figure_layout)

# Heat layer placed at the stored city coordinates; intensity follows wind speed
heat_layer = gmaps.heatmap_layer(
    locations,
    weights=wind,
    dissipating=False,
    max_intensity=15,
    point_radius=2,
)
fig.add_layer(heat_layer)

# Display the map with the heat layer
fig

Figure(layout=FigureLayout(border='1px solid black', height='800px', margin='0 auto 0 auto', padding='1px', wi…

## Inspect the data and remove the cities where the humidity > 100%.
----
Skip this step if there are no cities that have humidity > 100%. 

In [None]:
# Humidity cannot exceed 100%, so instead of looking for values above 100% this cell
# selects the cities sitting exactly at the 100% maximum. They are treated as outliers
# and removed from the data further down.
is_saturated = weather_data_df["Humidity"] == 100
humid_100_df = weather_data_df.loc[is_saturated, :]

humid_100_df

In [None]:
# Row labels of the cities selected into humid_100_df (the filter used there is humidity == 100).
humid_100_df.index

In [None]:
# Build the cleaned data frame by keeping only cities whose humidity is below 100%,
# i.e. dropping the cities at exactly 100% that are treated as outliers here.
below_saturation = weather_data_df["Humidity"] < 100
clean_city_data_df = weather_data_df.loc[below_saturation]
clean_city_data_df

In [None]:
# Save the filtered (humidity < 100%) data frame to CSV

clean_city_data_df.to_csv('clean_city_data_df.csv')

## Plotting the Data
* Use proper labeling of the plots using plot titles (including date of analysis) and axes labels.
* Save the plotted figures as .pngs.

## Latitude vs. Temperature Plot

In [None]:
# clean_city_data_df.columns

In [None]:
                                    
plt.figure(figsize=(12, 5))    # Set up total size of graph

# Scatter of latitude against maximum temperature.
# The weather frame stores the maximum temperature in the "Temp" column.
plt.scatter(clean_city_data_df["Lat"],
            clean_city_data_df["Temp"],
            marker="o",
            s=40,                            # bigger size of markers
            color="yellow",
            edgecolors='black')              # black outline for better contrast

# Titles of whole plot, x and y axis labels
plt.title("City Latitude vs. Max.Temperature (Celsius)")
plt.ylabel("Max. Temperature (C)")
plt.xlabel("Latitude")
plt.grid(True)                 # Adding grid for reference

# Save the figure (create the output folder first so savefig cannot fail on a fresh checkout)
os.makedirs("figures", exist_ok=True)
plt.savefig("figures/Latitude_vs_Temp.png")

# Show plot
plt.show()

## Latitude vs. Humidity Plot

In [None]:

# Latitude against relative humidity for every retrieved city
humidity_marker_style = dict(
    marker="o",
    s=40,                 # bigger size of markers
    color="yellow",
    edgecolors='black',   # black outline for better contrast
)

plt.figure(figsize=(12, 5))
plt.scatter(weather_data_df["Lat"], weather_data_df["Humidity"], **humidity_marker_style)

# Title, axis labels and reference grid
plt.title("City Latitude vs. Humidity")
plt.xlabel("Latitude")
plt.ylabel("Humidity %")
plt.grid(True)

# Write the figure to disk, then render it
plt.savefig("figures/Latitude_vs_Humidity.png")
plt.show()

## Latitude vs. Cloudiness Plot

In [None]:
# Latitude against cloudiness for every retrieved city
cloudiness_marker_style = dict(
    marker="o",
    s=40,
    color="yellow",
    edgecolors='black',
)

plt.figure(figsize=(12, 5))
plt.scatter(weather_data_df["Lat"], weather_data_df["Cloudiness"], **cloudiness_marker_style)

# Title, axis labels and reference grid
plt.title("City Latitude vs. Cloudiness")
plt.xlabel("Latitude")
plt.ylabel("Cloudiness")
plt.grid(True)

# Write the figure to disk, then render it
plt.savefig("figures/Latitude_vs_Cloudiness.png")
plt.show()

## Latitude vs. Wind Speed Plot

In [None]:
plt.figure(figsize=(12, 5))    # Set up total size of graph

# Scatter of latitude against wind speed.
# The weather frame stores wind speed in the "Wind" column; with metric units
# OpenWeatherMap reports it in metres per second.
plt.scatter(weather_data_df["Lat"],
            weather_data_df["Wind"],
            marker="o",
            s=40,
            color="yellow",
            edgecolors='black')

# Rest of graph details
plt.title("City Latitude vs. Wind Speed (m/s)")
plt.ylabel("Wind Speed (m/s)")
plt.xlabel("Latitude")
plt.grid(True)

# Save the figure (create the output folder first so savefig cannot fail on a fresh checkout)
os.makedirs("figures", exist_ok=True)
plt.savefig("figures/Latitude_vs_Wind.png")

# Show plot
plt.show()

## Linear Regression

####  Northern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Split the weather data by hemisphere using the sign of the latitude:
# positive latitudes lie between the equator and the North Pole (+90),
# negative latitudes between the equator and the South Pole (-90).
# A city at exactly 0 degrees latitude belongs to neither frame.
in_north = weather_data_df["Lat"] > 0
in_south = weather_data_df["Lat"] < 0
north_lat = weather_data_df.loc[in_north]
south_lat = weather_data_df.loc[in_south]

In [None]:
# Regress latitude (y) on maximum temperature (x) for the northern hemisphere.
# Temperatures are in Celsius because the API was queried with metric units.
# The weather frame stores the maximum temperature in the "Temp" column.
x = north_lat["Temp"]
y = north_lat["Lat"]

# Least-squares line y = slope * x + intercept
(slope, intercept, rvalue, pvalue, stderr) = linregress(x, y)
regress_values = x * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

# Pearson correlation coefficient, rounded to 2 decimals
correlation = round(st.pearsonr(x, y)[0], 2)
print(f"In northern hemisphere the correlation coefficient between Max.Temperature and Latitude is {correlation}")

# Plot linear regression on top of scatter plot ----------------------------------------------------
plt.figure(figsize=(14, 8))                                        # Set up total size of graph

# Plot scatter
plt.scatter(x, y,
            s=40,
            color="yellow",
            edgecolors='black')

# Regression line
plt.plot(x, regress_values, "r-")

# Show formula for linear regression
plt.annotate(line_eq,(-25,79),              # position of the formula in data coordinates
             fontsize=14.5,
             color="red")

# Add labels and title to plot
plt.title("Northern Hemisphere - Linear regression of Max Temp. vs Latitude",fontsize =18)
plt.xlabel("Max. Temperature (C)",fontsize =16)
plt.ylabel("Latitude",fontsize =16)
plt.xticks(fontsize=13)
plt.yticks(np.arange(0, 90, 10.0))
plt.yticks(fontsize=13)
plt.grid()

# Save the figure (create the output folder first so savefig cannot fail on a fresh checkout)
os.makedirs("figures", exist_ok=True)
plt.savefig("figures/Northern_Regression_Max_temp_vs_Latitude.png")

# Show plot
plt.show()



####  Southern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Regress latitude (y) on maximum temperature (x) for the southern hemisphere.
# The weather frame stores the maximum temperature in the "Temp" column.
x = south_lat["Temp"]
y = south_lat["Lat"]

# Least-squares line y = slope * x + intercept
(slope, intercept, rvalue, pvalue, stderr) = linregress(x, y)
regress_values = x * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

# Pearson correlation coefficient, rounded to 2 decimals
correlation = round(st.pearsonr(x, y)[0], 2)
print(f"In southern hemisphere the correlation coefficient between Max.Temperature and Latitude is {correlation}")

# Plot linear regression on top of scatter plot ----------------------------------------------------
plt.figure(figsize=(14, 8))     # Set up total size of graph

# Plot scatter
plt.scatter(x, y,
            s=40,
            color="yellow",
            edgecolors='black')

# Regression line
plt.plot(x, regress_values, "r-")

# Show formula for linear regression
plt.annotate(line_eq,(10,-30),              # position of the formula in data coordinates
             fontsize=14.5,
             color="red")

# Add labels and title to plot
plt.title("Southern Hemisphere - Linear regression of Max Temp. vs Latitude",fontsize =18)
plt.xlabel("Max. Temperature (C)",fontsize =16)
plt.ylabel("Latitude",fontsize =16)
plt.xticks(fontsize=13)
plt.yticks(np.arange(-90, 0, 10.0))
plt.yticks(fontsize=13)
plt.grid()

# Save the figure (create the output folder first so savefig cannot fail on a fresh checkout)
os.makedirs("figures", exist_ok=True)
plt.savefig("figures/Southern_Regression_Max_Temp_vs_Latitude.png")

# Show plot
plt.show()

####  Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Regress latitude (y) on humidity (x) for the northern hemisphere
where = "Northern Hemisphere"
text_x = "Humidity %"
x, y = north_lat["Humidity"], north_lat["Lat"]

# Least-squares line y = slope * x + intercept, and its printable equation
slope, intercept, rvalue, pvalue, stderr = linregress(x, y)
regress_values = x * slope + intercept
line_eq = "y = {}x + {}".format(str(round(slope, 2)), str(round(intercept, 2)))

# Pearson correlation coefficient, rounded to 2 decimals
correlation = round(st.pearsonr(x, y)[0], 2)
print(f"In {where} he correlation coeficient between {text_x} and Latitude is {correlation}")

# Scatter plot with the regression line and its equation on top
plt.figure(figsize=(14, 8))
plt.scatter(x, y, s=40, color="yellow", edgecolors='black')
plt.plot(x, regress_values, "r-")
plt.annotate(line_eq, (43, 40), fontsize=14.5, color="red")   # formula position in data coordinates

# Title, axis labels, ticks and grid
plt.title(f"{where} - Linear regression of {text_x} vs Latitude", fontsize=18)
plt.xlabel(text_x, fontsize=16)
plt.ylabel("Latitude", fontsize=16)
plt.xticks(fontsize=13)
plt.yticks(np.arange(0, 90, 10.0), fontsize=13)
plt.grid()

# Write the figure to disk, then render it
plt.savefig("figures/Northern_Regression_Humidity_vs_Latitude.png")
plt.show()

####  Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Regress latitude (y) on humidity (x) for the southern hemisphere
where = "Southern Hemisphere"
text_x = "Humidity %"
x, y = south_lat["Humidity"], south_lat["Lat"]

# Least-squares line y = slope * x + intercept, and its printable equation
slope, intercept, rvalue, pvalue, stderr = linregress(x, y)
regress_values = x * slope + intercept
line_eq = "y = {}x + {}".format(str(round(slope, 2)), str(round(intercept, 2)))

# Pearson correlation coefficient, rounded to 2 decimals
correlation = round(st.pearsonr(x, y)[0], 2)
print(f"In {where} he correlation coeficient between {text_x} and Latitude is {correlation}")

# Scatter plot with the regression line and its equation on top
plt.figure(figsize=(14, 8))
plt.scatter(x, y, s=40, color="yellow", edgecolors='black')
plt.plot(x, regress_values, "r-")
plt.annotate(line_eq, (22, -25), fontsize=14.5, color="red")   # formula position in data coordinates

# Title, axis labels, ticks and grid
plt.title(f"{where} - Linear regression of {text_x} vs Latitude", fontsize=18)
plt.xlabel(text_x, fontsize=16)
plt.ylabel("Latitude", fontsize=16)
plt.xticks(fontsize=13)
plt.yticks(np.arange(-90, 0, 10.0), fontsize=13)
plt.grid()

# Write the figure to disk, then render it
plt.savefig("figures/Southern_Regression_Humidity_vs_Latitude.png")
plt.show()

####  Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Regress latitude (y) on cloudiness (x) for the northern hemisphere
where = "Northern Hemisphere"
text_x = "Cloudiness %"
x, y = north_lat["Cloudiness"], north_lat["Lat"]

# Least-squares line y = slope * x + intercept, and its printable equation
slope, intercept, rvalue, pvalue, stderr = linregress(x, y)
regress_values = x * slope + intercept
line_eq = "y = {}x + {}".format(str(round(slope, 2)), str(round(intercept, 2)))

# Pearson correlation coefficient, rounded to 2 decimals
correlation = round(st.pearsonr(x, y)[0], 2)
print(f"In {where} he correlation coeficient between {text_x} and Latitude is {correlation}")

# Scatter plot with the regression line and its equation on top
plt.figure(figsize=(14, 8))
plt.scatter(x, y, s=40, color="yellow", edgecolors='black')
plt.plot(x, regress_values, "r-")
plt.annotate(line_eq, (23, 44), fontsize=14.5, color="red")   # formula position in data coordinates

# Title, axis labels, ticks and grid
plt.title(f"{where} - Linear regression of {text_x} vs Latitude", fontsize=18)
plt.xlabel(text_x, fontsize=16)
plt.ylabel("Latitude", fontsize=16)
plt.xticks(fontsize=13)
plt.yticks(np.arange(0, 90, 10.0), fontsize=13)  # y ticks from 0 up to (not including) 90 in steps of 10 degrees
plt.grid()

# Write the figure to disk, then render it
plt.savefig("figures/Northern_Regression_Cloudiness_vs_Latitude.png")
plt.show()

####  Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Regress latitude (y) on cloudiness (x) for the southern hemisphere
where = "Southern Hemisphere"
text_x = "Cloudiness %"
x, y = south_lat["Cloudiness"], south_lat["Lat"]

# Least-squares line y = slope * x + intercept, and its printable equation
slope, intercept, rvalue, pvalue, stderr = linregress(x, y)
regress_values = x * slope + intercept
line_eq = "y = {}x + {}".format(str(round(slope, 2)), str(round(intercept, 2)))

# Pearson correlation coefficient, rounded to 2 decimals
correlation = round(st.pearsonr(x, y)[0], 2)
print(f"In {where} he correlation coeficient between {text_x} and Latitude is {correlation}")

# Scatter plot with the regression line and its equation on top
plt.figure(figsize=(14, 8))
plt.scatter(x, y, s=40, color="yellow", edgecolors='black')
plt.plot(x, regress_values, "r-")
plt.annotate(line_eq, (50, -16), fontsize=14.5, color="red")   # formula position in data coordinates

# Title, axis labels, ticks and grid
plt.title(f"{where} - Linear regression of {text_x} vs Latitude", fontsize=18)
plt.xlabel(text_x, fontsize=16)
plt.ylabel("Latitude", fontsize=16)
plt.xticks(fontsize=13)
plt.yticks(np.arange(-90, 0, 10.0), fontsize=13)
plt.grid()

# Write the figure to disk, then render it
plt.savefig("figures/Southern_Regression_Cloudiness_vs_Latitude.png")
plt.show()

####  Northern Hemisphere - Wind Speed (m/s) vs. Latitude Linear Regression

In [None]:
# Regress latitude (y) on wind speed (x) for the northern hemisphere.
# The weather frame stores wind speed in the "Wind" column; with metric units
# OpenWeatherMap reports it in metres per second (not km/h or mph).
x = north_lat["Wind"]
y = north_lat["Lat"]
text_x = "Wind Speed (m/s)"
where = "Northern Hemisphere"

# Least-squares line y = slope * x + intercept
(slope, intercept, rvalue, pvalue, stderr) = linregress(x, y)
regress_values = x * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

# Pearson correlation coefficient, rounded to 2 decimals
correlation = round(st.pearsonr(x, y)[0], 2)
print(f"In {where} the correlation coefficient between {text_x} and Latitude is {correlation}")

# Plot linear regression on top of scatter plot ----------------------------------------------------
plt.figure(figsize=(14, 8))     # Set up total size of graph

# Plot scatter
plt.scatter(x, y,
            s=40,
            color="yellow",
            edgecolors='black')

# Regression line
plt.plot(x, regress_values, "r-")

# Show formula for linear regression
plt.annotate(line_eq,(11,55),              # position of the formula in data coordinates
             fontsize=14.5,
             color="red")

# Add labels and title to plot
plt.title(f"{where} - Linear regression of {text_x} vs Latitude",fontsize =18)
plt.xlabel(text_x,fontsize =16)
plt.ylabel("Latitude",fontsize =16)
plt.xticks(fontsize=13)
plt.yticks(np.arange(0, 90, 10.0))
plt.yticks(fontsize=13)
plt.grid()

# Save the figure (create the output folder first so savefig cannot fail on a fresh checkout)
os.makedirs("figures", exist_ok=True)
plt.savefig("figures/Northern_Regression_Wind_Speed_vs_Latitude.png")

# Show plot
plt.show()

####  Southern Hemisphere - Wind Speed (m/s) vs. Latitude Linear Regression

In [84]:
# Regress latitude (y) on wind speed (x) for the southern hemisphere.
# The weather frame stores wind speed in the "Wind" column; with metric units
# OpenWeatherMap reports it in metres per second (not km/h or mph).
# Requires south_lat from the hemisphere-split cell to have been run first.
x = south_lat["Wind"]
y = south_lat["Lat"]
text_x = "Wind Speed (m/s)"
where = "Southern Hemisphere"

# Least-squares line y = slope * x + intercept
(slope, intercept, rvalue, pvalue, stderr) = linregress(x, y)
regress_values = x * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

# Pearson correlation coefficient, rounded to 2 decimals
correlation = round(st.pearsonr(x, y)[0], 2)
print(f"In {where} the correlation coefficient between {text_x} and Latitude is {correlation}")

# Plot linear regression on top of scatter plot ----------------------------------------------------
plt.figure(figsize=(14, 8))     # Set up total size of graph

# Plot scatter
plt.scatter(x, y,
            s=40,
            color="yellow",
            edgecolors='black')

# Regression line
plt.plot(x, regress_values, "r-")

# Show formula for linear regression
plt.annotate(line_eq,(12,-29),              # position of the formula in data coordinates
             fontsize=14.5,
             color="red")

# Add labels and title to plot
plt.title(f"{where} - Linear regression of {text_x} vs Latitude",fontsize =18)
plt.xlabel(text_x,fontsize =16)
plt.ylabel("Latitude",fontsize =16)
plt.xticks(fontsize=13)
plt.yticks(np.arange(-90, 0, 10.0))
plt.yticks(fontsize=13)
plt.grid()

# Save the figure (create the output folder first so savefig cannot fail on a fresh checkout)
os.makedirs("figures", exist_ok=True)
plt.savefig("figures/Southern_Regression_Wind_Speed_vs_Latitude.png")

# Show plot
plt.show()

NameError: name 'south_lat' is not defined