# **WEATHER DATA**

## 1) MANUAL WAY

In [None]:
# import libraries
import pandas as pd
import requests
from datetime import datetime
import pytz

In [None]:
# City whose forecast is requested in the manual walkthrough.
city = "Berlin"

In [None]:
# OpenWeatherMap API key.
# NOTE(review): this key is committed in plain text - rotate it and load it from an
# environment variable or a secrets store instead of keeping it in the notebook.
API_key = "d33337dea54eb73898dd268ae77230f3"

# Forecast endpoint (docs: https://openweathermap.org/forecast5).
# HTTPS keeps the API key out of clear-text traffic.
url = (f"https://api.openweathermap.org/data/2.5/forecast?q={city}&appid={API_key}&units=metric")

# Send the request; the timeout stops the cell from hanging on a stalled connection.
http_response = requests.get(url, timeout=10)

# Fail here with a clear HTTP error (e.g. unknown city, rejected key) instead of
# with a KeyError later on, when 'list' is missing from an error payload.
http_response.raise_for_status()

# Parse the JSON body into a Python dictionary (the API returns JSON, not HTML).
response = http_response.json()

response

{'cod': '200',
 'message': 0,
 'cnt': 40,
 'list': [{'dt': 1676030400,
   'main': {'temp': 4.47,
    'feels_like': 0.83,
    'temp_min': 4.47,
    'temp_max': 4.7,
    'pressure': 1024,
    'sea_level': 1024,
    'grnd_level': 1028,
    'humidity': 81,
    'temp_kf': -0.23},
   'weather': [{'id': 803,
     'main': 'Clouds',
     'description': 'broken clouds',
     'icon': '04d'}],
   'clouds': {'all': 75},
   'wind': {'speed': 4.69, 'deg': 261, 'gust': 8.93},
   'visibility': 10000,
   'pop': 0,
   'sys': {'pod': 'd'},
   'dt_txt': '2023-02-10 12:00:00'},
  {'dt': 1676041200,
   'main': {'temp': 4.48,
    'feels_like': 1.18,
    'temp_min': 4.48,
    'temp_max': 4.5,
    'pressure': 1027,
    'sea_level': 1027,
    'grnd_level': 1027,
    'humidity': 79,
    'temp_kf': -0.02},
   'weather': [{'id': 803,
     'main': 'Clouds',
     'description': 'broken clouds',
     'icon': '04d'}],
   'clouds': {'all': 83},
   'wind': {'speed': 4.07, 'deg': 249, 'gust': 8.85},
   'visibility': 10000

Let's get a timestamp of when we retrieve the data. `datetime` uses the current time of the system, which on a local computer is normally correct. But as we're working in the cloud, the computer is not always in our country, so we use the `pytz` timezone module to ensure that our timestamp is local to us and not to the computer.

In [None]:
# Time zone the retrieval timestamp should be expressed in.
tz = pytz.timezone('Europe/Berlin')

# Ask for the current time directly in that zone. Creating a naive datetime.now()
# first and converting it afterwards depends on how the host interprets its own
# local clock, which is ambiguous during the host's daylight-saving fold-back hour.
now = datetime.now(tz)

now

datetime.datetime(2023, 2, 10, 12, 5, 13, 456477, tzinfo=<DstTzInfo 'Europe/Berlin' CET+1:00:00 STD>)

In [None]:
# Column-oriented store for the forecast: one key per column, each mapped to its
# own empty list that the loop fills entry by entry.
weather_dict = {
    column: []
    for column in (
        'city',
        'country',
        'forecast_time',
        'outlook',
        'detailed_outlook',
        'temperature',
        'temperature_feels_like',
        'clouds',
        'rain',
        'snow',
        'wind_speed',
        'wind_deg',
        'humidity',
        'pressure',
    )
}

In [None]:
# Flatten every forecast entry of the response into one row of weather_dict.
forecasts = response['list']

# City name and country are the same for every entry, so look them up once.
city_name = response['city']['name']
country_code = response['city']['country']

for forecast in forecasts:
  weather_dict['city'].append(city_name)
  weather_dict['country'].append(country_code)
  weather_dict['forecast_time'].append(forecast['dt_txt'])
  weather_dict['outlook'].append(forecast['weather'][0]['main'])
  weather_dict['detailed_outlook'].append(forecast['weather'][0]['description'])
  weather_dict['temperature'].append(forecast['main']['temp'])
  weather_dict['temperature_feels_like'].append(forecast['main']['feels_like'])
  weather_dict['clouds'].append(forecast['clouds']['all'])
  # 'rain' and 'snow' are only present when precipitation is forecast. All lists must
  # have the same length to build a DataFrame, so a missing value becomes 0.0.
  # The default is numeric (not the string '0') so the columns stay numeric, and only
  # the "key is absent" case is handled instead of silencing every possible error.
  weather_dict['rain'].append((forecast.get('rain') or {}).get('3h', 0.0))
  weather_dict['snow'].append((forecast.get('snow') or {}).get('3h', 0.0))
  weather_dict['wind_speed'].append(forecast['wind']['speed'])
  weather_dict['wind_deg'].append(forecast['wind']['deg'])
  weather_dict['humidity'].append(forecast['main']['humidity'])
  weather_dict['pressure'].append(forecast['main']['pressure'])

**Now** we convert our dictionary to a DataFrame

In [None]:
# Build the table: every key of weather_dict becomes a column, its list the values.
weather_from_dict_df = pd.DataFrame(data=weather_dict)

# Preview the first five rows.
weather_from_dict_df.head(n=5)

Unnamed: 0,city,country,forecast_time,outlook,detailed_outlook,temperature,temperature_feels_like,clouds,rain,snow,wind_speed,wind_deg,humidity,pressure
0,Berlin,DE,2023-02-10 12:00:00,Clouds,broken clouds,4.47,0.83,75,0,0,4.69,261,81,1024
1,Berlin,DE,2023-02-10 15:00:00,Clouds,broken clouds,4.48,1.18,83,0,0,4.07,249,79,1027
2,Berlin,DE,2023-02-10 18:00:00,Clouds,overcast clouds,2.91,-0.93,90,0,0,4.36,239,83,1030
3,Berlin,DE,2023-02-10 21:00:00,Clouds,broken clouds,1.6,-2.96,81,0,0,5.08,234,79,1032
4,Berlin,DE,2023-02-11 00:00:00,Clouds,overcast clouds,2.33,-2.28,90,0,0,5.54,243,81,1030


## 2) FUNCTIONAL WAY

**As a final step**, to keep everything tidy, let's bring everything we did together in a function, and let that function take a list of cities as its input.

In [None]:
# import libraries
import pandas as pd
import requests
from datetime import datetime
import pytz

In [None]:
# Column order of the DataFrame returned by function().
_WEATHER_COLUMNS = [
    'city',
    'country',
    'forecast_time',
    'outlook',
    'detailed_outlook',
    'temperature',
    'temperature_feels_like',
    'clouds',
    'rain',
    'snow',
    'wind_speed',
    'wind_deg',
    'humidity',
    'pressure',
    'information_retrieved_at',
]


def _forecast_record(city_info, forecast, retrieved_at):
  """Flatten one forecast entry of the API payload into a single table row."""
  return {
      'city': city_info['name'],
      'country': city_info['country'],
      'forecast_time': forecast['dt_txt'],
      'outlook': forecast['weather'][0]['main'],
      'detailed_outlook': forecast['weather'][0]['description'],
      'temperature': forecast['main']['temp'],
      'temperature_feels_like': forecast['main']['feels_like'],
      'clouds': forecast['clouds']['all'],
      # 'rain' / 'snow' are only present when precipitation is forecast. The default
      # is a numeric 0.0 (not the string '0') so both columns keep a numeric dtype.
      'rain': (forecast.get('rain') or {}).get('3h', 0.0),
      'snow': (forecast.get('snow') or {}).get('3h', 0.0),
      'wind_speed': forecast['wind']['speed'],
      'wind_deg': forecast['wind']['deg'],
      'humidity': forecast['main']['humidity'],
      'pressure': forecast['main']['pressure'],
      'information_retrieved_at': retrieved_at,
  }


def function(cities, api_key=None):
  """Download the OpenWeatherMap forecast for several cities into one DataFrame.

  Args:
    cities: Iterable of city names. A single city name passed as a plain string
      is treated as a one-element list instead of being iterated per character.
    api_key: OpenWeatherMap API key. When omitted, the OPENWEATHER_API_KEY
      environment variable is used, then the key that was previously embedded
      in this notebook.

  Returns:
    pandas.DataFrame with one row per city and forecast entry and the columns
    listed in _WEATHER_COLUMNS. 'information_retrieved_at' holds the time of the
    call in the Europe/Berlin zone, formatted as dd/mm/YYYY HH:MM:SS. An empty
    `cities` yields an empty DataFrame with the same columns.

  Raises:
    requests.HTTPError: If the API answers with an error status (for example an
      unknown city or a rejected key).
    KeyError: If a successful response lacks an expected field.
  """
  import os  # local import so this cell does not depend on another cell's imports

  if api_key is None:
    # NOTE(review): the literal fallback is committed in plain text and is kept only
    # so existing calls keep working - rotate it and rely on the environment variable.
    api_key = os.environ.get("OPENWEATHER_API_KEY", "d33337dea54eb73898dd268ae77230f3")

  if isinstance(cities, str):
    cities = [cities]

  # One timestamp for the whole batch, taken directly in the target time zone.
  tz = pytz.timezone('Europe/Berlin')
  retrieved_at = datetime.now(tz).strftime("%d/%m/%Y %H:%M:%S")

  records = []
  for city in cities:
    # `params` URL-encodes the city name; HTTPS keeps the key out of clear text;
    # the timeout stops one stalled request from blocking the whole batch.
    http_response = requests.get(
        "https://api.openweathermap.org/data/2.5/forecast",
        params={'q': city, 'appid': api_key, 'units': 'metric'},
        timeout=10,
    )
    # Surface API errors as HTTP errors instead of a KeyError on a missing 'list'.
    http_response.raise_for_status()
    payload = http_response.json()

    records.extend(
        _forecast_record(payload['city'], forecast, retrieved_at)
        for forecast in payload['list']
    )

  # Passing the column list keeps the column order and also yields the right
  # (empty) columns when no rows were collected.
  return pd.DataFrame(records, columns=_WEATHER_COLUMNS)

### WEATHER DATAFRAME

In [None]:
# Cities to collect forecasts for.
list_of_cities = [
    'Berlin',
    'Hamburg',
    'London',
    'Istanbul',
    'Barcelona',
]

# A single call gathers the forecasts of every city into one DataFrame.
weather_df = function(cities=list_of_cities)
weather_df

Unnamed: 0,city,country,forecast_time,outlook,detailed_outlook,temperature,temperature_feels_like,clouds,rain,snow,wind_speed,wind_deg,humidity,pressure,information_retrieved_at
0,Berlin,DE,2023-02-10 12:00:00,Clouds,broken clouds,4.47,0.83,75,0,0,4.69,261,81,1024,10/02/2023 12:05:13
1,Berlin,DE,2023-02-10 15:00:00,Clouds,broken clouds,4.48,1.18,83,0,0,4.07,249,79,1027,10/02/2023 12:05:13
2,Berlin,DE,2023-02-10 18:00:00,Clouds,overcast clouds,2.91,-0.93,90,0,0,4.36,239,83,1030,10/02/2023 12:05:13
3,Berlin,DE,2023-02-10 21:00:00,Clouds,broken clouds,1.60,-2.96,81,0,0,5.08,234,79,1032,10/02/2023 12:05:13
4,Berlin,DE,2023-02-11 00:00:00,Clouds,overcast clouds,2.33,-2.28,90,0,0,5.54,243,81,1030,10/02/2023 12:05:13
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195,Barcelona,ES,2023-02-14 21:00:00,Clouds,few clouds,10.51,9.71,17,0,0,0.24,82,80,1028,10/02/2023 12:05:13
196,Barcelona,ES,2023-02-15 00:00:00,Clouds,scattered clouds,9.68,9.34,30,0,0,1.45,345,83,1026,10/02/2023 12:05:13
197,Barcelona,ES,2023-02-15 03:00:00,Clouds,few clouds,8.99,8.10,16,0,0,1.90,12,81,1025,10/02/2023 12:05:13
198,Barcelona,ES,2023-02-15 06:00:00,Clouds,few clouds,8.75,7.49,18,0,0,2.30,21,75,1024,10/02/2023 12:05:13


## 3) SAVE DATAFRAME

In [None]:
# Write the CSV first so the file exists even when no browser download is possible.
weather_df.to_csv("weather_df.csv", index=False)

try:
  # google.colab is only importable inside a Colab runtime.
  from google.colab import files
except ImportError:
  # Outside Colab the file simply stays in the working directory.
  print("google.colab is not available; weather_df.csv was saved to the working directory.")
else:
  # In Colab, trigger the browser download of the file that was just written.
  files.download("weather_df.csv")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>