In [1]:
import sys, os
sys.path.append(os.path.join(os.path.dirname(''), '../..'))
from proj3_gans_scooters.src.utils import PrivateKeysHandler

# Relative path of the .env file handed to PrivateKeysHandler.
# Built with os.path.join so the separator matches the running OS; the previous
# literal hard-coded a Windows backslash and contained the invalid escape '\.'.
relative_path_to_file = os.path.join('..', '.env')
keys = PrivateKeysHandler(relative_path_to_file)
api_key_dict = keys.load_keys('APIs')

# Default query parameters for the OpenWeather forecast request.
# get_weather() rejects non-string values, so numbers are written as strings.
weather_arguments = {
    'q' : 'Berlin', # city
    'appid' : api_key_dict['openweather_key'], # api key
    'cnt' : '1', # number of results
    'units' : 'metric'
}

In [22]:
def get_weather(weather_arguments : dict, timeout : float = 30) -> dict:
    """Query the OpenWeather forecast endpoint and return the decoded JSON.

    Parameters
    ----------
    weather_arguments : dict
        Query parameters (e.g. 'q', 'appid', 'cnt', 'units'). Every value must
        be a string.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30), so a
        stalled connection cannot hang the notebook forever.

    Returns
    -------
    dict
        The parsed JSON body of the response.

    Raises
    ------
    TypeError
        If any value in ``weather_arguments`` is not a string.
    Exception
        If the API answers with a status code other than 200. The API key is
        masked in the URL quoted in the message.
    """
    from urllib.parse import urlencode

    # check that only string arguments are present
    if not all(isinstance(val, str) for val in weather_arguments.values()):
        raise TypeError('all arguments must be string')

    import requests

    # preparing the request url; HTTPS keeps the API key out of clear text
    weather_api = "https://api.openweathermap.org/data/2.5/forecast?"
    # urlencode percent-encodes spaces, quotes, '&' and non-ASCII characters,
    # which the former repr()-based string surgery passed through untouched.
    weather_request = weather_api + urlencode(weather_arguments)

    response = requests.get(weather_request, timeout=timeout)
    if response.status_code != 200:
        # Never echo the real key: notebook outputs are routinely shared.
        masked_arguments = {
            key: ('REDACTED' if key == 'appid' else val)
            for key, val in weather_arguments.items()
        }
        masked_request = weather_api + urlencode(masked_arguments)
        raise Exception(f'openweather returned code {response.status_code} for url = {masked_request}')
    return response.json()

# Fetch the forecast for the default request; weather_arguments is defined in
# the first cell of this notebook.
weather_json = get_weather(weather_arguments)

In [24]:
# convert json to pandas DataFrame
from pandas import DataFrame
def weather_json_to_df(weather_json : dict, city : str, keep_cols : list=None) -> DataFrame:
    """Turn an OpenWeather forecast response into a flat DataFrame.

    Each entry of ``weather_json['list']`` becomes one row. Nested values are
    flattened with flatdict's ``FlatterDict``; callers in this notebook select
    the resulting columns by names such as 'main:temp'.

    Parameters
    ----------
    weather_json : dict
        Decoded API response; must contain the key 'list'.
    city : str
        Value written to the added 'city' column of every row.
    keep_cols : list, optional
        Columns to return, in this order. Names missing from the data are
        skipped silently. ``None`` or an empty list returns every column.

    Returns
    -------
    DataFrame
        One row per forecast entry.
    """
    import pandas as pd

    try:
        from flatdict import FlatterDict as flatten
    except ImportError:
        # Only when flatdict is really missing do we fall back to the project's
        # installer. Doing this on every call (as before) re-ran the installer
        # and kept appending duplicate entries to sys.path.
        import importlib
        import os
        import sys

        project_root = os.path.join(os.path.dirname(''), '../..')
        if project_root not in sys.path:
            sys.path.append(project_root)
        from proj3_gans_scooters.src.utils import install_pip_pkg

        #!pip3 install flatdict
        install_pip_pkg({'flatdict'})
        # Let the import system notice the freshly installed package.
        importlib.invalidate_caches()
        from flatdict import FlatterDict as flatten

    # take weather data and city name
    flat_records = [dict(flatten(entry)) for entry in weather_json['list']]
    weather_df = pd.json_normalize(flat_records)
    weather_df = weather_df.assign(city=city)  # scalar is broadcast to every row

    # return only selection of columns
    if not keep_cols:
        return weather_df
    available_cols = [c for c in keep_cols if c in weather_df.columns]
    return weather_df[available_cols]
    
# Flattened OpenWeather key -> descriptive column name used from here on.
column_labels = {
    'city': 'city',
    'dt_txt': 'date',
    'main:temp': 'temp_celcius',
    'main:feels_like': 'temp_feels_like_celcius',
    'main:humidity': 'humidity_percent',
    'weather:0:description': 'weather_description',
    'clouds:all': 'clouds_percent',
    'wind:speed': 'wind_speed_meter_sec',
    'wind:deg': 'wind_direction_degree',
    'wind:gust': 'wind_gust_meter_sec',
    'pop': 'pop_percent',
    'rain:3h': 'rain_3h_mm',
    'snow:3h': 'snow_3h_mm',
    'sys:pod': 'pod',
}
keep_cols = list(column_labels.keys())
new_cols = list(column_labels.values())

# Select the columns of interest for Berlin, then give them their readable names.
weather_df = weather_json_to_df(weather_json, 'Berlin', keep_cols).rename(columns=column_labels)

In [25]:
# handle nan values
from pandas import DataFrame
def cleanup_weather(df : DataFrame):
    """Normalise a renamed weather DataFrame in place.

    * Missing values in 'rain_3h_mm' / 'snow_3h_mm' (when those columns exist)
      are replaced by 0.
    * 'date' is converted to a datetime column.

    Parameters
    ----------
    df : DataFrame
        Frame to modify; it must contain a 'date' column.

    Returns
    -------
    None
        The frame is changed in place.
    """
    import pandas as pd

    # Plain column assignment replaces the column, so the new dtype sticks.
    # `df.loc[:, col] = values` writes into the existing column on
    # pandas >= 2.0, which can leave 'date' as object instead of datetime64.
    for precipitation_col in ('rain_3h_mm', 'snow_3h_mm'):
        if precipitation_col in df:
            df[precipitation_col] = df[precipitation_col].fillna(0)
    df['date'] = pd.to_datetime(df['date'])

# Clean weather_df in place (cleanup_weather returns nothing), then print the
# per-column non-null counts and dtypes to check the result.
cleanup_weather(weather_df)
weather_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1 entries, 0 to 0
Data columns (total 12 columns):
 #   Column                   Non-Null Count  Dtype         
---  ------                   --------------  -----         
 0   city                     1 non-null      object        
 1   date                     1 non-null      datetime64[ns]
 2   temp_celcius             1 non-null      float64       
 3   temp_feels_like_celcius  1 non-null      float64       
 4   humidity_percent         1 non-null      int64         
 5   weather_description      1 non-null      object        
 6   clouds_percent           1 non-null      int64         
 7   wind_speed_meter_sec     1 non-null      float64       
 8   wind_direction_degree    1 non-null      int64         
 9   wind_gust_meter_sec      1 non-null      float64       
 10  pop_percent              1 non-null      float64       
 11  pod                      1 non-null      object        
dtypes: datetime64[ns](1), float64(5), int64(

In [29]:
from pandas import DataFrame
def scrape_weather(city_lst : list, openweather_key : str) -> DataFrame:
    """Download and tidy the forecast for several cities.

    For every name in ``city_lst`` the forecast is fetched with
    ``get_weather``, flattened with ``weather_json_to_df``, renamed to the
    descriptive column names and cleaned with ``cleanup_weather``.

    Parameters
    ----------
    city_lst : list
        City names to query; each is sent as the 'q' parameter and stored in
        the 'city' column.
    openweather_key : str
        OpenWeather API key, sent as 'appid'.

    Returns
    -------
    DataFrame
        All per-city frames concatenated with a fresh index, or ``None`` when
        ``city_lst`` is empty.
    """
    # Flattened API key -> column name in the result.
    column_names = {
        'city': 'city',
        'dt_txt': 'date',
        'main:temp': 'temp_celcius',
        'main:feels_like': 'temp_feels_like_celcius',
        'main:humidity': 'humidity_percent',
        'weather:0:description': 'weather_description',
        'clouds:all': 'clouds_percent',
        'wind:speed': 'wind_speed_meter_sec',
        'wind:deg': 'wind_direction_degree',
        'wind:gust': 'wind_gust_meter_sec',
        'pop': 'pop_percent',
        'rain:3h': 'rain_3h_mm',
        'snow:3h': 'snow_3h_mm',
        'sys:pod': 'pod',
    }
    wanted_cols = list(column_names)

    per_city_frames = []
    for city_name in city_lst:
        # 'cnt' is not sent, so the number of results is left to the API.
        request_args = {
            'q': city_name,
            'appid': openweather_key,
            'units': 'metric',
        }
        forecast_json = get_weather(request_args)
        city_frame = weather_json_to_df(forecast_json, city_name, wanted_cols)
        city_frame = city_frame.rename(columns=column_names)
        cleanup_weather(city_frame)
        per_city_frames.append(city_frame)

    # Nothing requested -> nothing to concatenate.
    if not per_city_frames:
        return None

    import pandas as pd
    return pd.concat(per_city_frames, ignore_index=True)

In [30]:
# Summary statistics for the numeric columns of weather_df.
weather_df.describe()

Unnamed: 0,temp_celcius,temp_feels_like_celcius,humidity_percent,clouds_percent,wind_speed_meter_sec,wind_direction_degree,wind_gust_meter_sec,pop_percent
count,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
mean,25.34,25.08,44.0,0.0,2.12,242.0,4.83,0.02
std,,,,,,,,
min,25.34,25.08,44.0,0.0,2.12,242.0,4.83,0.02
25%,25.34,25.08,44.0,0.0,2.12,242.0,4.83,0.02
50%,25.34,25.08,44.0,0.0,2.12,242.0,4.83,0.02
75%,25.34,25.08,44.0,0.0,2.12,242.0,4.83,0.02
max,25.34,25.08,44.0,0.0,2.12,242.0,4.83,0.02


In [31]:
# Test input: city names containing umlauts, other diacritics and spaces.
cities = ['Kraków','Frankfurt am Main','Łódź','Wrocław',
          'Düsseldorf','Málaga','The Hague','Poznań','Gdańsk',
          'Palma de Mallorca','Iași','Las Palmas','Malmö','Varna',
          'Cluj-Napoca','Córdoba','Münster','Vila Nova de Gaia']



# The call completes without errors, but nothing here verifies which place the
# API actually matched for each requested name.
test_cities = scrape_weather(cities, api_key_dict['openweather_key'])

In [32]:
# Display the combined forecast table returned by scrape_weather.
test_cities

Unnamed: 0,city,date,temp_celcius,temp_feels_like_celcius,humidity_percent,weather_description,clouds_percent,wind_speed_meter_sec,wind_direction_degree,wind_gust_meter_sec,pop_percent,rain_3h_mm,pod
0,Kraków,2022-06-08 15:00:00,20.74,20.83,75,light rain,75,1.51,67,2.07,0.61,0.47,d
1,Kraków,2022-06-08 18:00:00,19.74,19.88,81,light rain,82,2.25,64,4.19,0.91,2.17,d
2,Kraków,2022-06-08 21:00:00,17.41,17.48,87,light rain,81,0.89,84,1.09,0.72,0.58,n
3,Kraków,2022-06-09 00:00:00,14.90,14.90,94,light rain,89,0.89,99,1.00,0.59,0.13,n
4,Kraków,2022-06-09 03:00:00,14.31,14.25,94,light rain,82,0.73,96,0.88,0.36,0.19,d
...,...,...,...,...,...,...,...,...,...,...,...,...,...
715,Vila Nova de Gaia,2022-06-13 00:00:00,20.18,20.32,79,clear sky,4,1.61,147,1.97,0.00,0.00,n
716,Vila Nova de Gaia,2022-06-13 03:00:00,19.80,19.85,77,clear sky,0,1.23,121,1.48,0.00,0.00,n
717,Vila Nova de Gaia,2022-06-13 06:00:00,19.99,20.06,77,clear sky,0,0.89,153,1.35,0.00,0.00,d
718,Vila Nova de Gaia,2022-06-13 09:00:00,24.61,24.75,62,clear sky,0,0.90,339,1.85,0.00,0.00,d


In [33]:
#compare city names
import requests
import json
import pandas as pd

# Base URL of the forecast endpoint; HTTPS keeps the API key out of clear text.
weather_api = "https://api.openweathermap.org/data/2.5/forecast?"

# Print, for every requested name, the city name the API reports back.
for c in cities:
    # Work on a copy: writing 'q' into the shared weather_arguments would leave
    # it pointing at the last city for any cell that is re-run afterwards.
    city_arguments = {**weather_arguments, 'q': c}

    # requests builds and percent-encodes the query string from `params`.
    response = requests.get(weather_api, params=city_arguments, timeout=30)
    assert response.status_code == 200, f'openweather returned code {response.status_code} for city {c}'
    print(c, "=", response.json()['city']['name'])

Kraków = Krakow
Frankfurt am Main = Frankfurt am Main
Łódź = Łódź Voivodeship
Wrocław = Wrocław
Düsseldorf = Düsseldorf
Málaga = Málaga
The Hague = The Hague
Poznań = Poznań
Gdańsk = Gdańsk
Palma de Mallorca = Palma de Mallorca
Iași = Iasi
Las Palmas = Las Palmas
Malmö = Malmo
Varna = Varna
Cluj-Napoca = Cluj-Napoca
Córdoba = Córdoba
Münster = Münster
Vila Nova de Gaia = Vila Nova de Gaia
