## Weather data

In [1]:
import ast
import datetime
import http.client
import json
import os

import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import pandas as pd

Get an API key here: https://opendata.aemet.es/centrodedescargas/altaUsuario?

In [2]:
# Read the AEMET API key from the AEMET_API_KEY environment variable so the
# secret never has to be saved inside the notebook. Falls back to an empty
# string (the previous placeholder value) when the variable is not set.
api_key = os.environ.get("AEMET_API_KEY", "")

## Define main functions

In [3]:
def get_meteo_df(station,date_init,date_final,api_key):
    """Get the weather dataframe for the specified station and date range.

    The request is answered in two steps: the first response is a small JSON
    object whose 'datos' entry holds the URL of the actual records, which are
    then downloaded with a second request on the same connection. The
    'metadatos' URL of the first response is not fetched because its content
    was never used.

    Parameters
    ----------
    station : str
        Station identifier inserted into the request URL.
    date_init, date_final : str
        Start and end of the date range, inserted verbatim into the request
        URL (the notebook passes values such as "2020-02-27T00:00:00UTC").
    api_key : str
        AEMET OpenData API key.

    Returns
    -------
    pandas.DataFrame
        One row per record in the data response, values as delivered.

    Raises
    ------
    KeyError
        If the first response carries no 'datos' URL (for example when the
        service rejects the request); the message includes the reply.
    ValueError
        If a response body is not valid JSON.
    """
    request_str = "https://opendata.aemet.es/opendata/api/valores/climatologicos/diarios/datos/fechaini/{}/fechafin/{}/estacion/{}/?api_key={}".format(date_init,date_final,station,api_key)
    headers = {'cache-control': "no-cache"}

    conn = http.client.HTTPSConnection("opendata.aemet.es")
    try:
        # Send the initial request and interpret the response. json.loads is
        # used instead of ast.literal_eval because the payload is JSON, whose
        # null/true/false literals are not valid Python literals.
        conn.request("GET", request_str, headers=headers)
        dict_init = json.loads(conn.getresponse().read().decode("utf-8"))
        if 'datos' not in dict_init:
            raise KeyError("AEMET response for station {} has no 'datos' URL: {}".format(station, dict_init))

        # Send the request for the data and interpret the response.
        conn.request("GET", dict_init['datos'], headers=headers)
        dict_data = json.loads(conn.getresponse().read().decode("ISO-8859-1"))
    finally:
        # Always release the socket, even when a request or the parsing fails.
        conn.close()

    return pd.DataFrame(dict_data)

def prepare_df(df):
    """Reduce a raw weather dataframe to typed date and measurement columns.

    Keeps 'fecha', 'prec', 'sol', 'tmax', 'tmed' and 'tmin'. The measurement
    columns arrive as strings with a decimal comma; they are converted to
    floats, with 'Ip' in 'prec' replaced by 0.0. 'fecha' is parsed as a
    YYYY-MM-DD date. The input dataframe is not modified.

    Returns the cleaned dataframe, or None (after printing a warning that
    names the first absent column) when a required column is missing.
    """
    value_cols = ['prec', 'sol', 'tmax', 'tmed', 'tmin']
    required_keys = ['fecha'] + value_cols

    # Bail out on the first required column that is absent.
    absent = next((key for key in required_keys if key not in df), None)
    if absent is not None:
        print("Warning: dataframe missing",absent)
        return None

    # Work on a copy restricted to the required columns.
    meteo = df[required_keys].copy()

    # Decimal comma -> decimal point, column by column.
    as_text = {name: meteo[name].str.replace(',','.') for name in value_cols}

    # 'Ip' precipitation entries become 0.0.
    as_text['prec'] = as_text['prec'].str.replace('Ip','0.0')

    # Numeric conversion happens only after every column has been cleaned.
    as_float = {name: series.astype('float') for name, series in as_text.items()}
    for name in value_cols:
        meteo[name] = as_float[name]

    # Dates become datetime objects.
    meteo['fecha'] = pd.to_datetime(meteo['fecha'], format="%Y-%m-%d")

    return meteo

In [4]:
# Weather station code used for each region; the trailing comment on every
# entry gives the station's name.
sensor_dict = {
    "Andalucia": "5402",  # CORDOBA/AEROPUERTO
    "Aragon": "9434",  # ZARAGOZA/AEROPUERTO
    "Asturias": "1208H",  # GIJON, MUSEL
    "Baleares": "B278",  # PALMA DE MALLORCA/SON SAN JUAN
    "Canarias": "C029O",  # LANZAROTE/AEROPUERTO
    "Cantabria": "1111",  # SANTANDER I,CMT
    "Castilla-La Mancha": "4121",  # CIUDAD REAL
    "Castilla y Leon": "2422",  # VALLADOLID
    "Cataluna": "0016A",  # REUS/AEROPUERTO
    "Ceuta": "5000C",  # CEUTA
    "C. Valenciana": "8414A",  # VALENCIA/AEROPUERTO
    "Extremadura": "3469A",  # CACERES
    "Galicia": "1428",  # SANTIAGO DE COMPOSTELA/LABACOLLA
    "Madrid": "3200",  # MADRID/GETAFE
    "Melilla": "6000A",  # MELILLA
    "Murcia": "7178I",  # MURCIA
    "Navarra": "9263D",  # PAMPLONA/NOAIN
    "Pais Vasco": "1024E",  # SAN SEBASTIAN,IGUELDO
    "La Rioja": "9170",  # LOGRONO/AGONCILLO
}

# Filled later with one cleaned weather dataframe per region.
meteo_regions = dict()

## Fetch a dataframe for each region over the selected date range

In [5]:
# Date range requested for every station.
date_init = "2020-02-27T00:00:00UTC"
date_final = "2020-03-24T23:59:59UTC"
for region,station in sensor_dict.items():
    print(region,station)
    df = get_meteo_df(station,date_init,date_final,api_key)
    meteo = prepare_df(df)
    # prepare_df returns None when a required column is missing. Storing that
    # None would make the later merge step fail, so the region is skipped
    # (and any stale entry from a previous run is removed).
    if meteo is None:
        print("Skipping", region, "- station", station, "lacks required columns")
        meteo_regions.pop(region, None)
        continue
    meteo_regions[region] = meteo

Andalucia 5402
Aragon 9434
Asturias 1208H
Baleares B278
Canarias C029O
Cantabria 1111
Castilla-La Mancha 4121
Castilla y Leon 2422
Cataluna 0016A
Ceuta 5000C
C. Valenciana 8414A
Extremadura 3469A
Galicia 1428
Madrid 3200
Melilla 6000A
Murcia 7178I
Navarra 9263D
Pais Vasco 1024E
La Rioja 9170


## Add the COVID data

In [6]:
# Load the case counts, strip accents from the region names (decompose with
# NFKD, then discard every non-ASCII byte) and index the table by region name.
cases = (
    pd.read_csv("ccaa_covid19_casos.csv")
    .assign(
        CCAA=lambda frame: frame['CCAA']
        .str.normalize('NFKD')
        .str.encode('ascii', errors='ignore')
        .str.decode('utf-8')
    )
    .set_index('CCAA')
)

Merge the COVID data into the dataframe for each region

In [19]:
df_regions = {}
for region,df in meteo_regions.items():

    print(region)

    # Row of the cases table for this region, without its first entry; the
    # remaining labels are the dates and the values are the case counts.
    region_counts = cases.loc[region].iloc[1:]

    # Two-column frame of case counts and dates, with the dates parsed.
    cframe = pd.DataFrame({'ncases': region_counts.values, 'fecha': region_counts.index.values})
    cframe['fecha'] = pd.to_datetime(cframe['fecha'], format="%Y-%m-%d")

    # Left join on the date: every weather row is kept.
    mdf = pd.merge(left=df, right=cframe, how='left', on='fecha')
    df_regions[region] = mdf
print("DONE")

Andalucia
Aragon
Asturias
Baleares
Canarias
Cantabria
Castilla-La Mancha
Castilla y Leon
Cataluna
Ceuta
C. Valenciana
Extremadura
Galicia
Madrid
Melilla
Murcia
Navarra
Pais Vasco
La Rioja
DONE


In [55]:
# Display the merged dataframes (a dict keyed by region name).
df_regions

{'Andalucia':         fecha  prec   sol  tmax  tmed  tmin  ncases
 0  2020-02-27   0.0  10.8  22.2  12.2   2.3       1
 1  2020-02-28   0.0   9.5  23.9  14.8   5.7       6
 2  2020-02-29   0.6   7.8  16.9  13.6  10.3       8
 3  2020-03-01   1.3   7.4  18.1  14.0   9.9      12
 4  2020-03-02   0.1   9.3  20.7  15.2   9.8      12
 5  2020-03-03   0.0   6.8  17.6  11.8   6.1      13
 6  2020-03-04   0.0  10.0  22.4  15.6   8.9      13
 7  2020-03-05   0.0   7.1  19.0  12.3   5.6      12
 8  2020-03-06   0.0  10.8  19.7  12.4   5.2      21
 9  2020-03-07   0.0  10.1  20.4  11.2   2.0      27
 10 2020-03-08   0.0  10.8  23.5  12.8   2.2      35
 11 2020-03-09   0.0  11.2  23.8  13.8   3.9      54
 12 2020-03-10   0.0  10.9  29.0  17.7   6.4      71
 13 2020-03-11   0.0   8.6  28.7  18.0   7.3      90
 14 2020-03-12   0.0  10.6  26.9  17.8   8.8     115
 15 2020-03-13   0.0  11.0  22.9  14.9   6.9     219
 16 2020-03-14   0.0   9.1  22.3  16.8  11.3     269
 17 2020-03-15   0.4   5.8  23.6 

### Write all the dataframes to file

In [53]:
# Create the output directory if needed; to_csv does not create missing
# directories and would otherwise fail on a fresh checkout.
os.makedirs("data", exist_ok=True)

# One CSV file per region, named after the region key.
for key, val in df_regions.items():
    val.to_csv("data/data_{}.csv".format(str(key)))

### Code for running quick tests of individual stations

In [50]:
# Station identifier to test; the date range and API key come from earlier cells.
estacion = "1109"
df = get_meteo_df(estacion,date_init,date_final,api_key)

In [51]:
# Clean the raw frame; prepare_df returns None if a required column is missing.
meteo = prepare_df(df)

In [52]:
# Display the cleaned dataframe for the tested station.
meteo

Unnamed: 0,fecha,prec,sol,tmax,tmed,tmin
0,2020-02-27,1.2,0.0,17.3,14.4,11.4
1,2020-02-28,0.0,5.0,19.8,14.0,8.3
2,2020-02-29,1.3,0.0,19.8,14.9,10.0
3,2020-03-01,15.0,5.4,17.0,12.4,7.8
4,2020-03-02,13.6,0.5,11.7,9.4,7.2
5,2020-03-03,4.2,4.6,18.0,12.8,7.6
6,2020-03-04,3.8,1.2,19.6,15.3,11.0
7,2020-03-05,23.0,3.1,19.8,14.5,9.2
8,2020-03-06,16.5,1.8,11.8,8.2,4.6
9,2020-03-07,0.8,4.2,15.9,11.4,6.8


---

## OLD CODE: Information for data request
Get an API key here: https://opendata.aemet.es/centrodedescargas/altaUsuario?

**Available stations (Valencia):**
- 8058X: Oliva
- 8325X: Polinyà de Xúquer
- 8309X: Utiel (has full set of values)
- 8416Y: Valencia
- 8416: Valencia
- 8414A: Valencia Aeropuerto (has full set of values)
- 8293X: Xàtiva (has full set of values) 

In [None]:
# Station and date range for the standalone request in the next cell.
# NOTE: this rebinds date_init/date_final, which the per-region fetch loop
# earlier in the notebook also uses.
estacion = "8414A"
date_init = "2020-01-01T00:00:00UTC"
date_final = "2020-03-22T23:59:59UTC"

## Send the request

In [None]:
# Send the initial request.
conn = http.client.HTTPSConnection("opendata.aemet.es")
request_str = "https://opendata.aemet.es/opendata/api/valores/climatologicos/diarios/datos/fechaini/{}/fechafin/{}/estacion/{}/?api_key={}".format(date_init,date_final,estacion,api_key)
headers = {'cache-control': "no-cache"}
try:
    conn.request("GET", request_str, headers=headers)

    # Interpret the response. The payload is JSON, so it is parsed with
    # json.loads; ast.literal_eval rejects JSON's null/true/false literals.
    res_init = conn.getresponse()
    data_init = res_init.read()
    dict_init = json.loads(data_init.decode("utf-8"))
    url_init = dict_init['datos']
    url_meta = dict_init['metadatos']

    # Send the request for the metadata (kept as text and printed).
    print("Requesting metadata from:",url_meta)
    conn.request("GET", url_meta, headers=headers)

    res_meta = conn.getresponse()
    data_meta = res_meta.read()
    dict_meta = data_meta.decode("ISO-8859-1")
    print(dict_meta)

    # Send the request for the data.
    print("Requesting data from:",url_init)
    conn.request("GET", url_init, headers=headers)

    # Interpret the response.
    res_final = conn.getresponse()
    data_final = res_final.read()
    dict_data = json.loads(data_final.decode("ISO-8859-1"))
finally:
    # Release the socket whether or not the requests succeeded.
    conn.close()

## Examine the dataset

In [None]:
# Build a dataframe from the parsed data response and preview its first rows.
meteo = pd.DataFrame(dict_data)
meteo.head()

Use '.' as decimal separator (replace ',')

In [None]:
# Columns holding numbers written with a decimal comma.
decimal_cols = ['prec', 'presMax', 'presMin', 'racha', 'sol', 'tmax', 'tmed', 'tmin', 'velmedia']
meteo[decimal_cols] = meteo[decimal_cols].apply(lambda col: col.str.replace(',','.'))

Replace 'Ip' precipitation values with '0.0'

In [None]:
# 'Ip' precipitation entries are replaced by the string '0.0'.
meteo['prec'] = meteo['prec'].str.replace('Ip','0.0')

Drop unwanted entries

In [None]:
# Drop the columns that are not used further on. Rebinding the name (instead
# of inplace=True) and ignoring labels that are already absent keeps this cell
# safe to re-run and tolerant of responses that lack one of these columns.
meteo = meteo.drop(columns=['altitud','dir','horaPresMax','horaPresMin','horaracha','horatmax','horatmin','indicativo','nombre','provincia'], errors='ignore')

Convert to numerical values

In [None]:
# Measurement columns, converted from text to floating point in one step.
numeric_cols = ['prec','presMax','presMin','racha','sol','tmax','tmed','tmin','velmedia']
meteo[numeric_cols] = meteo[numeric_cols].astype('float')

Convert dates to datetime objects

In [None]:
# Parse the 'fecha' strings (YYYY-MM-DD) into datetime values.
meteo['fecha'] = pd.to_datetime(meteo['fecha'], format="%Y-%m-%d")

## Plots

In [None]:
# One stacked panel per column (everything except the date), sharing the x axis.
cols_to_plot = meteo.columns.drop('fecha')
# squeeze=False makes subplots always return a 2-D array of axes, so indexing
# also works when only a single column is plotted.
fig,axs =  plt.subplots(len(cols_to_plot), 1, figsize=(20,20), sharex=True, squeeze=False)
axs = axs[:, 0]
for ax, column in zip(axs, cols_to_plot):
    ax.plot(meteo.fecha, meteo[column])
    ax.set_ylabel(column)
# Label the bottom panel explicitly instead of relying on the leaked loop index.
axs[-1].set_xlabel('date')
# Adjust the layout once everything is drawn so the labels are taken into account.
fig.tight_layout();

In [None]:
# Maximum temperature over time, drawn on the current axes.
ax = plt.gca()
ax.plot(meteo.fecha,meteo['tmax'])
plt.xticks(rotation='vertical')

# One major tick per month.
months = mdates.MonthLocator()
ax.xaxis.set_major_locator(months)
ax.set_ylabel('Max Temperature (C)')