# Weather Data Analysis with a public API

In [1]:
import json
import os

import grequests as asynhttp
import pandas as pd

### Data Collection and Cleaning

In [2]:
# Capital cities to analyse, grouped by continent. The names are the query
# strings sent to the geocoding endpoint.
capitales = {
    "Africa": ["El Cairo", "Abuya", "Nairobi", "Addis Abeba", "Pretoria"],
    "America": ["Washington D. C.", "Ottawa", "Ciudad de México", "Brasilia", "Buenos Aires",],
    "Asia": ["Pekín", "Tokio", "Nueva Delhi", "Seúl", "Riad"],
    "Europa": ["Madrid", "París", "Berlín", "Roma", "Londres"],
    "Oceania": ["Canberra", "Wellington", "Port Moresby", "Suva", "Apia"],
}

# OpenWeatherMap API key. It is read from the environment so the secret is not
# stored in (or committed with) the notebook. A key that was previously
# hard-coded here should be treated as leaked and revoked.
key = os.environ.get("OPENWEATHER_API_KEY", "")
if not key:
    print("OPENWEATHER_API_KEY is not set; the API requests in the following cells will be rejected.")

In [3]:
# Resolve every capital to coordinates through the geocoding endpoint.
# HTTPS keeps the API key out of clear-text traffic, and `params=` lets the
# HTTP layer escape spaces and accented characters in the city names.
GEO_URL = "https://api.openweathermap.org/geo/1.0/direct"

localizaciones = {
    "continente": [],
    "name": [],
    "lat": [],
    "lon": [],
}

# Keep (continent, city) next to each request so that a failed lookup cannot
# shift the continent column relative to the name/lat/lon columns.
consultas = [
    (continente, ciudad)
    for continente, ciudades in capitales.items()
    for ciudad in ciudades
]
rs = [asynhttp.get(GEO_URL, params={"q": ciudad, "appid": key}) for _, ciudad in consultas]

futures = asynhttp.map(rs)
sin_resolver = []
for (continente, ciudad), futuro in zip(consultas, futures):
    # map() yields None for a request that failed outright; an HTTP error
    # status or an empty result list is likewise treated as "not resolved".
    response = futuro.json() if futuro is not None and futuro.ok else None
    if not response:
        sin_resolver.append(ciudad)
        continue
    mejor = response[0]  # only the first match is used
    localizaciones["continente"].append(continente)
    localizaciones["name"].append(mejor["name"])
    localizaciones["lat"].append(mejor["lat"])
    localizaciones["lon"].append(mejor["lon"])

if sin_resolver:
    print(f"Geocoding returned no result for: {', '.join(sin_resolver)}")

In [4]:
# Turn the collected lists into a table with one row per capital, keyed by the
# place name taken from the geocoding responses.
df_localizaciones = pd.DataFrame(data=localizaciones).set_index(keys="name")
df_localizaciones

Unnamed: 0_level_0,continente,lat,lon
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Cairo,Africa,30.044388,31.235726
Abuja,Africa,9.064331,7.489297
Nairobi,Africa,-1.303264,36.826384
Addis Ababa,Africa,9.010793,38.761252
Pretoria,Africa,-25.745928,28.18791
Washington,America,38.895037,-77.036543
Ottawa,America,45.420878,-75.690111
Mexico City,America,19.43263,-99.133178
Brasília,America,-15.793404,-47.882317
Buenos Aires,America,-34.607568,-58.437089


In [5]:
# Fetch the current weather for every resolved location (metric units,
# descriptions in Spanish via lang=sp).
WEATHER_URL = "https://api.openweathermap.org/data/2.5/weather"

# Read lat/lon by column name instead of by position in `.values`, so the
# request does not silently break if the column order ever changes.
rs = [
    asynhttp.get(
        WEATHER_URL,
        params={"lat": float(lat), "lon": float(lon), "appid": key, "units": "metric", "lang": "sp"},
    )
    for lat, lon in zip(df_localizaciones["lat"], df_localizaciones["lon"])
]

futures = asynhttp.map(rs)

tiempo = []
for nombre, futuro in zip(df_localizaciones.index, futures):
    # A later cell joins the weather rows to the locations by row position,
    # so a missing response must stop the run rather than be skipped.
    if futuro is None:
        raise RuntimeError(f"Weather request failed for {nombre}")
    futuro.raise_for_status()  # surface 401/429/... instead of parsing an error payload
    tiempo.append(futuro.json())

In [6]:
# Drop the "id" and "icon" fields from every weather condition record; they
# are not used in the analysis.
# pop(..., None) instead of del keeps the cell idempotent: re-running it after
# the keys are already gone no longer raises KeyError.
for ciudad in tiempo:
    for condicion in ciudad["weather"]:
        condicion.pop("id", None)
        condicion.pop("icon", None)

In [7]:
# Flatten the raw responses: one row per entry of each city's "weather" list,
# with the selected measurement fields copied alongside. Nested paths become
# column names joined with "-" (e.g. "main-temp"); errors="ignore" lets
# records that lack an optional field through instead of raising.
campos_main = ("temp", "feels_like", "temp_min", "temp_max", "pressure", "humidity", "sea_level", "grnd_level")
campos_wind = ("speed", "deg", "gust")
campos_meta = (
    [["main", campo] for campo in campos_main]
    + ["visibility"]
    + [["wind", campo] for campo in campos_wind]
    + [["rain", "1h"], ["snow", "1h"], ["clouds", "all"], "dt"]
)

df_tiempo = pd.json_normalize(
    tiempo,
    record_path="weather",
    meta=campos_meta,
    sep="-",
    errors="ignore",
)

In [8]:
# Attach the weather columns to the location columns. The join is purely
# positional (row i of one frame is matched with row i of the other), so it is
# only valid when both frames have exactly one row per city. A response that
# carries more than one "weather" record would otherwise shift every
# following row onto the wrong city without any error.
if len(df_tiempo) != len(df_localizaciones):
    raise ValueError(
        f"Cannot align weather rows ({len(df_tiempo)}) with locations ({len(df_localizaciones)}) by position"
    )
df_tiempo = df_localizaciones.reset_index().join(df_tiempo)

In [9]:
# Inspect column dtypes and non-null counts of the joined table before any type conversion
df_tiempo.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 25 entries, 0 to 24
Data columns (total 22 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   name             25 non-null     object 
 1   continente       25 non-null     object 
 2   lat              25 non-null     float64
 3   lon              25 non-null     float64
 4   main             25 non-null     object 
 5   description      25 non-null     object 
 6   main-temp        25 non-null     object 
 7   main-feels_like  25 non-null     object 
 8   main-temp_min    25 non-null     object 
 9   main-temp_max    25 non-null     object 
 10  main-pressure    25 non-null     object 
 11  main-humidity    25 non-null     object 
 12  main-sea_level   25 non-null     object 
 13  main-grnd_level  25 non-null     object 
 14  visibility       25 non-null     object 
 15  wind-speed       25 non-null     object 
 16  wind-deg         25 non-null     object 
 17  wind-gust        6

In [10]:
# Cast the measurement columns from object to numeric dtypes.
columnas_float = [
    "main-temp", "main-feels_like", "main-temp_min", "main-temp_max",
    "wind-speed", "wind-gust", "rain-1h", "snow-1h",
]
columnas_int = [
    "main-pressure", "main-humidity", "main-sea_level", "main-grnd_level",
    "visibility", "wind-deg", "clouds-all",
]
tipos = {**dict.fromkeys(columnas_float, "float"), **dict.fromkeys(columnas_int, "int")}
df_tiempo = df_tiempo.astype(tipos)

# "dt" is parsed as seconds since the epoch in UTC, then shown in Madrid local time.
marca_utc = pd.to_datetime(df_tiempo["dt"], unit="s", utc=True)
df_tiempo["dt"] = marca_utc.dt.tz_convert("Europe/Madrid")

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 25 entries, 0 to 24
Data columns (total 22 columns):
 #   Column           Non-Null Count  Dtype                        
---  ------           --------------  -----                        
 0   name             25 non-null     object                       
 1   continente       25 non-null     object                       
 2   lat              25 non-null     float64                      
 3   lon              25 non-null     float64                      
 4   main             25 non-null     object                       
 5   description      25 non-null     object                       
 6   main-temp        25 non-null     float64                      
 7   main-feels_like  25 non-null     float64                      
 8   main-temp_min    25 non-null     float64                      
 9   main-temp_max    25 non-null     float64                      
 10  main-pressure    25 non-null     int64                        
 11  main-hum

In [11]:
# Replace every missing value with 0.
# NOTE(review): this also writes 0 into wind-gust, so a city with no gust
# reading becomes indistinguishable from one reporting a gust of 0. Confirm
# that this is intended for the wind analysis further down.
df_tiempo = df_tiempo.fillna(value=0)

In [12]:
# Spot-check five randomly chosen rows of the cleaned table
df_tiempo.sample(n=5)

Unnamed: 0,name,continente,lat,lon,main,description,main-temp,main-feels_like,main-temp_min,main-temp_max,...,main-sea_level,main-grnd_level,visibility,wind-speed,wind-deg,wind-gust,rain-1h,snow-1h,clouds-all,dt
6,Ottawa,America,45.420878,-75.690111,Clouds,algo de nubes,-4.36,-9.74,-5.44,-3.34,...,1015,1003,10000,4.12,50,0.0,0.0,0.0,20,2025-12-02 21:18:29+01:00
10,Beijing,Asia,39.906217,116.391276,Clear,cielo claro,-7.06,-14.06,-7.06,-7.06,...,1031,1025,10000,5.78,304,12.65,0.0,0.0,0,2025-12-02 21:19:41+01:00
12,New Delhi,Asia,28.613895,77.209006,Mist,niebla,12.07,11.47,12.07,12.07,...,1016,990,2500,0.0,0,0.0,0.0,0.0,0,2025-12-02 21:16:33+01:00
24,Apia,Oceania,-13.834369,-171.769279,Rain,lluvia ligera,29.95,36.16,29.95,29.95,...,1012,1011,10000,2.24,100,2.13,0.18,0.0,12,2025-12-02 21:19:52+01:00
13,Seoul,Asia,37.566679,126.978291,Clear,cielo claro,-7.24,-13.69,-7.24,-7.24,...,1026,1016,10000,4.63,360,0.0,0.0,0.0,0,2025-12-02 21:19:33+01:00


In [18]:
# Persist the cleaned snapshot next to the notebook (the row index is written
# as the first, unnamed column).
df_tiempo.to_csv(path_or_buf="tiempo_global.csv")

TypeError: 'NoneType' object is not subscriptable

### Data Analysis
- Which cities are the hottest and coldest right now?
- Is there a relationship between temperature and humidity?
- Which continent seems to have the windiest cities?
- What’s the most common weather description (e.g., clear, cloudy, rainy)?

In [14]:
# Hottest city: locate the row label of the highest temperature, then show its name and value
fila_mas_calida = df_tiempo["main-temp"].idxmax()
df_tiempo.loc[fila_mas_calida, ["name", "main-temp"]]

name          Apia
main-temp    29.95
Name: 24, dtype: object

In [15]:
# Coldest city: locate the row label of the lowest temperature, then show its name and value
fila_mas_fria = df_tiempo["main-temp"].idxmin()
df_tiempo.loc[fila_mas_fria, ["name", "main-temp"]]

name         Seoul
main-temp    -7.24
Name: 13, dtype: object

In [16]:
# Pairwise correlation between position and the main weather measurements,
# rendered as a red/blue heat map fixed to the [-1, 1] range.
columnas_corr = [
    "lat",
    "lon",
    "main-temp",
    "main-feels_like",
    "main-humidity",
    "main-sea_level",
    "wind-speed",
]
(
    df_tiempo[columnas_corr]
    .corr()
    .style.background_gradient(cmap="RdBu", vmin=-1, vmax=1)
)

Unnamed: 0,lat,lon,main-temp,main-feels_like,main-humidity,main-sea_level,wind-speed
lat,1.0,-0.181239,-0.612226,-0.628117,-0.069159,0.279126,0.07323
lon,-0.181239,1.0,-0.183562,-0.188297,0.05078,0.200511,0.075241
main-temp,-0.612226,-0.183562,1.0,0.994816,-0.068024,-0.495651,-0.449988
main-feels_like,-0.628117,-0.188297,0.994816,1.0,-0.004136,-0.505425,-0.451187
main-humidity,-0.069159,0.05078,-0.068024,-0.004136,1.0,-0.489692,0.01333
main-sea_level,0.279126,0.200511,-0.495651,-0.505425,-0.489692,1.0,0.107549
wind-speed,0.07323,0.075241,-0.449988,-0.451187,0.01333,0.107549,1.0


In [27]:
# TODO: combine this table and the wind-speed one below into a single hierarchical table
# Strongest gust recorded per continent, highest first
(
    df_tiempo
    .groupby(by="continente")["wind-gust"]
    .agg("max")
    .sort_values(ascending=False)
)

continente
Asia       12.65
Africa      4.28
Oceania     2.13
America     0.00
Europa      0.00
Name: wind-gust, dtype: float64

In [28]:
# Highest sustained wind speed per continent, highest first
(
    df_tiempo
    .groupby(by="continente")["wind-speed"]
    .agg("max")
    .sort_values(ascending=False)
)

continente
Oceania    7.20
Asia       5.78
Africa     5.66
America    5.14
Europa     5.14
Name: wind-speed, dtype: float64

In [32]:
# Count how many cities report each top-level weather condition, most frequent first
df_tiempo.value_counts(subset="main")

main
Clouds    17
Clear      6
Mist       1
Rain       1
Name: count, dtype: int64

### Visualization

### Summary