In [1]:
import pandas as pd 

df = pd.read_csv("temperature_data.csv")
df.head()

Unnamed: 0,city,timestamp,temperature,season
0,New York,2010-01-01,4.502302,winter
1,New York,2010-01-02,6.271956,winter
2,New York,2010-01-03,5.597786,winter
3,New York,2010-01-04,2.531935,winter
4,New York,2010-01-05,3.287991,winter


In [10]:
# just pandas
import datetime
import numpy as np

start = datetime.datetime.now()

df_pd = pd.read_csv("temperature_data.csv")

df_pd["moving_avg_30d"] = (
    df_pd.groupby("city")["temperature"]
      .rolling(window=30, min_periods=1)
      .mean()
      .reset_index(level=0, drop=True)
)

season_stats = (
    df_pd.groupby(["city", "season"])["temperature"]
      .agg(
          mean_temp_season="mean",
          std_temp_season="std"
      )
      .reset_index()
)

df_pd = df_pd.merge(
    season_stats,
    on=["city", "season"],
    how="left"
)

df_pd["anomaly"] = (np.abs(df_pd['temperature'] - df_pd['mean_temp_season']) > 2 * df_pd['std_temp_season'])

print("processed in", datetime.datetime.now() - start)

display(df_pd[df_pd["anomaly"] == True])

processed in 0:00:00.048319


Unnamed: 0,city,timestamp,temperature,season,moving_avg_30d,mean_temp_season,std_temp_season,anomaly
7,New York,2010-01-08,-10.351356,winter,2.207345,0.207991,4.969005,True
18,New York,2010-01-19,11.295133,winter,1.530647,0.207991,4.969005,True
23,New York,2010-01-24,11.071130,winter,1.731125,0.207991,4.969005,True
47,New York,2010-02-17,-15.990610,winter,1.288345,0.207991,4.969005,True
68,New York,2010-03-10,-1.161729,spring,3.520369,10.227827,4.878745,True
...,...,...,...,...,...,...,...,...
54659,Mexico City,2019-09-30,26.650136,autumn,15.826828,14.952652,5.112207,True
54664,Mexico City,2019-10-05,4.436405,autumn,15.189087,14.952652,5.112207,True
54690,Mexico City,2019-10-31,2.481776,autumn,15.672717,14.952652,5.112207,True
54698,Mexico City,2019-11-08,26.255102,autumn,15.345348,14.952652,5.112207,True


In [21]:
import polars as pl

start = datetime.datetime.now()

df_polars = pl.scan_csv("temperature_data.csv", try_parse_dates=True)

result = (
    df_polars
    .sort(["city", "timestamp"])
    .with_columns(
        pl.col("temperature")
          .rolling_mean(window_size=30)
          .over("city")
          .alias("moving_avg_30d")
    )
    .with_columns(
        pl.mean("temperature").over(["city", "season"]).alias("mean_temp_season"),
        pl.std("temperature").over(["city", "season"]).alias("std_temp_season"),
    )
    .with_columns(
        (
            (pl.col("temperature") - pl.col("mean_temp_season")).abs()
            > 2 * pl.col("std_temp_season")
        ).alias("anomaly")
    )
)

final_df_polars = result.collect().to_pandas()

print("processed in", datetime.datetime.now() - start)

display(final_df_polars)

processed in 0:00:00.014128


Unnamed: 0,city,timestamp,temperature,season,moving_avg_30d,mean_temp_season,std_temp_season,anomaly
0,Beijing,2010-01-01,-10.936319,winter,,-2.124854,4.999483,False
1,Beijing,2010-01-02,9.874975,winter,,-2.124854,4.999483,True
2,Beijing,2010-01-03,-0.136526,winter,,-2.124854,4.999483,False
3,Beijing,2010-01-04,-0.851152,winter,,-2.124854,4.999483,False
4,Beijing,2010-01-05,-4.785551,winter,,-2.124854,4.999483,False
...,...,...,...,...,...,...,...,...
54745,Tokyo,2019-12-25,10.768015,winter,9.258707,5.814909,5.027942,False
54746,Tokyo,2019-12-26,9.513571,winter,9.253099,5.814909,5.027942,False
54747,Tokyo,2019-12-27,12.382134,winter,8.896762,5.814909,5.027942,False
54748,Tokyo,2019-12-28,2.881147,winter,8.060673,5.814909,5.027942,False


In [20]:
final_df_polars.to_pandas()

Unnamed: 0,city,timestamp,temperature,season,moving_avg_30d,mean_temp_season,std_temp_season,anomaly
0,Beijing,2010-01-01,-10.936319,winter,,-2.124854,4.999483,False
1,Beijing,2010-01-02,9.874975,winter,,-2.124854,4.999483,True
2,Beijing,2010-01-03,-0.136526,winter,,-2.124854,4.999483,False
3,Beijing,2010-01-04,-0.851152,winter,,-2.124854,4.999483,False
4,Beijing,2010-01-05,-4.785551,winter,,-2.124854,4.999483,False
...,...,...,...,...,...,...,...,...
54745,Tokyo,2019-12-25,10.768015,winter,9.258707,5.814909,5.027942,False
54746,Tokyo,2019-12-26,9.513571,winter,9.253099,5.814909,5.027942,False
54747,Tokyo,2019-12-27,12.382134,winter,8.896762,5.814909,5.027942,False
54748,Tokyo,2019-12-28,2.881147,winter,8.060673,5.814909,5.027942,False


In [39]:
from dotenv import load_dotenv
import requests
import os
import json
import random
import time

load_dotenv()
api_key = os.getenv("openweatherapi")



In [36]:
cities = final_df_polars['city'].value_counts().index.tolist()

In [47]:
print(cities)

['Beijing', 'Berlin', 'Cairo', 'Dubai', 'London', 'Los Angeles', 'Mexico City', 'Moscow', 'Mumbai', 'New York', 'Paris', 'Rio de Janeiro', 'Singapore', 'Sydney', 'Tokyo']


In [41]:
REQUESTS = 10

def sync_requests():
    for _ in range(REQUESTS):
        city = random.choice(cities)
        response = requests.get(f'https://api.openweathermap.org/data/2.5/weather?q={city}&APPID={api_key}')
        response.raise_for_status()

start = time.perf_counter()
sync_requests()
end = time.perf_counter()

print(f"Sync time: {end - start:.2f} seconds")

Sync time: 8.39 seconds


In [44]:
import nest_asyncio
nest_asyncio.apply()

In [45]:
import asyncio
import aiohttp

REQUESTS = 10

async def fetch(session, url):
    async with session.get(url) as response:
        response.raise_for_status()
        await response.text()

async def async_requests():
    async with aiohttp.ClientSession() as session:
        tasks = [fetch(session, f'https://api.openweathermap.org/data/2.5/weather?q={random.choice(cities)}&APPID={api_key}') for _ in range(REQUESTS)]
        await asyncio.gather(*tasks)

start = time.perf_counter()
asyncio.run(async_requests())
end = time.perf_counter()

print(f"Async time: {end - start:.2f} seconds")

Async time: 1.38 seconds


In [None]:
q="London"
date=datetime.datetime.today().strftime('%Y-%m-%d')
x = requests.get(f'https://api.openweathermap.org/data/2.5/weather?q={q}&APPID={api_key}')
temp_k = json.loads(x.text)['main']['temp']
print(temp_k - 273.15)
print(x.text)