In [None]:
!pip install s3fs

# OpenWeatherMap API key, sent as the `appid` query parameter.
# Left blank here; fill it in before running.
API_KEY = ""

# AWS credentials and target bucket for the CSV upload.
# S3_KEY / S3_SECRET are handed to pandas as the storage_options
# "key" / "secret" entries; S3_BUCKET is the bucket part of the s3:// path.
S3_KEY = ""
S3_SECRET = ""
S3_BUCKET = ""
# NOTE(review): AWS_REGION is not referenced in the visible code — confirm whether it is still needed.
AWS_REGION = ""

In [None]:
import datetime
import functools
from concurrent.futures import ThreadPoolExecutor, as_completed

import pandas as pd
import requests
from tqdm import tqdm

# City whose weather history is downloaded; also used as the prefix of the output CSV file name.
NUME_ORAS = "Roma"

# define a function to determine the latitude and longitude of the city
@functools.lru_cache(maxsize=128)
def get_lat_lon(nume_oras: str):
    """Return the ``(lat, lon)`` of a city via the OpenWeatherMap geocoding API.

    Results are cached per city name because ``get_weather_data`` looks the
    coordinates up again for every date it fetches.

    Args:
        nume_oras: City name; it is title-cased before being sent to the API.

    Returns:
        A ``(lat, lon)`` tuple taken from the first geocoding match.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.RequestException: On network failure or timeout.
        ValueError: If the API returns no match for the city.
    """
    response = requests.get(
        # HTTPS so the API key is not sent in clear text.
        "https://api.openweathermap.org/geo/1.0/direct",
        # Passing params lets requests URL-encode names containing spaces,
        # accents or reserved characters; the key comes from the shared
        # API_KEY setting instead of being hard-coded in the URL.
        params={"q": nume_oras.title(), "limit": 5, "appid": API_KEY},
        timeout=30,
    )
    response.raise_for_status()

    matches = response.json()
    if not matches:
        raise ValueError(f"No geocoding result for city {nume_oras!r}")

    first_match = matches[0]
    return first_match["lat"], first_match["lon"]

# define a function to extract weather data for a date using the API call based on the previously found coordinates
def get_weather_data(date):
    """Fetch the OpenWeatherMap daily summary for ``NUME_ORAS`` on one date.

    Args:
        date: Day to query, formatted as ``YYYY-MM-DD``.

    Returns:
        The decoded JSON payload when the API answers with HTTP 200,
        otherwise ``None``.

    Raises:
        requests.RequestException: On network failure or timeout.
    """
    lat, lon = get_lat_lon(nume_oras=NUME_ORAS)
    response = requests.get(
        "https://api.openweathermap.org/data/3.0/onecall/day_summary",
        params={"lat": lat, "lon": lon, "date": date, "appid": API_KEY},
        # Without a timeout a stalled connection would block its worker
        # thread indefinitely.
        timeout=30,
    )
    if response.status_code != 200:
        # Best effort: a day that cannot be fetched is reported as None so
        # the caller can skip it instead of aborting the whole download.
        return None
    return response.json()

# define a function to generate the list of dates covering the last 3650 days (roughly 10 years)
def generate_date_range(days: int = 3650):
    """Return the most recent calendar dates as ``YYYY-MM-DD`` strings.

    Args:
        days: How many dates to produce, counting back from today
            (today included). The default of 3650 is roughly ten years.

    Returns:
        A list of ``days`` date strings, newest first, starting with
        today's local date. The list is empty when ``days`` is zero or
        negative.
    """
    # Only the calendar date is needed, so work with ``date`` objects
    # directly instead of adjusting the time of day on a ``datetime``.
    today = datetime.date.today()
    return [
        (today - datetime.timedelta(days=offset)).isoformat()
        for offset in range(days)
    ]

# define a function that collects the weather data for every date returned by generate_date_range
def fetch_all_weather_data():
    """Download the daily weather summary for every generated date.

    The requests are fanned out over a pool of 10 threads and a progress
    bar tracks completion. Dates whose request raised are reported on
    stdout and skipped; dates that yielded ``None`` are skipped silently.

    Returns:
        A list with the payloads that were fetched successfully, in the
        order in which the requests completed.
    """
    all_dates = generate_date_range()
    collected = []

    with ThreadPoolExecutor(max_workers=10) as pool:
        # Remember which date each future belongs to for error reporting.
        pending = {}
        for day in all_dates:
            pending[pool.submit(get_weather_data, day)] = day

        progress = tqdm(as_completed(pending), total=len(all_dates))
        for finished in progress:
            date = pending[finished]
            try:
                payload = finished.result()
            except Exception as exc:
                print(f"Date {date} generated an exception: {exc}")
                continue
            if payload is not None:
                collected.append(payload)

    return collected


# Run the full download; the result is a list of JSON payloads.
data = fetch_all_weather_data()

# Normalize the JSON records into a tabular Pandas DataFrame.
df = pd.json_normalize(data)

# Write the table as a CSV object in the S3 bucket, authenticating with
# the AWS key/secret configured above.
storage_options = dict(key=S3_KEY, secret=S3_SECRET)
df.to_csv(
    f"s3://{S3_BUCKET}/{NUME_ORAS}_10_ani.csv",
    storage_options=storage_options,
    index=False,
)