In [None]:
import polars as pl
from datetime import datetime, date

In [None]:
# Load datasets
# Shared input frames used by the solution cells below (paths are relative).
un = pl.read_csv(source="data/un_basic.csv", try_parse_dates=True)  # let polars detect date columns
forest_area = pl.read_csv(source="data/our_world_in_data/forest-area-km.csv")
weather = pl.read_parquet(source="data/prague-meteostat.parquet")

In [None]:
# Solution load-cities
# Read the world-cities table; the bare name on the last line renders it as the cell output.
cities = pl.read_parquet(source="data/worldcities.parquet")
cities

In [None]:
# Solution world-map
# Plot every city at its longitude/latitude; the point cloud outlines the continents.
cities.plot.scatter(
    # Required: which columns provide the coordinates
    x="lng",
    y="lat",
    # Optional: what each point shows and how it is coloured
    color="country",
    hover_cols=["city"],
    # Optional: figure cosmetics
    title="Cities of the World",
    width=1000,
    height=500,
    grid=True,
    legend=False,
)

In [None]:
# Solution ten-smallest
# Ascending sort puts the least populous rows first; keep the first ten of them.
sorted_population = un.sort("population")
ten_smallest = sorted_population.head(10)

# One bar per row, labelled by ISO3 code and coloured by region.
ten_smallest.plot.bar(
    x="iso3",
    y="population",
    color="region",
    hover_cols=["country"],
)

In [None]:
# Solution membership-years
# Whole days elapsed between each admission date and today.
days_as_member = (date.today() - un["admission_date"]).dt.total_days()

# Floor-divide by the average year length (365.25 days) to get completed years.
un.with_columns(
    membership_in_years=(days_as_member // 365.25).cast(pl.Int64)
)

In [None]:
# Solution energy-cz
# Load the electricity-by-source table; a longer schema-inference window (5000 rows)
# is requested explicitly instead of relying on the default.
el_source = pl.read_csv(
    "data/our_world_in_data/electricity-source.csv",
    infer_schema_length=5000,
)

# Keep only the rows for Czechia and display them.
el_source_czechia = el_source.filter(pl.col("country") == "Czechia")
el_source_czechia

In [None]:
# Solution energy-cz (1)
# Stacked area chart: each source is drawn on top of the previous ones.
el_source_czechia.plot.area(
    x="year",
    y=["nuclear", "hydro", "fossil", "renewables"],
    stacked=True,
)

In [None]:
# Solution energy-cz (2)
# Same four series as the area variant, drawn with the default plot type, one per source.
el_source_czechia.plot(
    x="year",
    y=["nuclear", "hydro", "fossil", "renewables"],
)

In [None]:
# Solution founding-members
# The earliest admission date in the table identifies the founding members.
first_date = un["admission_date"].min()

# Keep every row admitted on exactly that date and display the result.
founding_members = un.filter(pl.col("admission_date") == first_date)
founding_members

In [None]:
# Solution forest-change
# Relative change in forest area per entity between its earliest and latest record.
# The frame is sorted by "Year" first: group_by keeps the row order inside each
# group, so first()/last() are then guaranteed to be the chronologically first and
# last values instead of depending on how the CSV happens to be ordered.
forest_area.sort("Year").group_by("Entity").agg(
    area_first=pl.col("Forest area").first(),
    area_last=pl.col("Forest area").last(),
).with_columns(
    area_diff=pl.col("area_last") - pl.col("area_first"),
).with_columns(
    rel_diff=pl.col("area_diff") / pl.col("area_first")
# is_finite() removes inf/NaN ratios (division by a zero starting area);
# drop_nulls() removes rows that still contain a null in any column.
).filter(pl.col("rel_diff").is_finite()).sort("rel_diff").drop_nulls()

In [None]:
# Solution forest-change
# Step 1: earliest and latest forest area per entity. Sorting by "Year" up front
# makes first()/last() chronological regardless of the CSV row order (group_by
# preserves the order of rows inside each group).
first_and_last_forest_area = forest_area.sort("Year").group_by("Entity").agg(
    area_first=pl.col("Forest area").first(),
    area_last=pl.col("Forest area").last(),
)

# Step 2: relative change; drop entities whose ratio is not a finite number
# (e.g. a zero starting area).
relative_change = first_and_last_forest_area.select(
    "Entity",
    rel_diff=(pl.col("area_last") - pl.col("area_first")) / pl.col("area_first")
).filter(pl.col("rel_diff").is_finite())

# Largest losses first, largest gains last.
relative_change.sort("rel_diff")

In [None]:
# Solution hottest-night
# Restrict to measurements taken after 2014-01-01 00:00.
recent_weather = weather.filter(pl.col("time") > datetime(2014, 1, 1))

# Daily minimum temperature (NaN readings ignored).
# NOTE(review): set_sorted only *declares* that "time" is ascending; it does not
# sort or verify. Confirm the parquet file is stored in chronological order.
min_daily_temperatures = (
    recent_weather
    .set_sorted("time")
    .group_by_dynamic("time", every="1d")
    .agg(min_temp=pl.col("temp").drop_nans().min())
)

# The ten days whose coldest moment was the warmest.
top_nights = min_daily_temperatures.sort("min_temp", descending=True).head(10)
top_nights

In [None]:
# Solution million-cities
# Cities with more than one million inhabitants.
million_cities = cities.filter(pl.col("population") > 1e6)

# Attach the UN country attributes (region, subregion, ...) via the ISO3 code;
# the inner join drops cities whose code has no match in `un`.
million_cities_with_country = million_cities.join(un, on="iso3", how="inner")

# Number of such cities per (region, subregion), most populated groups first.
million_cities_per_region = (
    million_cities_with_country
    .group_by("region", "subregion")
    .agg(pl.len().alias("count"))
    .sort("count", descending=True)
)
million_cities_per_region


In [None]:
# Solution million-cities (bonus)
# Cities with more than one million inhabitants, largest first.
million_cities = cities.filter(pl.col("population") > 1e6).sort("population", descending=True)

# Attach the UN country attributes via the ISO3 code. After the join, "population"
# is still the city population (the left frame's column).
million_cities_with_country = million_cities.join(un, on="iso3", how="inner")

# Per (region, subregion): how many million-cities there are, plus the name and
# population of the largest one. The largest city is selected with an explicit
# sort_by inside the aggregation, because a join is not guaranteed to preserve the
# row order established by the sort above.
million_cities_per_region = (
    million_cities_with_country.group_by("region", "subregion")
    .agg(
        pl.col("population").count().alias("count"),
        pl.col("city").sort_by("population", descending=True).first(),
        pl.col("population").sort_by("population", descending=True).first(),
    )
    .sort("count", descending=True)
)
million_cities_per_region


In [None]:
# Solution forest-region
# Attach each country's region to its forest records, then reshape to one row per
# year and one column per region, summing the forest area of the region's countries.
forest_area_by_region = (
    forest_area
    .join(un, left_on="Code", right_on="iso3", how="inner")
    .pivot(
        on="region",
        index="Year",
        values="Forest area",
        aggregate_function="sum",
    )
)

# Stacked areas show each region's share of the total over time.
forest_area_by_region.plot.area(x="Year", stacked=True)