In [None]:
import polars as pl
from datetime import datetime, date

In [None]:
# Load datasets
# `un` is read with try_parse_dates=True so that date-like columns are parsed as
# dates instead of strings (later cells compare `admission_date` against DATE values).
un = pl.read_csv("data/un_basic.csv", try_parse_dates=True)
# Forest-area table; later cells use its "Entity", "Code", "Year" and "Forest area" columns.
forest_area = pl.read_csv("data/our_world_in_data/forest-area-km.csv")
# Weather observations; later cells use its `time` and `temp` columns.
weather = pl.read_parquet("data/prague-meteostat.parquet")

In [None]:
# Solution load-cities
# Read the cities table from Parquet and display it as the cell output.
# Later cells use its `lng`, `lat`, `city`, `country`, `iso3` and `population` columns.
cities = pl.read_parquet("data/worldcities.parquet")
cities

In [None]:
# Solution world-map
# Scatter plot with one point per row of `cities`, placed by its `lng`/`lat` columns.
# Only the x/y mapping is required; everything in `optional_styling` is optional.
optional_styling = dict(
    hover_cols=["city"],
    color="country",
    title="Cities of the World",
    height=500,
    width=1000,
    legend=False,
    grid=True,
)
cities.plot.scatter(x="lng", y="lat", **optional_styling)

In [None]:
# Solution ten-smallest
# Order `un` by population (ascending is SQL's default direction) and keep the first ten rows.
ten_smallest_query = "SELECT * FROM un ORDER BY population LIMIT 10"
ten_smallest = pl.sql(ten_smallest_query).collect()

# One bar per row, keyed by ISO3 code and coloured by region; the country name is a hover column.
ten_smallest.plot.bar(
    x="iso3",
    y="population",
    color="region",
    hover_cols=["country"],
)

In [None]:
# Solution membership-years
# Add a `membership_in_years` column: how long each country has been a member, in years.
#
# The earlier SQL version (`'<today>'::DATE - admission_date`) produced an
# interval/duration, so the column did not hold what its name promised. The
# expression API makes the unit conversion explicit:
#   date - date        -> Duration
#   .dt.total_days()   -> whole days as an integer
#   / DAYS_PER_YEAR    -> fractional years (365.25 averages in leap years)
# Assumes `admission_date` has the Date dtype (the CSV is loaded with
# try_parse_dates=True) — confirm if the loader changes.
DAYS_PER_YEAR = 365.25
current_date = date.today()  # the output therefore depends on the day the cell is run
un.with_columns(
    ((pl.lit(current_date) - pl.col("admission_date")).dt.total_days() / DAYS_PER_YEAR)
    .alias("membership_in_years")
)


In [None]:
# Solution energy-cz
# Read the electricity-source table; infer_schema_length=5000 is the number of rows
# polars may inspect when inferring column dtypes.
el_source = pl.read_csv(
    "data/our_world_in_data/electricity-source.csv",
    infer_schema_length=5000,
)

# Keep only the rows whose `country` is Czechia and display them.
czechia_query = "SELECT * FROM el_source WHERE country = 'Czechia'"
el_source_czechia = pl.sql(czechia_query).collect()
el_source_czechia

In [None]:
# Solution energy-cz (1)
# Stacked area chart of the four source columns against `year`.
energy_sources = ["nuclear", "hydro", "fossil", "renewables"]
el_source_czechia.plot.area(x="year", y=energy_sources, stacked=True)

In [None]:
# Solution energy-cz (2)
# Same four source columns against `year`, but drawn with the plot accessor's
# default chart type instead of a stacked area
# (presumably a line chart — confirm against the plotting backend in use).
el_source_czechia.plot(x="year", y=["nuclear", "hydro", "fossil", "renewables"])

In [None]:
# Solution founding-members
# Founding members are taken to be the countries admitted on the earliest admission date.
first_date = un["admission_date"].min()
# Standard SQL spells equality with a single `=`; the previous `==` is a
# non-standard spelling that not every SQL dialect accepts.
founding_members = pl.sql(f"SELECT * FROM un WHERE admission_date = '{first_date}'::DATE").collect()
founding_members

In [57]:
# Solution forest-change
# Relative change in forest area per entity between its first and last record:
#   rel_diff = (area_last - area_first) / area_first
#
# FIRST()/LAST() pick rows by position, so they only mean "earliest"/"latest"
# when each entity's rows are in chronological order. Sort explicitly instead
# of relying on the row order the CSV happens to have.
forest_area_by_year = forest_area.sort("Entity", "Year")

# The WHERE clause keeps only entities whose first and last areas are both positive,
# which also keeps the division by `area_first` away from zero.
pl.sql("""
    SELECT Entity, (area_last - area_first) / area_first AS rel_diff
    FROM
    (
        SELECT Entity, FIRST("Forest area") AS area_first, LAST("Forest area") AS area_last
        FROM forest_area_by_year
        GROUP BY Entity
    ) AS f
    WHERE area_first > 0 AND area_last > 0
    ORDER BY rel_diff
""").collect()

Entity,rel_diff
str,f64
"""Micronesia""",-0.62088
"""Nicaragua""",-0.467517
"""Niger""",-0.444884
"""Gambia""",-0.414774
"""Paraguay""",-0.369672
…,…
"""Kuwait""",0.811594
"""Uruguay""",1.545113
"""Cape Verde""",1.972692
"""Iceland""",2.008202


In [66]:
# Solution hottest-night
# For every calendar day after the cutoff, take the lowest `temp` of that day and
# list the ten days where that minimum is highest.
# NOTE(review): the strict `>` presumably excludes a reading stamped exactly
# 2014-01-01 00:00 — confirm whether `>=` was intended.
hottest_nights_query = """
    SELECT time::DATE AS date, min(temp) as min_temp FROM weather
    WHERE time > '2014-01-01'
    GROUP BY time::DATE
    ORDER BY min_temp DESC
    LIMIT 10
"""
pl.sql(hottest_nights_query).collect()

date,min_temp
date,f64
2018-08-08,21.2
2015-08-08,20.7
2018-08-02,20.6
2018-08-03,20.6
2018-08-01,20.6
2015-08-14,19.7
2015-08-11,19.7
2021-06-21,19.7
2023-08-25,19.6
2017-08-01,19.6


In [None]:
# Solution million-cities (bonus)
# Per (region, subregion) of `un`: count the cities with more than one million
# inhabitants and report the first city / population encountered in each group.
# The subquery sorts `cities` by population descending, so first(city) and
# first(population) are intended to describe the largest city of each group.
# NOTE(review): this relies on the subquery's row order surviving the JOIN and
# GROUP BY — confirm that the polars version in use guarantees it.
# NOTE(review): `population` is unqualified although `un` also has a population
# column (see ten-smallest); presumably it resolves to the cities column —
# verify, and consider qualifying it as `c.population`.
million_cities_per_region = pl.sql("""
    SELECT region, subregion, count(*) AS count, first(city), first(population) FROM
    (
        SELECT * FROM cities ORDER by population DESC
    ) AS c
    INNER JOIN un on c.iso3 = un.iso3
    WHERE population > 1000000
    GROUP BY region, subregion
    ORDER BY count DESC
""").collect()
million_cities_per_region


In [None]:
# Solution forest-region
# TODO (carried over from the original): no better solution is known yet —
# presumably this refers to a SQL-only version; the DataFrame API is used here.
#
# Steps:
#   1. Inner-join forest rows to `un` on the country code, which attaches `region`
#      and drops rows whose "Code" has no match in `un`.
#   2. Pivot to one row per Year and one column per region, summing "Forest area".
#   3. Sort by Year: the pivot does not guarantee that index values come out in
#      ascending order, and an unsorted x axis would scramble the area chart.
forest_area_by_region = (
    forest_area.join(un, left_on="Code", right_on="iso3", how="inner")
    .pivot(on="region", values="Forest area", index="Year", aggregate_function="sum")
    .sort("Year")
)
forest_area_by_region.plot.area(stacked=True, x="Year")