In [1]:
import io.jhdf.HdfFile
import java.nio.file.Path

%use dataframe, kandy


Open the file and read the weather data into memory

In [2]:
// Open the HDF5 file from a hard-coded absolute path.
// NOTE(review): the path is machine-specific, and `file` is never closed in the visible cells.
val file = HdfFile(Path.of("/home/james/git/jhdf-webinar/hdf5-example/weather.hdf5"))
// Look up the dataset stored at "weather/table" inside the file.
val weatherDataset = file.getDatasetByPath("weather/table")
// Read the dataset and view it as a map keyed by string. The cast is unchecked;
// presumably this is a compound dataset keyed by member name — TODO confirm against jhdf docs.
// NOTE(review): the name looks like a typo for "compoundData"; later cells refer to it as spelled here.
val compountData = weatherDataset.getData() as Map<String, Any>

Convert the Java map into a data frame

In [3]:
// Turn each entry of the map into a named DataFrame column.
// String entries are cast to Array<String> and spread straight into columnOf;
// numeric entries are cast to DoubleArray and boxed with toTypedArray() first
// so they can be spread as varargs.
val country = (compountData["country"] as Array<String>)
    .let { columnOf(*it).named("country") }
val date = (compountData["date"] as Array<String>)
    .let { columnOf(*it).named("date") }
val temp = (compountData["avg_temp_c"] as DoubleArray)
    .toTypedArray()
    .let { columnOf(*it).named("temp") }
val rainfall = (compountData["precipitation_mm"] as DoubleArray)
    .toTypedArray()
    .let { columnOf(*it).named("rainfall") }
val pressure = (compountData["avg_sea_level_pres_hpa"] as DoubleArray)
    .toTypedArray()
    .let { columnOf(*it).named("pressure") }

// Assemble the frame, then parse the string "date" column into a typed column.
val df = dataFrameOf(country, date, temp, rainfall, pressure).parse { date }

See the countries available

In [4]:
// Materialise the country column as a list, then drop repeats (first occurrence wins).
df.country.toList().distinct()

[Afghanistan, Albania, Algeria, American Samoa, Angola, Anguilla, Antigua and Barbuda, Argentina, Armenia, Aruba, Australia, Austria, Azerbaijan, Bahrain, Bangladesh, Barbados, Belarus, Belgium, Belize, Benin, Bermuda, Bhutan, Bolivia, Bosnia and Herzegovina, Botswana, Brazil, British Indian Ocean Territory, Brunei, Bulgaria, Burkina Faso, Burundi, Cambodia, Cameroon, Canada, Cape Verde, Cayman Islands, Central African Republic, Chad, Chile, China, Christmas Island, Cocos (Keeling) Islands, Colombia, Comoros, Cook Islands, Costa Rica, Croatia, Cuba, Cyprus, Czech Republic, Democratic Republic of the Congo, Denmark, Djibouti, Dominica, Dominican Republic, East Timor, Ecuador, Egypt, El Salvador, Equatorial Guinea, Eritrea, Estonia, Ethiopia, Falkland Islands, Faroe Islands, Federated States of Micronesia, Fiji, Finland, France, French Guiana, French Polynesia, French Southern and Antarctic Lands, Gabon, Georgia, Germany, Ghana, Gibraltar, Greece, Greenland, Grenada, Guadeloupe, Guam, Gu

Plot the data for a few countries

In [5]:
// Countries whose temperature series are drawn, one line per country.
val countries = setOf("Spain", "Finland", "United Kingdom", "United States")
// Inclusive date window for the plot. Built once here so the two date strings
// are not re-parsed for every row of the frame inside the filter predicate.
val plotPeriod = LocalDate.parse("1980-01-01")..LocalDate.parse("2000-01-01")

// One filtering pass keeps rows that (a) belong to a selected country,
// (b) have a finite temperature, and (c) fall inside the plot window.
// `&&` short-circuits, so the checks run in the same order as the original
// chain of three filters and select exactly the same rows.
// NOTE(review): non-finite temps are presumably missing readings — confirm with the data source.
df
    .filter { country in countries && temp.isFinite() && date in plotPeriod }
    .groupBy(country)
    .plot {
        // Temperature over time, coloured by country.
        line {
            x(date)
            y(temp)
            color(country)
        }
    }