In [1]:
!pip install polars

Collecting polars
  Downloading polars-1.32.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (15 kB)
Downloading polars-1.32.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (38.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m38.0/38.0 MB[0m [31m144.9 MB/s[0m eta [36m0:00:00[0m00:01[0m
Installing collected packages: polars
Successfully installed polars-1.32.0


In [2]:
import polars as pl

In [None]:

# Read the weather data CSV into a Polars DataFrame
df = pl.read_csv("hourly-weather-data.csv")
print(df.shape)        
print(df.columns)     

(221352, 9)
['date', 'temperature_2m', 'apparent_temperature', 'precipitation', 'wind_speed_10m', 'snow_depth', 'sunshine_duration', 'direct_radiation', 'wind_gusts_10m']


In [4]:
# Display the first 5 rows of the dataset
print(df.head(5))

shape: (5, 9)
┌───────────┬───────────┬───────────┬───────────┬───┬───────────┬───────────┬───────────┬──────────┐
│ date      ┆ temperatu ┆ apparent_ ┆ precipita ┆ … ┆ snow_dept ┆ sunshine_ ┆ direct_ra ┆ wind_gus │
│ ---       ┆ re_2m     ┆ temperatu ┆ tion      ┆   ┆ h         ┆ duration  ┆ diation   ┆ ts_10m   │
│ str       ┆ ---       ┆ re        ┆ ---       ┆   ┆ ---       ┆ ---       ┆ ---       ┆ ---      │
│           ┆ f64       ┆ ---       ┆ f64       ┆   ┆ f64       ┆ f64       ┆ f64       ┆ f64      │
│           ┆           ┆ f64       ┆           ┆   ┆           ┆           ┆           ┆          │
╞═══════════╪═══════════╪═══════════╪═══════════╪═══╪═══════════╪═══════════╪═══════════╪══════════╡
│ 2000-02-0 ┆ -8.6315   ┆ -15.03086 ┆ 0.0       ┆ … ┆ 0.06      ┆ 0.0       ┆ 0.0       ┆ 39.96    │
│ 2 04:00:0 ┆           ┆           ┆           ┆   ┆           ┆           ┆           ┆          │
│ 0+00:00   ┆           ┆           ┆           ┆   ┆           ┆           ┆

In [5]:
# Check null counts
print("❓ Null values per column:")
print(df.null_count())

❓ Null values per column:
shape: (1, 9)
┌──────┬────────────┬────────────┬────────────┬───┬────────────┬───────────┬───────────┬───────────┐
│ date ┆ temperatur ┆ apparent_t ┆ precipitat ┆ … ┆ snow_depth ┆ sunshine_ ┆ direct_ra ┆ wind_gust │
│ ---  ┆ e_2m       ┆ emperature ┆ ion        ┆   ┆ ---        ┆ duration  ┆ diation   ┆ s_10m     │
│ u32  ┆ ---        ┆ ---        ┆ ---        ┆   ┆ u32        ┆ ---       ┆ ---       ┆ ---       │
│      ┆ u32        ┆ u32        ┆ u32        ┆   ┆            ┆ u32       ┆ u32       ┆ u32       │
╞══════╪════════════╪════════════╪════════════╪═══╪════════════╪═══════════╪═══════════╪═══════════╡
│ 0    ┆ 0          ┆ 0          ┆ 0          ┆ … ┆ 76         ┆ 0         ┆ 0         ┆ 0         │
└──────┴────────────┴────────────┴────────────┴───┴────────────┴───────────┴───────────┴───────────┘


In [None]:

df_clean = df.drop_nulls()


In [7]:
q1 = df.select(pl.col("wind_gusts_10m").quantile(0.25)).item()
q3 = df.select(pl.col("wind_gusts_10m").quantile(0.75)).item()
iqr = q3 - q1

lower_bound = q1 - 1.5 * iqr
upper_bound = q3 + 1.5 * iqr


In [8]:
df_iqr_filtered = df.filter(
    (pl.col("wind_gusts_10m") >= lower_bound) & 
    (pl.col("wind_gusts_10m") <= upper_bound)
)

print("Before Cleaning:")
print(df.select("wind_gusts_10m").describe())

print("\nAfter Cleaning:")
print(df_iqr_filtered.select("wind_gusts_10m").describe())


Before Cleaning:
shape: (9, 2)
┌────────────┬────────────────┐
│ statistic  ┆ wind_gusts_10m │
│ ---        ┆ ---            │
│ str        ┆ f64            │
╞════════════╪════════════════╡
│ count      ┆ 221352.0       │
│ null_count ┆ 0.0            │
│ mean       ┆ 27.586993      │
│ std        ┆ 12.689802      │
│ min        ┆ 1.08           │
│ 25%        ┆ 18.0           │
│ 50%        ┆ 25.919998      │
│ 75%        ┆ 35.28          │
│ max        ┆ 119.88         │
└────────────┴────────────────┘

After Cleaning:
shape: (9, 2)
┌────────────┬────────────────┐
│ statistic  ┆ wind_gusts_10m │
│ ---        ┆ ---            │
│ str        ┆ f64            │
╞════════════╪════════════════╡
│ count      ┆ 218320.0       │
│ null_count ┆ 0.0            │
│ mean       ┆ 27.02248       │
│ std        ┆ 11.809349      │
│ min        ┆ 1.08           │
│ 25%        ┆ 18.0           │
│ 50%        ┆ 25.56          │
│ 75%        ┆ 34.92          │
│ max        ┆ 61.199997      │
└─────────

In [9]:
# Filter for rows where the temperature is below -15 °C
cold_df = df.filter(pl.col("temperature_2m") < -15)
print(f"Number of freezing hours: {cold_df.height}")
print(cold_df.select(["date", "temperature_2m", "apparent_temperature", "wind_gusts_10m" ]).head(10))

Number of freezing hours: 2400
shape: (10, 4)
┌───────────────────────────┬────────────────┬──────────────────────┬────────────────┐
│ date                      ┆ temperature_2m ┆ apparent_temperature ┆ wind_gusts_10m │
│ ---                       ┆ ---            ┆ ---                  ┆ ---            │
│ str                       ┆ f64            ┆ f64                  ┆ f64            │
╞═══════════════════════════╪════════════════╪══════════════════════╪════════════════╡
│ 2000-02-08 13:00:00+00:00 ┆ -17.0315       ┆ -22.089499           ┆ 14.04          │
│ 2000-02-12 09:00:00+00:00 ┆ -15.2315       ┆ -20.287348           ┆ 15.84          │
│ 2000-02-12 10:00:00+00:00 ┆ -15.8315       ┆ -20.99727            ┆ 15.48          │
│ 2000-02-12 11:00:00+00:00 ┆ -16.381498     ┆ -21.683455           ┆ 15.119999      │
│ 2000-02-12 12:00:00+00:00 ┆ -16.981499     ┆ -22.349497           ┆ 15.84          │
│ 2000-02-12 13:00:00+00:00 ┆ -17.5815       ┆ -22.61591            ┆ 15.48         

In [10]:
# Select a subset of columns (e.g., Date, Temperature, Humidity, Wind Speed)
subset_df = df.select([
    "date", 
    "temperature_2m", 
    "precipitation", 
    "wind_gusts_10m"
])
print(subset_df.head(5))


shape: (5, 4)
┌───────────────────────────┬────────────────┬───────────────┬────────────────┐
│ date                      ┆ temperature_2m ┆ precipitation ┆ wind_gusts_10m │
│ ---                       ┆ ---            ┆ ---           ┆ ---            │
│ str                       ┆ f64            ┆ f64           ┆ f64            │
╞═══════════════════════════╪════════════════╪═══════════════╪════════════════╡
│ 2000-02-02 04:00:00+00:00 ┆ -8.6315        ┆ 0.0           ┆ 39.96          │
│ 2000-02-02 05:00:00+00:00 ┆ -9.1315        ┆ 0.0           ┆ 39.239998      │
│ 2000-02-02 06:00:00+00:00 ┆ -9.8315        ┆ 0.0           ┆ 39.6           │
│ 2000-02-02 07:00:00+00:00 ┆ -10.4815       ┆ 0.0           ┆ 40.32          │
│ 2000-02-02 08:00:00+00:00 ┆ -11.2315       ┆ 0.0           ┆ 39.6           │
└───────────────────────────┴────────────────┴───────────────┴────────────────┘


In [11]:
result = (
    df
    .with_columns((pl.col("temperature_2m") - pl.col("apparent_temperature")).alias("feels_like_diff"))
    .filter(pl.col("feels_like_diff") > 0)
    .sort("feels_like_diff", descending=True)
    .select(["date", "temperature_2m", "apparent_temperature", "feels_like_diff"])
)
print(result)

shape: (180_392, 4)
┌───────────────────────────┬────────────────┬──────────────────────┬─────────────────┐
│ date                      ┆ temperature_2m ┆ apparent_temperature ┆ feels_like_diff │
│ ---                       ┆ ---            ┆ ---                  ┆ ---             │
│ str                       ┆ f64            ┆ f64                  ┆ f64             │
╞═══════════════════════════╪════════════════╪══════════════════════╪═════════════════╡
│ 2022-12-24 01:00:00+00:00 ┆ -13.564        ┆ -24.630537           ┆ 11.066537       │
│ 2022-12-24 02:00:00+00:00 ┆ -13.664001     ┆ -24.58174            ┆ 10.917739       │
│ 2022-12-24 06:00:00+00:00 ┆ -14.014        ┆ -24.723354           ┆ 10.709354       │
│ 2018-04-04 20:00:00+00:00 ┆ 0.086          ┆ -10.37068            ┆ 10.45668        │
│ 2022-12-24 00:00:00+00:00 ┆ -12.914001     ┆ -23.365795           ┆ 10.451794       │
│ …                         ┆ …              ┆ …                    ┆ …               │
│ 2018-07-03

In [12]:
print(df.schema)

Schema({'date': String, 'temperature_2m': Float64, 'apparent_temperature': Float64, 'precipitation': Float64, 'wind_speed_10m': Float64, 'snow_depth': Float64, 'sunshine_duration': Float64, 'direct_radiation': Float64, 'wind_gusts_10m': Float64})


In [13]:
# Step 1: Convert String to Datetime
df = df.with_columns(
    pl.col("date").str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M:%S%z", strict=False)
)
# Step 2: Daily grouping
daily_summary = (
    df
    .with_columns(pl.col("date").dt.date().alias("day"))
    .group_by("day")
    .agg([
        pl.col("temperature_2m").mean().alias("avg_temp"),
        pl.col("precipitation").sum().alias("total_precip"),
        pl.col("sunshine_duration").sum().alias("total_sunshine")
    ])
    .sort("day")
)

print(daily_summary)


shape: (9_224, 4)
┌────────────┬───────────┬──────────────┬────────────────┐
│ day        ┆ avg_temp  ┆ total_precip ┆ total_sunshine │
│ ---        ┆ ---       ┆ ---          ┆ ---            │
│ date       ┆ f64       ┆ f64          ┆ f64            │
╞════════════╪═══════════╪══════════════╪════════════════╡
│ 2000-02-02 ┆ -10.1015  ┆ 0.0          ┆ 32060.7265     │
│ 2000-02-03 ┆ -4.896083 ┆ 3.2          ┆ 0.0            │
│ 2000-02-04 ┆ -6.266917 ┆ 1.3          ┆ 30920.767102   │
│ 2000-02-05 ┆ -8.094    ┆ 0.0          ┆ 25480.016775   │
│ 2000-02-06 ┆ -5.883583 ┆ 0.0          ┆ 32439.120612   │
│ …          ┆ …         ┆ …            ┆ …              │
│ 2025-04-30 ┆ 6.396417  ┆ 0.0          ┆ 46801.9665     │
│ 2025-05-01 ┆ 7.00475   ┆ 9.9          ┆ 8317.533717    │
│ 2025-05-02 ┆ 12.9235   ┆ 1.9          ┆ 32010.04534    │
│ 2025-05-03 ┆ 8.71725   ┆ 1.3          ┆ 8251.1989      │
│ 2025-05-04 ┆ 7.811     ┆ 0.0          ┆ 0.0            │
└────────────┴───────────┴────────────

In [14]:
df_rolling_avg = df.with_columns(
    pl.col("temperature_2m")
    .rolling_mean(window_size=3)
    .alias("rolling_avg_temp")
)
print(df_rolling_avg.head(5))

shape: (5, 10)
┌───────────┬───────────┬───────────┬───────────┬───┬───────────┬───────────┬───────────┬──────────┐
│ date      ┆ temperatu ┆ apparent_ ┆ precipita ┆ … ┆ sunshine_ ┆ direct_ra ┆ wind_gust ┆ rolling_ │
│ ---       ┆ re_2m     ┆ temperatu ┆ tion      ┆   ┆ duration  ┆ diation   ┆ s_10m     ┆ avg_temp │
│ datetime[ ┆ ---       ┆ re        ┆ ---       ┆   ┆ ---       ┆ ---       ┆ ---       ┆ ---      │
│ μs, UTC]  ┆ f64       ┆ ---       ┆ f64       ┆   ┆ f64       ┆ f64       ┆ f64       ┆ f64      │
│           ┆           ┆ f64       ┆           ┆   ┆           ┆           ┆           ┆          │
╞═══════════╪═══════════╪═══════════╪═══════════╪═══╪═══════════╪═══════════╪═══════════╪══════════╡
│ 2000-02-0 ┆ -8.6315   ┆ -15.03086 ┆ 0.0       ┆ … ┆ 0.0       ┆ 0.0       ┆ 39.96     ┆ null     │
│ 2         ┆           ┆           ┆           ┆   ┆           ┆           ┆           ┆          │
│ 04:00:00  ┆           ┆           ┆           ┆   ┆           ┆           

In [15]:
df_percent_change = df.with_columns(
    (pl.col("wind_gusts_10m") / pl.col("wind_gusts_10m").shift(1) - 1).alias("wind_gusts_pct_change")
)
print(df_percent_change.head(5))

shape: (5, 10)
┌───────────┬───────────┬───────────┬───────────┬───┬───────────┬───────────┬───────────┬──────────┐
│ date      ┆ temperatu ┆ apparent_ ┆ precipita ┆ … ┆ sunshine_ ┆ direct_ra ┆ wind_gust ┆ wind_gus │
│ ---       ┆ re_2m     ┆ temperatu ┆ tion      ┆   ┆ duration  ┆ diation   ┆ s_10m     ┆ ts_pct_c │
│ datetime[ ┆ ---       ┆ re        ┆ ---       ┆   ┆ ---       ┆ ---       ┆ ---       ┆ hange    │
│ μs, UTC]  ┆ f64       ┆ ---       ┆ f64       ┆   ┆ f64       ┆ f64       ┆ f64       ┆ ---      │
│           ┆           ┆ f64       ┆           ┆   ┆           ┆           ┆           ┆ f64      │
╞═══════════╪═══════════╪═══════════╪═══════════╪═══╪═══════════╪═══════════╪═══════════╪══════════╡
│ 2000-02-0 ┆ -8.6315   ┆ -15.03086 ┆ 0.0       ┆ … ┆ 0.0       ┆ 0.0       ┆ 39.96     ┆ null     │
│ 2         ┆           ┆           ┆           ┆   ┆           ┆           ┆           ┆          │
│ 04:00:00  ┆           ┆           ┆           ┆   ┆           ┆           

In [16]:
df_ranking = df.sort("direct_radiation", descending=True).with_columns(
    pl.col("direct_radiation").rank(method="dense").alias("radiation_rank")
)

print(df_ranking.head(5))

shape: (5, 10)
┌───────────┬───────────┬───────────┬───────────┬───┬───────────┬───────────┬───────────┬──────────┐
│ date      ┆ temperatu ┆ apparent_ ┆ precipita ┆ … ┆ sunshine_ ┆ direct_ra ┆ wind_gust ┆ radiatio │
│ ---       ┆ re_2m     ┆ temperatu ┆ tion      ┆   ┆ duration  ┆ diation   ┆ s_10m     ┆ n_rank   │
│ datetime[ ┆ ---       ┆ re        ┆ ---       ┆   ┆ ---       ┆ ---       ┆ ---       ┆ ---      │
│ μs, UTC]  ┆ f64       ┆ ---       ┆ f64       ┆   ┆ f64       ┆ f64       ┆ f64       ┆ u32      │
│           ┆           ┆ f64       ┆           ┆   ┆           ┆           ┆           ┆          │
╞═══════════╪═══════════╪═══════════╪═══════════╪═══╪═══════════╪═══════════╪═══════════╪══════════╡
│ 2006-06-1 ┆ 14.8185   ┆ 11.382072 ┆ 0.0       ┆ … ┆ 3600.0    ┆ 909.0     ┆ 51.839996 ┆ 888      │
│ 0         ┆           ┆           ┆           ┆   ┆           ┆           ┆           ┆          │
│ 18:00:00  ┆           ┆           ┆           ┆   ┆           ┆           

In [17]:
df_categorization = df.with_columns([
    pl.when(pl.col("precipitation") > 0.1)
    .then(pl.lit("Rainy"))
    .when(pl.col("snow_depth") > 0.05)
    .then(pl.lit("Snowy"))
    .when(pl.col("temperature_2m") > 30)
    .then(pl.lit("Hot"))
    .otherwise(pl.lit("Clear"))
    .alias("weather_type")
])

print(df_categorization.head(5))


shape: (5, 10)
┌───────────┬───────────┬───────────┬───────────┬───┬───────────┬───────────┬───────────┬──────────┐
│ date      ┆ temperatu ┆ apparent_ ┆ precipita ┆ … ┆ sunshine_ ┆ direct_ra ┆ wind_gust ┆ weather_ │
│ ---       ┆ re_2m     ┆ temperatu ┆ tion      ┆   ┆ duration  ┆ diation   ┆ s_10m     ┆ type     │
│ datetime[ ┆ ---       ┆ re        ┆ ---       ┆   ┆ ---       ┆ ---       ┆ ---       ┆ ---      │
│ μs, UTC]  ┆ f64       ┆ ---       ┆ f64       ┆   ┆ f64       ┆ f64       ┆ f64       ┆ str      │
│           ┆           ┆ f64       ┆           ┆   ┆           ┆           ┆           ┆          │
╞═══════════╪═══════════╪═══════════╪═══════════╪═══╪═══════════╪═══════════╪═══════════╪══════════╡
│ 2000-02-0 ┆ -8.6315   ┆ -15.03086 ┆ 0.0       ┆ … ┆ 0.0       ┆ 0.0       ┆ 39.96     ┆ Snowy    │
│ 2         ┆           ┆           ┆           ┆   ┆           ┆           ┆           ┆          │
│ 04:00:00  ┆           ┆           ┆           ┆   ┆           ┆           

In [18]:
# Check dtype of date column
print(df.schema)

Schema({'date': Datetime(time_unit='us', time_zone='UTC'), 'temperature_2m': Float64, 'apparent_temperature': Float64, 'precipitation': Float64, 'wind_speed_10m': Float64, 'snow_depth': Float64, 'sunshine_duration': Float64, 'direct_radiation': Float64, 'wind_gusts_10m': Float64})


In [None]:
df_hourly_avg = df.with_columns([
    pl.col("date").alias("date_parsed") if df.schema["date"] == pl.Datetime else pl.col("date").str.strptime(pl.Datetime, "%Y-%m-%d %H:%M:%S%z").alias("date_parsed")
]).with_columns([
    pl.col("date_parsed").dt.hour().alias("hour"),
    pl.lit(1).alias("myon")
])
# Step 2: Pivot
pivot_result = df_hourly_avg.pivot(
    values="temperature_2m",
    index="hour",
    columns="myon",  # This is constant
    aggregate_function="mean"
).sort("hour")

print(pivot_result)


shape: (24, 2)
┌──────┬───────────┐
│ hour ┆ 1         │
│ ---  ┆ ---       │
│ i8   ┆ f64       │
╞══════╪═══════════╡
│ 0    ┆ 9.271092  │
│ 1    ┆ 8.647163  │
│ 2    ┆ 8.160065  │
│ 3    ┆ 7.756661  │
│ 4    ┆ 7.408431  │
│ …    ┆ …         │
│ 19   ┆ 11.290796 │
│ 20   ┆ 11.308881 │
│ 21   ┆ 11.08486  │
│ 22   ┆ 10.604507 │
│ 23   ┆ 9.967414  │
└──────┴───────────┘


  pivot_result = df_hourly_avg.pivot(


In [20]:
lazy_df = (
    pl.scan_csv("https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/cgBu1y94dmPAnw4HfZPeOg/hourly-weather-data.csv")  # File not loaded yet
    .filter(pl.col("temperature_2m") > 25)
    .select(["date", "temperature_2m"])
)

In [21]:
result = lazy_df.collect()  #  Runs everything, returns a real DataFrame
print(result)

shape: (8_043, 2)
┌───────────────────────────┬────────────────┐
│ date                      ┆ temperature_2m │
│ ---                       ┆ ---            │
│ str                       ┆ f64            │
╞═══════════════════════════╪════════════════╡
│ 2000-05-06 19:00:00+00:00 ┆ 25.0685        │
│ 2000-06-10 16:00:00+00:00 ┆ 25.268501      │
│ 2000-06-10 17:00:00+00:00 ┆ 26.118502      │
│ 2000-06-10 18:00:00+00:00 ┆ 26.6685        │
│ 2000-06-10 19:00:00+00:00 ┆ 27.368502      │
│ …                         ┆ …              │
│ 2024-08-31 20:00:00+00:00 ┆ 26.486         │
│ 2024-08-31 21:00:00+00:00 ┆ 25.486         │
│ 2024-09-01 17:00:00+00:00 ┆ 25.086         │
│ 2024-09-01 18:00:00+00:00 ┆ 25.486         │
│ 2024-09-01 19:00:00+00:00 ┆ 25.286         │
└───────────────────────────┴────────────────┘


In [22]:
import polars as pl

weather_df = pl.DataFrame({
    "station_id": [101, 102, 103, 104, 105],
    "date": ["2024-01-01", "2024-01-01", "2024-01-01", "2024-01-01", "2024-01-01"],
    "temperature": [25.4, 21.2, 18.9, 26.5, 19.0],
    "precipitation": [0.0, 0.5, 0.0, 1.2, 0.0]
})

station_df = pl.DataFrame({
    "station_id": [101, 102, 103, 106],  # Note: station 104 and 105 are missing, 106 is extra
    "station_name": ["North Point", "West Side", "East End", "South Base"],
    "elevation": [150, 180, 200, 250]
})

In [23]:
# # Perform inner join
inner_join = weather_df.join(station_df, on="station_id", how="inner")

print(inner_join)

# Perform Left join
left_join = weather_df.join(station_df, on="station_id", how="left")
print(left_join)

# Perform Outer join
outer_join = weather_df.join(station_df, on="station_id", how="full")
print(outer_join)

# Perform Antijoin
anti_join = weather_df.join(station_df, on="station_id", how="anti")
print(anti_join)

shape: (3, 6)
┌────────────┬────────────┬─────────────┬───────────────┬──────────────┬───────────┐
│ station_id ┆ date       ┆ temperature ┆ precipitation ┆ station_name ┆ elevation │
│ ---        ┆ ---        ┆ ---         ┆ ---           ┆ ---          ┆ ---       │
│ i64        ┆ str        ┆ f64         ┆ f64           ┆ str          ┆ i64       │
╞════════════╪════════════╪═════════════╪═══════════════╪══════════════╪═══════════╡
│ 101        ┆ 2024-01-01 ┆ 25.4        ┆ 0.0           ┆ North Point  ┆ 150       │
│ 102        ┆ 2024-01-01 ┆ 21.2        ┆ 0.5           ┆ West Side    ┆ 180       │
│ 103        ┆ 2024-01-01 ┆ 18.9        ┆ 0.0           ┆ East End     ┆ 200       │
└────────────┴────────────┴─────────────┴───────────────┴──────────────┴───────────┘
shape: (5, 6)
┌────────────┬────────────┬─────────────┬───────────────┬──────────────┬───────────┐
│ station_id ┆ date       ┆ temperature ┆ precipitation ┆ station_name ┆ elevation │
│ ---        ┆ ---        ┆ ---      

In [24]:
station_df = station_df.rename({"station_id": "station_id_meta"})
print(station_df)

shape: (4, 3)
┌─────────────────┬──────────────┬───────────┐
│ station_id_meta ┆ station_name ┆ elevation │
│ ---             ┆ ---          ┆ ---       │
│ i64             ┆ str          ┆ i64       │
╞═════════════════╪══════════════╪═══════════╡
│ 101             ┆ North Point  ┆ 150       │
│ 102             ┆ West Side    ┆ 180       │
│ 103             ┆ East End     ┆ 200       │
│ 106             ┆ South Base   ┆ 250       │
└─────────────────┴──────────────┴───────────┘


In [25]:
combined_df = pl.concat([weather_df, station_df], how="horizontal")
print(combined_df)

shape: (5, 7)
┌────────────┬────────────┬─────────────┬───────────────┬───────────────┬──────────────┬───────────┐
│ station_id ┆ date       ┆ temperature ┆ precipitation ┆ station_id_me ┆ station_name ┆ elevation │
│ ---        ┆ ---        ┆ ---         ┆ ---           ┆ ta            ┆ ---          ┆ ---       │
│ i64        ┆ str        ┆ f64         ┆ f64           ┆ ---           ┆ str          ┆ i64       │
│            ┆            ┆             ┆               ┆ i64           ┆              ┆           │
╞════════════╪════════════╪═════════════╪═══════════════╪═══════════════╪══════════════╪═══════════╡
│ 101        ┆ 2024-01-01 ┆ 25.4        ┆ 0.0           ┆ 101           ┆ North Point  ┆ 150       │
│ 102        ┆ 2024-01-01 ┆ 21.2        ┆ 0.5           ┆ 102           ┆ West Side    ┆ 180       │
│ 103        ┆ 2024-01-01 ┆ 18.9        ┆ 0.0           ┆ 103           ┆ East End     ┆ 200       │
│ 104        ┆ 2024-01-01 ┆ 26.5        ┆ 1.2           ┆ 106           ┆ Sou