# Exercises: Data Analysis with Python

In [None]:
import polars as pl

## Mean Temperature in Zurich
The weather station in Zurich Fluntern has been recording temperature data since 1864.

#### Import Data
The file *sma_zrh_historical.csv* contains daily mean temperatures.

In [None]:
# Read the historical CSV; its fields are separated by ';'.
df = pl.read_csv(
    'data/sma_zrh_historical.csv',
    separator=';',
)

# Display the raw frame to inspect the available columns.
df

#### Preparing the data
We are only interested in the date (column 'reference_timestamp') and the daily mean temperature (column 'tre200d0'). Create a dataframe with only these two columns; at the same time, convert the date column to a date data type and rename both columns to something more meaningful.

In [None]:
# Keep only the timestamp and the daily mean temperature: parse the timestamp
# strings into dates and give both columns readable names.
temperature = df.select(
    pl.col('reference_timestamp')
    .str.to_date(format='%d.%m.%Y %H:%M')
    .alias('Date'),
    pl.col('tre200d0').alias('Temperature'),
)

temperature

#### Calculate yearly means and graph data
Calculate the yearly means using Polars' *group_by_dynamic* method. Graph the yearly mean as a function of time.

In [None]:
# Group the daily values into one-year windows on 'Date' and average each window.
yearly_means = temperature.group_by_dynamic(
    'Date',
    every='1y',
).agg(
    pl.mean('Temperature'),
)

# Line chart of the yearly mean temperature over time.
yearly_means.plot.line(x='Date', y='Temperature')

#### Rolling Average
To reduce the effect of yearly fluctuations, we can use a *rolling average*. Polars provides the method *rolling* to calculate the rolling average over a given window.

In [None]:
# Smooth the yearly series: average 'Temperature' over a 10-year window on 'Date'.
rolling_av = yearly_means.rolling(
    index_column='Date',
    period='10y',
).agg(
    pl.mean('Temperature'),
)

# Line chart of the smoothed series.
rolling_av.plot.line(x='Date', y='Temperature')
              

## Rainfall in Zurich
As an additional exercise, we can analyse whether precipitation (column 'rre150d0') has also changed over the years.

In [None]:
# Keep only the timestamp and the precipitation column: parse the timestamp
# strings into dates and give both columns readable names.
precipitation = df.select(
    pl.col('reference_timestamp')
    .str.to_date(format='%d.%m.%Y %H:%M')
    .alias('Date'),
    pl.col('rre150d0').alias('Precipitation'),
)

precipitation

In [None]:
# Two-stage aggregation in a single chain:
#   1. average the daily values within one-year windows on 'Date';
#   2. average those yearly means over a 10-year rolling window.
prec_roll = (
    precipitation
    .group_by_dynamic('Date', every='1y')
    .agg(pl.mean('Precipitation'))
    .rolling(index_column='Date', period='10y')
    .agg(pl.mean('Precipitation'))
)

# Line chart of the smoothed precipitation series.
prec_roll.plot.line(x='Date', y='Precipitation')