In [58]:
import pandas as pd

df = pd.read_csv('./data/temperature_data.csv', parse_dates=['timestamp'])


def get_moving_average(temperature: pd.Series, window: int = 30) -> pd.Series:
    return temperature.rolling(window=window).mean()


def process_city(city_df: pd.DataFrame) -> pd.DataFrame:
    df = city_df.copy()
    df['year'] = df['timestamp'].dt.year
    df['ma30'] = get_moving_average(df.temperature)
    df['season_code'] = (df.season != df.season.shift()).cumsum()

    season_stats = df.groupby('season_code')['temperature'].agg(['mean', 'std'])
    
    res_df = df.merge(season_stats, on=['season_code'], how='left')

    res_df['is_anomaly'] = res_df.temperature.abs() > res_df['mean'].abs() + 2 * res_df['std']
    return res_df


msk_df = df[df.city == 'Moscow']
processed_df = process_city(msk_df)

stats = processed_df.groupby(['year', 'season']).agg(
    mean_temp=('temperature', 'mean'),
    min_temp=('temperature', 'min'),
    max_temp=('temperature', 'max'),
    anomaly_count=('is_anomaly', 'sum')
).reset_index()

stats

Unnamed: 0,year,season,mean_temp,min_temp,max_temp,anomaly_count
0,2010,autumn,8.122151,-4.946487,20.818913,3
1,2010,spring,5.708459,-4.617743,18.856079,3
2,2010,summer,18.07875,5.012729,28.957293,1
3,2010,winter,-9.274879,-18.350229,0.660484,0
4,2011,autumn,7.560012,-7.292132,20.004949,2
5,2011,spring,5.103052,-10.620465,13.587962,0
6,2011,summer,18.628147,7.160505,33.181661,2
7,2011,winter,-11.671966,-27.856712,-0.977039,3
8,2012,autumn,7.880816,-3.357227,22.078496,2
9,2012,spring,5.246625,-6.689407,17.151765,3
