In [38]:
import pandas as pd
import numpy as np
from scipy.stats import linregress

Код для генерации данных

In [None]:
# Реальные средние температуры (примерные данные) для городов по сезонам
seasonal_temperatures = {
    "New York": {"winter": 0, "spring": 10, "summer": 25, "autumn": 15},
    "London": {"winter": 5, "spring": 11, "summer": 18, "autumn": 12},
    "Paris": {"winter": 4, "spring": 12, "summer": 20, "autumn": 13},
    "Tokyo": {"winter": 6, "spring": 15, "summer": 27, "autumn": 18},
    "Moscow": {"winter": -10, "spring": 5, "summer": 18, "autumn": 8},
    "Sydney": {"winter": 12, "spring": 18, "summer": 25, "autumn": 20},
    "Berlin": {"winter": 0, "spring": 10, "summer": 20, "autumn": 11},
    "Beijing": {"winter": -2, "spring": 13, "summer": 27, "autumn": 16},
    "Rio de Janeiro": {"winter": 20, "spring": 25, "summer": 30, "autumn": 25},
    "Dubai": {"winter": 20, "spring": 30, "summer": 40, "autumn": 30},
    "Los Angeles": {"winter": 15, "spring": 18, "summer": 25, "autumn": 20},
    "Singapore": {"winter": 27, "spring": 28, "summer": 28, "autumn": 27},
    "Mumbai": {"winter": 25, "spring": 30, "summer": 35, "autumn": 30},
    "Cairo": {"winter": 15, "spring": 25, "summer": 35, "autumn": 25},
    "Mexico City": {"winter": 12, "spring": 18, "summer": 20, "autumn": 15},
}

# Сопоставление месяцев с сезонами
month_to_season = {12: "winter", 1: "winter", 2: "winter",
                   3: "spring", 4: "spring", 5: "spring",
                   6: "summer", 7: "summer", 8: "summer",
                   9: "autumn", 10: "autumn", 11: "autumn"}

# Генерация данных о температуре
def generate_realistic_temperature_data(cities, num_years=10):
    dates = pd.date_range(start="2010-01-01", periods=365 * num_years, freq="D")
    data = []

    for city in cities:
        for date in dates:
            season = month_to_season[date.month]
            mean_temp = seasonal_temperatures[city][season]
            # Добавляем случайное отклонение
            temperature = np.random.normal(loc=mean_temp, scale=5)
            data.append({"city": city, "timestamp": date, "temperature": temperature})

    df = pd.DataFrame(data)
    df['season'] = df['timestamp'].dt.month.map(lambda x: month_to_season[x])
    return df

# Генерация данных
data = generate_realistic_temperature_data(list(seasonal_temperatures.keys()))
data.to_csv('temperature_data.csv', index=False)

In [5]:
# Загрузка данных
df = pd.read_csv('temperature_data.csv')
df.head()

Unnamed: 0,city,timestamp,temperature,season
0,New York,2010-01-01,7.162237,winter
1,New York,2010-01-02,8.048603,winter
2,New York,2010-01-03,-2.443189,winter
3,New York,2010-01-04,3.400037,winter
4,New York,2010-01-05,8.778664,winter


In [12]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 54750 entries, 0 to 54749
Data columns (total 4 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   city         54750 non-null  object        
 1   timestamp    54750 non-null  datetime64[ns]
 2   temperature  54750 non-null  float64       
 3   season       54750 non-null  object        
dtypes: datetime64[ns](1), float64(1), object(2)
memory usage: 1.7+ MB


In [11]:
df['timestamp'] = pd.to_datetime(df['timestamp'])

In [15]:
df = df.sort_values(['city', 'season', 'timestamp']).reset_index(drop=True)
df['temp_smooth'] = df.groupby(['city', 'season'])['temperature'].transform(
    lambda x: x.rolling(window=30, center=True).mean()
)

group_stats = df.groupby(['city', 'season'])['temp_smooth'].agg(
    mean_smooth = 'mean',
    std_smooth = 'std'
).reset_index()

In [16]:
group_stats.head(10)

Unnamed: 0,city,season,mean_smooth,std_smooth
0,Beijing,autumn,15.863452,0.799021
1,Beijing,spring,13.110585,0.900612
2,Beijing,summer,27.016987,1.036281
3,Beijing,winter,-1.882409,0.95951
4,Berlin,autumn,10.709098,0.909325
5,Berlin,spring,9.63402,0.944654
6,Berlin,summer,20.121489,0.855931
7,Berlin,winter,0.093373,0.849267
8,Cairo,autumn,25.02529,0.830312
9,Cairo,spring,24.954015,0.76053


In [33]:
df = df.merge(group_stats, on=['city', 'season'], how='left')

In [35]:
df['temp_anomaly'] = np.abs(df['temperature'] - df['mean_smooth']) > 2*df['std_smooth']

In [36]:
df.sample(20)

Unnamed: 0,city,timestamp,temperature,season,temp_smooth,mean_smooth,std_smooth,temp_anomaly
23751,Mexico City,2010-06-22,15.706814,summer,20.82311,20.214855,0.871142,True
5521,Berlin,2010-07-12,21.437486,summer,19.346994,20.121489,0.855931,False
47048,Singapore,2015-02-17,27.585959,winter,28.302191,26.690725,1.017734,False
11487,Dubai,2015-11-22,24.610707,autumn,30.417008,29.947209,0.921835,True
3646,Beijing,2019-12-26,0.211806,winter,,-1.882409,0.95951,True
13290,Dubai,2015-07-21,41.804404,summer,40.070598,40.227764,0.814499,False
27882,Moscow,2015-07-13,10.563154,summer,17.08681,17.73042,0.907204,True
13484,Dubai,2017-07-31,39.44186,summer,39.776052,40.227764,0.814499,False
29893,Mumbai,2017-10-27,25.970906,autumn,30.537647,30.254439,0.803681,True
31947,Mumbai,2019-08-29,34.852887,summer,,35.087658,0.799168,False


In [None]:
def analysis_city(df_city):
  city = df_city['city'].iloc[0]
  df = df_city.copy().sort_values('date').reset_index(drop=True)

  # 1.1 Скользящее среднее и std, поиск аномалий
  df['temp_smooth'] = df.groupby(['season'])['temperature'].transform(
    lambda x: x.rolling(window=30, center=True).mean()
  )

  smooth_season_profile = df.groupby(['season'])['temp_smooth'].agg(
    temp_mean_smooth = 'mean',
    temp_std_smooth = 'std'
  ).reset_index()

  df = df.merge(group_stats, on=['season'], how='left')
  df['temp_anomaly'] = np.abs(df['temperature'] - df['temp_mean_smooth']) > 2*df['temp_std_smooth']
  temp_anomaly = df[df['temp_anomaly']].copy()[['timestamp', 'temperature', 'temp_smooth', 'temp_smooth_std']]

  # 1.2 Сезонный профиль: mean, std по сезонам (без скользящего окна)
  season_profile = df.groupby(['season'])['temperature'].agg(
    temp_mean = 'mean',
    temp_std = 'std'
  ).reset_index()

  # 1.3 Тренд
  df['day_index'] = np.arange(len(df))
  slope, intercept, r_value, p_value, std_err = linregress(df['day_index'], df['temperature'])

  if abs(slope) < 1e-8 or p_value > 0.05:
    trend = 'neutral'
  elif slope > 0:
    trend = 'positive'
  else:
    trend = 'negative'

  # 1.4 Общая статистика
  avg_temp = df['temp'].mean()
  min_temp = df['temp'].min()
  max_temp = df['temp'].max()

  return {
      'city': city,
      'avg_temp': avg_temp,
      'min_temp': min_temp,
      'max_temp': max_temp,
      'season_profile': season_profile,
      'trend': trend,
      'temp_anomaly': temp_anomaly
  }
