In [3]:
from tasks import DataFetchingTask, DataCalculationTask
from external.client import YandexWeatherAPI

data_fetcher = DataFetchingTask(client=YandexWeatherAPI())
data_fetcher.run()

print('Available cities:', ', '.join(
    data_fetcher.city_to_forecasting_data.keys()
))

data_calculator = DataCalculationTask(
    data_fetcher.city_to_forecasting_data
)
data_calculator.run()

# for city, data in data_calculator.city_to_analyzed_days_info.items():
#     print(f'{city}: {data}')
data_calculator.city_to_analyzed_days_info



Available cities: BERLIN, MOSCOW, BEIJING, KAZAN, PARIS, SPETERSBURG, LONDON, NOVOSIBIRSK, VOLGOGRAD, WARSZAWA, ABUDHABI, BUCHAREST, ROMA, KALININGRAD, MADRID, CAIRO


{'BERLIN': [{'date': '2022-05-26',
   'hours_start': 9,
   'hours_end': 19,
   'hours_count': 11,
   'temp_avg': 19.273,
   'relevant_cond_hours': 9},
  {'date': '2022-05-27',
   'hours_start': 9,
   'hours_end': 19,
   'hours_count': 11,
   'temp_avg': 16.0,
   'relevant_cond_hours': 6},
  {'date': '2022-05-28',
   'hours_start': 9,
   'hours_end': 19,
   'hours_count': 11,
   'temp_avg': 13.636,
   'relevant_cond_hours': 0}],
 'MOSCOW': [{'date': '2022-05-26',
   'hours_start': 9,
   'hours_end': 19,
   'hours_count': 11,
   'temp_avg': 17.727,
   'relevant_cond_hours': 7},
  {'date': '2022-05-27',
   'hours_start': 9,
   'hours_end': 19,
   'hours_count': 11,
   'temp_avg': 13.091,
   'relevant_cond_hours': 0},
  {'date': '2022-05-28',
   'hours_start': 9,
   'hours_end': 19,
   'hours_count': 11,
   'temp_avg': 12.182,
   'relevant_cond_hours': 0},
  {'date': '2022-05-29',
   'hours_start': 9,
   'hours_end': 9,
   'hours_count': 1,
   'temp_avg': 12.0,
   'relevant_cond_hours': 1}

In [66]:
import pandas as pd

data = data_calculator.city_to_analyzed_days_info

# Создаем список словарей для данных
rows = []

# Заполняем список словарей данными
for city, city_data in data.items():
    for entry in city_data:
        rows.append({
            'city': city,
            'date': entry['date'],
            'hours_start': entry['hours_start'],
            'hours_end': entry['hours_end'],
            'hours_count': entry['hours_count'],
            'temp_avg': entry['temp_avg'],
            'relevant_cond_hours': entry['relevant_cond_hours']
        })

df = pd.DataFrame(rows)
df

Unnamed: 0,city,date,hours_start,hours_end,hours_count,temp_avg,relevant_cond_hours
0,BERLIN,2022-05-26,9,19,11,19.273,9
1,BERLIN,2022-05-27,9,19,11,16.0,6
2,BERLIN,2022-05-28,9,19,11,13.636,0
3,MOSCOW,2022-05-26,9,19,11,17.727,7
4,MOSCOW,2022-05-27,9,19,11,13.091,0
5,MOSCOW,2022-05-28,9,19,11,12.182,0
6,MOSCOW,2022-05-29,9,9,1,12.0,1
7,BEIJING,2022-05-26,9,19,11,31.818,11
8,BEIJING,2022-05-27,9,19,11,32.727,11
9,BEIJING,2022-05-28,9,19,11,33.818,11


In [67]:
# Группируем по полю 'city' и вычисляем средние значения
grouped_df = df.groupby('city').agg({'temp_avg': 'mean', 'relevant_cond_hours': 'mean'}).reset_index()
# Ранжируем города.
grouped_df['rating'] = grouped_df[['temp_avg','relevant_cond_hours']].apply(tuple, axis=1).rank(
    method='dense',
    ascending=False,
).astype(int)

grouped_df

Unnamed: 0,city,temp_avg,relevant_cond_hours,rating
0,ABUDHABI,34.27275,8.75,1
1,BEIJING,31.6325,9.75,3
2,BERLIN,16.303,5.0,9
3,BUCHAREST,24.841,8.5,5
4,CAIRO,33.394,11.0,2
5,KALININGRAD,13.272667,4.0,14
6,KAZAN,13.74975,2.5,13
7,LONDON,16.091,11.0,10
8,MOSCOW,13.75,2.0,12
9,NOVOSIBIRSK,23.20925,9.5,7


In [68]:
# Добавляем новые колонки средних значений в исходный DataFrame
df = df.merge(grouped_df, on='city', suffixes=('', '_mean'))

In [69]:
df

Unnamed: 0,city,date,hours_start,hours_end,hours_count,temp_avg,relevant_cond_hours,temp_avg_mean,relevant_cond_hours_mean,rating
0,BERLIN,2022-05-26,9,19,11,19.273,9,16.303,5.0,9
1,BERLIN,2022-05-27,9,19,11,16.0,6,16.303,5.0,9
2,BERLIN,2022-05-28,9,19,11,13.636,0,16.303,5.0,9
3,MOSCOW,2022-05-26,9,19,11,17.727,7,13.75,2.0,12
4,MOSCOW,2022-05-27,9,19,11,13.091,0,13.75,2.0,12
5,MOSCOW,2022-05-28,9,19,11,12.182,0,13.75,2.0,12
6,MOSCOW,2022-05-29,9,9,1,12.0,1,13.75,2.0,12
7,BEIJING,2022-05-26,9,19,11,31.818,11,31.6325,9.75,3
8,BEIJING,2022-05-27,9,19,11,32.727,11,31.6325,9.75,3
9,BEIJING,2022-05-28,9,19,11,33.818,11,31.6325,9.75,3


In [78]:
cities = df.groupby('city').first()[['temp_avg_mean', 'relevant_cond_hours_mean', 'rating']].sort_values(by='rating')[:1]

In [79]:
cities

Unnamed: 0_level_0,temp_avg_mean,relevant_cond_hours_mean,rating
city,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
ABUDHABI,34.27275,8.75,1


In [93]:
for row in cities.iterrows():
    print(row['relevant_cond_hours_mean'])

TypeError: tuple indices must be integers or slices, not str