In [1]:
import pandas as pd


In [37]:
import typing as tp
import numpy as np
from collections import defaultdict
from pydantic import BaseModel


class WindDescription(BaseModel):
    isnan: bool
    changed: bool
    x_rad: float
    y_rad: float


def dd_preparation(description: tp.Optional[str]) -> WindDescription:
    '''
    :param description: string description of wind
    :return: WindDescription object
    '''
    if description is None or not isinstance(description, str):
        return WindDescription(isnan=True, changed=False, x_rad=0., y_rad=0.)
    
    prepared_description = description.lower()

    if 'переменное направление'.lower() in prepared_description:
        return WindDescription(isnan=False, changed=True, x_rad=0., y_rad=0.)

    if 'штиль' in prepared_description or 'безветрие' in prepared_description:
        return WindDescription(isnan=False, changed=False, x_rad=0., y_rad=0.)

    dest_to_rad = {'восток': 0,
                   'север': np.pi / 2,
                   'запад': np.pi,
                   'юг': 3 * np.pi / 2}
    
    counts = {k: prepared_description.count(k) for k in dest_to_rad if prepared_description.count(k) > 0}
    
    count = sum((v for v in counts.values()))
    angle = sum((c * dest_to_rad[dest] for dest, c in counts.items()))
    
    if 'восток' in counts and 'юг' in counts:
        angle += counts['восток'] * 2 * np.pi
    
    res_angle = angle / count
    
    return WindDescription(isnan=False, changed=False, x_rad=np.cos(res_angle), y_rad=np.sin(res_angle)).dict()

In [40]:
pd.json_normalize(weather_data.DD.map(dd_preparation))

Unnamed: 0,isnan,changed,x_rad,y_rad
0,False,False,-5.000000e-01,8.660254e-01
1,False,False,-7.071068e-01,7.071068e-01
2,False,False,-7.071068e-01,7.071068e-01
3,False,False,-7.071068e-01,7.071068e-01
4,False,False,-1.836970e-16,-1.000000e+00
...,...,...,...,...
356501,False,False,-1.000000e+00,1.224647e-16
356502,False,False,-1.000000e+00,1.224647e-16
356503,False,False,-8.660254e-01,5.000000e-01
356504,False,False,-8.660254e-01,5.000000e-01


In [36]:
pd.DataFrame.from_records(weather_data.DD.map(dd_preparation))

AttributeError: 'WindDescription' object has no attribute 'keys'

In [31]:
weather_data = pd.read_csv('../data/weather_train.csv', sep=';')
weather_data.head()

  weather_data = pd.read_csv('../data/weather_train.csv', sep=';')


Unnamed: 0.1,Unnamed: 0,local_time,T,Po,P,Pa,U,DD,Ff,ff10,...,Ch,VV,Td,RRR,tR,E,Tg,E',sss,station_id
0,0,31.12.2013 21:00,-1.5,758.9,773.2,0.7,99.0,"Ветер, дующий с северо-северо-запада",2.0,,...,,6.0,-1.6,,,,,,,34123
1,1,31.12.2013 18:00,-1.7,758.2,772.5,0.7,99.0,"Ветер, дующий с северо-запада",3.0,,...,,3.0,-1.9,Осадков нет,12.0,,,,,34123
2,2,31.12.2013 15:00,-1.7,757.5,771.8,0.0,99.0,"Ветер, дующий с северо-запада",4.0,,...,,3.0,-1.9,,,,,,,34123
3,3,31.12.2013 12:00,-1.2,757.5,771.7,0.8,99.0,"Ветер, дующий с северо-запада",3.0,,...,,2.5,-1.4,,,,,,,34123
4,4,31.12.2013 09:00,-1.2,756.7,770.9,1.2,99.0,"Ветер, дующий с юга",3.0,,...,,0.5,-1.4,,,,,,,34123
