In [None]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import joblib

# Train a logistic-regression classifier that predicts, from the current
# temperature / humidity / pressure / hour-of-day, whether the precipitation
# type recorded 6 rows later is 'rain'. The fitted model and its scaler are
# written to disk with joblib.

# Load the CSV and keep only rows that have a precipitation type.
df = pd.read_csv('weatherHistory.csv')
df = df.dropna(subset=['Precip Type'])

# Binary label: 1 when the row's precipitation type is exactly 'rain', else 0.
# Vectorized comparison instead of a per-row Python lambda.
df['Rain'] = (df['Precip Type'] == 'rain').astype(int)

# Parse the timestamp column as timezone-aware UTC datetimes; values that
# cannot be parsed become NaT instead of raising.
df['Formatted Date'] = pd.to_datetime(df['Formatted Date'], utc=True, errors='coerce')

# Drop rows whose timestamp could not be parsed.
df = df.dropna(subset=['Formatted Date'])

# With only valid datetimes left, the hour can be extracted safely.
df['Hour'] = df['Formatted Date'].dt.hour

# Build the target by looking 6 rows ahead in chronological order.
# NOTE(review): shift(-6) moves by ROWS, not by time. It equals "6 hours
# ahead" only if the data is strictly hourly with no gaps or duplicate
# timestamps, and the dropna() calls above may themselves introduce gaps.
# TODO confirm against the dataset.
df = df.sort_values('Formatted Date').reset_index(drop=True)
df['Rain_in_6h'] = df['Rain'].shift(-6)
# The last 6 rows have no future observation and therefore no target.
df = df.dropna(subset=['Rain_in_6h'])

# Features and target.
X = df[['Temperature (C)', 'Humidity', 'Pressure (millibars)', 'Hour']]
y = df['Rain_in_6h'].astype(int)

# Split BEFORE scaling so the scaler never sees the held-out rows. Fitting
# the scaler on the full dataset would leak test-set statistics into the
# preprocessing step.
# NOTE(review): the split is random although the rows are time-ordered;
# neighbouring hours can land on both sides of the split. Consider
# shuffle=False if X_test is used for evaluation.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, y, random_state=42)

# Scaling: statistics come from the training partition only.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)
# Kept so that any later code relying on the X_scaled name still works.
X_scaled = scaler.transform(X)

# Training.
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

# Persist the model and the matching scaler; both are needed at inference.
joblib.dump(model, 'rain_model_v161.pkl')
joblib.dump(scaler, 'rain_scaler_v161.pkl')


['rain_scaler_v161.pkl']