In [14]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader

### Clean the data

In [16]:
# Load the NYC weather observations from the CSV in the working directory.
df = pd.read_csv('nyc_weather.csv')

# Restrict the frame to the station identifiers, the observation date and
# the two temperature columns; .copy() detaches it from the full frame so
# the column assignments below do not hit a view.
columns_to_keep = ['STATION', 'NAME', 'DATE', 'TMAX', 'TMIN']
df = df[columns_to_keep].copy()

# Parse DATE strictly as YYYY-MM-DD. errors='coerce' turns any value that
# does not match into NaT instead of raising.
df['DATE'] = pd.to_datetime(df['DATE'], format='%Y-%m-%d', errors='coerce')

# Discard the rows whose DATE could not be parsed (NaT after coercion).
df = df.dropna(subset=['DATE'])

# Put the observations in chronological order.
df = df.sort_values('DATE')

# Calendar features derived from DATE.
df['year'] = df['DATE'].dt.year
df['month'] = df['DATE'].dt.month
df['day'] = df['DATE'].dt.day
df['day_of_week'] = df['DATE'].dt.dayofweek  # pandas convention: Monday=0 ... Sunday=6

# Preview the cleaned frame. `df` is the only frame this cell builds, so it
# is the name that must be displayed for the cell to run on a fresh kernel.
df.head()

Unnamed: 0,STATION,NAME,DATE,TMAX,TMIN,year,month,day,day_of_week
0,USW00094728,"NY CITY CENTRAL PARK, NY US",1869-12-01,62,33,1869,12,1,2
1,USW00094728,"NY CITY CENTRAL PARK, NY US",1869-12-02,33,24,1869,12,2,3
2,USW00094728,"NY CITY CENTRAL PARK, NY US",1869-12-03,29,16,1869,12,3,4
3,USW00094728,"NY CITY CENTRAL PARK, NY US",1869-12-04,37,15,1869,12,4,5
4,USW00094728,"NY CITY CENTRAL PARK, NY US",1869-12-05,39,35,1869,12,5,6


### Normalize the data

In [17]:
# Columns fed to the model: the two temperature readings plus the calendar
# features derived from DATE.
feature_columns = ['TMAX', 'TMIN', 'year', 'month', 'day', 'day_of_week']

# Rescale every feature column with MinMaxScaler (default feature_range is
# (0, 1)). The fitted `scaler` is kept so predictions can be mapped back to
# the original units with scaler.inverse_transform.
# NOTE(review): the scaler is fitted on the whole frame; if a train/test
# split follows, consider fitting on the training portion only to avoid
# leaking test-set statistics - confirm against the later cells.
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(df[feature_columns])

# Reuse df's index so each scaled row keeps the same label as the row of df
# it came from. df's index is not guaranteed to be 0..n-1 once rows have been
# dropped and re-sorted, and a fresh RangeIndex would silently misalign any
# label-based join or column assignment between the two frames.
scaled_df = pd.DataFrame(scaled_data, columns=feature_columns, index=df.index)

# Show the first scaled rows as the cell's output.
scaled_df.head()