# Newark Hourly Weather

## Combining Datasets

In [2]:
import pandas as pd
path = "Data/Predict With - X/Weather/Newark/"
df1 = pd.read_csv(path + "hourly_weather_newark_2019.csv")
df2 = pd.read_csv(path + "hourly_weather_newark_2020.csv")
df3 = pd.read_csv(path + "hourly_weather_newark_2021.csv")
df4 = pd.read_csv(path + "hourly_weather_newark_2022.csv")
df5 = pd.read_csv(path + "hourly_weather_newark_2023.csv")
df6 = pd.read_csv(path + "hourly_weather_newark_2024.csv")

merged_df = pd.concat([df1, df2, df3, df4, df5, df6])
merged_df

Unnamed: 0,Time,Temperature,Dew Point,Humidity,Wind,Wind Speed,Wind Gust,Pressure,Precipitation,Condition
0,2019-01-01 1:51 AM,46F,46F,100%,CALM,0mph,0mph,29.73in,0.1in,Light Rain
1,2019-01-01 2:51 AM,47F,46F,97%,CALM,0mph,0mph,29.67in,0.0in,Light Rain
2,2019-01-01 3:45 AM,46F,46F,100%,SW,3mph,0mph,29.62in,0.0in,Fog
3,2019-01-01 3:51 AM,47F,47F,100%,WSW,5mph,0mph,29.62in,0.0in,Fog
4,2019-01-01 4:15 AM,47F,47F,100%,W,6mph,0mph,29.63in,0.0in,Light Rain
...,...,...,...,...,...,...,...,...,...,...
9818,12/31/2024 22:51,46F,43F,89%,NE,13mph,0mph,29.55in,0.4in,T-Storm
9819,12/31/2024 23:05,46F,43F,89%,NE,10mph,22mph,29.53in,0.1in,Light Rain with Thunder
9820,12/31/2024 23:51,46F,43F,89%,NE,13mph,0mph,29.49in,0.1in,Light Rain
9821,12/31/2024 0:26,47F,44F,90%,N,5mph,0mph,29.49in,0.0in,Cloudy


## Cleaning

In [3]:
# Remove NAs

print(f"NA before:\n{merged_df.isnull().sum()}")
print(f"\nTotal Row: {len(merged_df)}\n")

merged_df.dropna(inplace=True)

print(f"NA after:\n{merged_df.isnull().sum()}")
print(f"\nTotal Row: {len(merged_df)}\n")

# Remove units
merged_df["Temperature"] = merged_df["Temperature"].replace(r"[^\d.]", "", regex=True).astype(float)
merged_df["Dew Point"] = merged_df["Dew Point"].replace(r"[^\d.]", "", regex=True).astype(float)
merged_df["Humidity"] = merged_df["Humidity"].replace(r"[^\d.]", "", regex=True).astype(float)
merged_df["Wind Speed"] = merged_df["Wind Speed"].replace(r"[^\d.]", "", regex=True).astype(float)
merged_df["Wind Gust"] = merged_df["Wind Gust"].replace(r"[^\d.]", "", regex=True).astype(float)
merged_df["Pressure"] = merged_df["Pressure"].replace(r"[^\d.]", "", regex=True).astype(float)
merged_df["Precipitation"] = merged_df["Precipitation"].replace(r"[^\d.]", "", regex=True).astype(float)

# Normalize time
merged_df["Time"] = pd.to_datetime(merged_df["Time"], format='mixed')

# Rename columns (Should be placed last)
merged_df.rename(
    columns={
        "Time": "Date",
        "Temperature": "Temperature (F)",
        "Dew Point": "Dew Point (F)",
        "Humidity": "Humidity (%)",
        "Wind Speed": "Wind Speed (mph)",
        "Wind Gust": "Wind Gust (mph)",
        "Pressure": "Pressure (in)",
        "Precipitation": "Precipitation (in)"
    },
    inplace=True
)

# Download merged dataframe
merged_df.to_csv("Data/hourly_weather_newark.csv", index=False)

merged_df

NA before:
Time              1
Temperature       1
Dew Point         1
Humidity          1
Wind             76
Wind Speed        1
Wind Gust         1
Pressure          1
Precipitation     1
Condition         2
dtype: int64

Total Row: 59788

NA after:
Time             0
Temperature      0
Dew Point        0
Humidity         0
Wind             0
Wind Speed       0
Wind Gust        0
Pressure         0
Precipitation    0
Condition        0
dtype: int64

Total Row: 59712



Unnamed: 0,Date,Temperature (F),Dew Point (F),Humidity (%),Wind,Wind Speed (mph),Wind Gust (mph),Pressure (in),Precipitation (in),Condition
0,2019-01-01 01:51:00,46.0,46.0,100.0,CALM,0.0,0.0,29.73,0.1,Light Rain
1,2019-01-01 02:51:00,47.0,46.0,97.0,CALM,0.0,0.0,29.67,0.0,Light Rain
2,2019-01-01 03:45:00,46.0,46.0,100.0,SW,3.0,0.0,29.62,0.0,Fog
3,2019-01-01 03:51:00,47.0,47.0,100.0,WSW,5.0,0.0,29.62,0.0,Fog
4,2019-01-01 04:15:00,47.0,47.0,100.0,W,6.0,0.0,29.63,0.0,Light Rain
...,...,...,...,...,...,...,...,...,...,...
9818,2024-12-31 22:51:00,46.0,43.0,89.0,NE,13.0,0.0,29.55,0.4,T-Storm
9819,2024-12-31 23:05:00,46.0,43.0,89.0,NE,10.0,22.0,29.53,0.1,Light Rain with Thunder
9820,2024-12-31 23:51:00,46.0,43.0,89.0,NE,13.0,0.0,29.49,0.1,Light Rain
9821,2024-12-31 00:26:00,47.0,44.0,90.0,N,5.0,0.0,29.49,0.0,Cloudy
