In [None]:
import pandas as pd

# Location of the raw NSRDB CSV export. The file has to be present in the
# Colab session already (uploaded by hand) or the path adjusted to point at it.
raw_csv_path = "/content/weather_csv.csv"
weather_raw = pd.read_csv(raw_csv_path)

# Display the first few rows as a sanity check on the parsed columns
weather_raw.head()


Unnamed: 0,Year,Month,Day,Hour,Minute,Temperature,DHI,GHI,DNI,Wind Speed
0,2023,1,1,0,0,7.4,0,0,0,1.6
1,2023,1,1,1,0,6.7,0,0,0,1.6
2,2023,1,1,2,0,6.4,0,0,0,1.8
3,2023,1,1,3,0,6.0,0,0,0,1.8
4,2023,1,1,4,0,5.9,0,0,0,1.9


In [None]:
# The timestamp arrives split over five integer columns; assemble them into a
# single datetime column named 'time'.
date_parts = ['Year', 'Month', 'Day', 'Hour', 'Minute']
weather_raw['time'] = pd.to_datetime(weather_raw[date_parts])

# Put the rows in chronological order and renumber the index from zero
# (the input is expected to be ordered already; this is a safeguard).
weather_raw = weather_raw.sort_values(by='time', ignore_index=True)

# The split date/time columns are redundant once 'time' exists, so the working
# frame leaves them out. weather_raw itself keeps them.
weather = weather_raw.drop(columns=date_parts)

# Move 'time' to the front; every other column keeps its relative order.
cols = ['time', *weather.columns.drop('time')]
weather = weather[cols]

weather.head()


Unnamed: 0,time,Temperature,DHI,GHI,DNI,Wind Speed
0,2023-01-01 00:00:00,7.4,0,0,0,1.6
1,2023-01-01 01:00:00,6.7,0,0,0,1.6
2,2023-01-01 02:00:00,6.4,0,0,0,1.8
3,2023-01-01 03:00:00,6.0,0,0,0,1.8
4,2023-01-01 04:00:00,5.9,0,0,0,1.9


In [None]:
# Clean up column names to match pvlib expectations later
weather = weather.rename(columns={
    'GHI': 'ghi',
    'DNI': 'dni',
    'DHI': 'dhi',
    'Temperature': 'temp_air',
    'Wind Speed': 'wind_speed'
})

# Ensure all measurement columns are numeric.
# errors='coerce' turns anything unparseable into NaN instead of raising. That
# keeps the cell running on messy input, but it would otherwise hide bad data,
# so count the NaNs introduced by the conversion and report them.
import warnings

coerced_counts = {}
for col in ['ghi', 'dni', 'dhi', 'temp_air', 'wind_speed']:
    converted = pd.to_numeric(weather[col], errors='coerce')
    # Only values that were present before and are NaN now were coerced;
    # values that were already missing are not counted.
    newly_missing = int(converted.isna().sum() - weather[col].isna().sum())
    if newly_missing:
        coerced_counts[col] = newly_missing
    weather[col] = converted

if coerced_counts:
    warnings.warn(
        f"Non-numeric values were coerced to NaN (column: count): {coerced_counts}"
    )

weather.head()


Unnamed: 0,time,temp_air,dhi,ghi,dni,wind_speed
0,2023-01-01 00:00:00,7.4,0,0,0,1.6
1,2023-01-01 01:00:00,6.7,0,0,0,1.6
2,2023-01-01 02:00:00,6.4,0,0,0,1.8
3,2023-01-01 03:00:00,6.0,0,0,0,1.8
4,2023-01-01 04:00:00,5.9,0,0,0,1.9


In [None]:
# Write the cleaned weather table to a CSV file in the current working
# directory. index=False leaves the row index out of the file.
output_name = "nsrdb_weather_nyc_2023_hourly.csv"
weather.to_csv(output_name, index=False)

# Confirm the write and echo the file name
print(f" Weather data saved as '{output_name}'")


 Weather data saved as 'nsrdb_weather_nyc_2023_hourly.csv'
