In [1]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.interpolate as interpolate
from PIL import Image

In [2]:
# Load one year of hourly weather observations for each station into df_list
# (one DataFrame per entry of `locations`, in the same order).
locations = ['SLC', 'SFO','HNL', 'ANC']
filenames = ["../data/Weather_" + location + "_2023.csv" for location in locations]

# Columns read from every station file; 'DATE' is only used to build the index.
keep = ['DATE', 'HourlyDryBulbTemperature', 'HourlyPrecipitation', 'HourlyWindSpeed','HourlyWindDirection','HourlyVisibility']

# Inclusive time window kept for every station (identical for all files,
# so it is computed once instead of on every loop iteration).
start = pd.to_datetime('2022-12-21 22:00')
end = pd.to_datetime('2023-12-21 21:00')

# List to store DataFrames
df_list = []

# Read each CSV file into a DataFrame and append to the list
for filename in filenames:
    df = pd.read_csv(filename, delimiter=',')
    df = df[keep]
    # Forward-fill gaps. DataFrame.ffill() replaces the deprecated
    # fillna(method='ffill'), which emits a FutureWarning on recent pandas.
    df = df.ffill()
    # Parse the ISO-style DATE strings and use them as the index;
    # set_index('timestamp') also removes the helper column from the data.
    df['timestamp'] = pd.to_datetime(df['DATE'], format='%Y-%m-%dT%H:%M:%S')
    df = df.set_index('timestamp').drop(columns=['DATE'])
    # Convert object columns to numeric (unparseable entries become NaN),
    # replace NaN values with 0.0, then keep the maximum per 60-minute bin.
    # NOTE(review): the zero fill happens before resampling, so a 60-minute
    # bin without any observation would still come out as NaN - confirm
    # whether that is intended.
    df = (
        df.apply(pd.to_numeric, errors='coerce')
        .fillna(0.0)
        .resample('60min')
        .max()
    )
    df = df.loc[(df.index >= start) & (df.index <= end)]
    df_list.append(df)

# Display the tail of each DataFrame to confirm loading
for i, location in enumerate(locations):
    print(f"Tail of DataFrame for {location}:")
    print(df_list[i].tail(5))
    print("\n")

  df = pd.read_csv(filename, delimiter=',')
  df = df.fillna(method='ffill')
  df = pd.read_csv(filename, delimiter=',')
  df = df.fillna(method='ffill')
  df = pd.read_csv(filename, delimiter=',')
  df = df.fillna(method='ffill')


Tail of DataFrame for SLC:
                     HourlyDryBulbTemperature  HourlyPrecipitation  \
timestamp                                                            
2023-12-21 17:00:00                      44.0                  0.0   
2023-12-21 18:00:00                      39.0                  0.0   
2023-12-21 19:00:00                      39.0                  0.0   
2023-12-21 20:00:00                      39.0                  0.0   
2023-12-21 21:00:00                      39.0                  0.0   

                     HourlyWindSpeed  HourlyWindDirection  HourlyVisibility  
timestamp                                                                    
2023-12-21 17:00:00              6.0                270.0              8.00  
2023-12-21 18:00:00              6.0                250.0              7.00  
2023-12-21 19:00:00              0.0                  0.0              7.00  
2023-12-21 20:00:00              3.0                160.0              6.84  
2023-12-21 21:

  df = pd.read_csv(filename, delimiter=',')
  df = df.fillna(method='ffill')


In [4]:
# Show the column labels of the first loaded frame (the one for locations[0]).
df_list[0].columns

Index(['HourlyDryBulbTemperature', 'HourlyPrecipitation', 'HourlyWindSpeed',
       'HourlyWindDirection', 'HourlyVisibility'],
      dtype='object')

In [19]:
# Channels that are rescaled onto the 0-255 byte range
columns_to_normalize = ['HourlyDryBulbTemperature', 'HourlyPrecipitation', 'HourlyWindSpeed', 'HourlyWindDirection', 'HourlyVisibility']

# Stack all stations so that the extrema are shared across locations
combined_df = pd.concat([station[columns_to_normalize] for station in df_list])

# Per-column extrema over every station
global_min = combined_df.min()
global_max = combined_df.max()


def _to_byte_scale(values, lowest, highest):
    """Map `values` linearly from [lowest, highest] onto 0-255 as uint8.

    Non-finite results (NaN, +inf, -inf) are replaced with 0 before the cast.
    """
    scaled = ((values - lowest) / (highest - lowest)) * 255
    cleaned = np.nan_to_num(scaled, nan=0.0, posinf=0.0, neginf=0.0)
    return cleaned.astype(np.uint8)


# Build one normalized copy per station; the input frames are left untouched
normalized_df_list = []
for df in df_list:
    normalized_df = df.assign(**{
        column: _to_byte_scale(df[column], global_min[column], global_max[column])
        for column in columns_to_normalize
    })
    normalized_df_list.append(normalized_df)

# Print the first rows of every normalized frame as a sanity check
for location, frame in zip(locations, normalized_df_list):
    print(f"Head of normalized DataFrame for {location}:")
    print(frame.head(5))
    print("\n")

Head of normalized DataFrame for SLC:
                     HourlyDryBulbTemperature  HourlyPrecipitation  \
timestamp                                                            
2022-12-21 22:00:00                        85                    3   
2022-12-21 23:00:00                        83                    1   
2022-12-22 00:00:00                        78                    1   
2022-12-22 01:00:00                        73                    0   
2022-12-22 02:00:00                        73                    0   

                     HourlyWindSpeed  HourlyWindDirection  HourlyVisibility  
timestamp                                                                    
2022-12-21 22:00:00               46                  205                54  
2022-12-21 23:00:00               46                  226                54  
2022-12-22 00:00:00               57                  247                54  
2022-12-22 01:00:00               88                  247                60  
202

In [None]:
# Render every (station, channel) series as a grayscale strip image:
# one pixel row per sample, stretched to 200 pixels wide, saved under ../img.
columns_to_normalize = ['HourlyDryBulbTemperature', 'HourlyPrecipitation', 'HourlyWindSpeed', 'HourlyWindDirection']

# Image.save raises FileNotFoundError when the target folder is missing,
# so make sure it exists before the loop starts writing files.
Path('../img').mkdir(parents=True, exist_ok=True)


def _rescale_to_uint8(values):
    """Stretch `values` linearly so its own min/max map to 0 and 255.

    Returns a uint8 array. Non-finite results (for example from a constant
    series, where max == min) are replaced with 0 before the cast.
    """
    min_val = values.min()
    max_val = values.max()
    scaled = ((values - min_val) / (max_val - min_val)) * 255
    scaled = np.nan_to_num(scaled, nan=0.0, posinf=0.0, neginf=0.0)
    return scaled.astype(np.uint8)


def _strip_image(pixels, width=200):
    """Build a grayscale image `width` pixels wide with one row per entry of `pixels`."""
    height = len(pixels)
    image = Image.new('L', (1, height))  # 'L' mode for grayscale
    image.putdata(pixels)
    return image.resize((width, height))


for location, df in zip(locations, normalized_df_list):
    for ch in columns_to_normalize:
        # Transform values if needed (e.g., log transformation)
        # In this example, we directly use the values
        transformed_values = df[ch]

        # Each series is rescaled again here, so every image uses the full
        # 0-255 range of its own station and channel.
        normalized_values = _rescale_to_uint8(transformed_values)
        scaled_image = _strip_image(normalized_values)

        # Save the image
        image_filename = f'../img/{location}_{ch}_image_scaled.png'
        scaled_image.save(image_filename)

        # Optionally, display the image
        scaled_image.show()

        print(f'Saved image for {ch} in {location} to {image_filename}')

In [113]:

# Render each channel of df_2023 as a grayscale strip image
# (one pixel row per sample, 200 pixels wide) and save it under ../img.
channel = ['HourlyDryBulbTemperature', 'HourlyPrecipitation', 'HourlyWindSpeed', 'HourlyWindDirection']

# NOTE(review): df_2023 is not defined in any visible cell - it has to come
# from another cell before this one can run on a fresh kernel.
# The file name previously interpolated the whole `locations` list; the
# recorded output of this cell shows single-station names starting with
# 'ANC', so one station code is used here. Confirm df_2023 holds ANC data.
station_code = 'ANC'

for ch in channel:

    # In this example, we directly use the values
    transformed_values = df_2023[ch]

    # Normalize the transformed values to the range 0-255
    min_val = transformed_values.min()
    max_val = transformed_values.max()
    normalized_values = ((transformed_values - min_val) / (max_val - min_val)) * 255

    # Replace non-finite values with 0.0 (covers a constant series, where max == min)
    normalized_values = np.nan_to_num(normalized_values, nan=0.0, posinf=0.0, neginf=0.0)
    normalized_values = normalized_values.astype(np.uint8)

    # Create an image: one pixel wide, one row per sample
    height = len(normalized_values)
    image = Image.new('L', (1, height))  # 'L' mode for grayscale
    image.putdata(normalized_values)

    # Scale the image to 200 pixels wide
    scaled_image = image.resize((200, height))

    # Save the image under a single-station file name
    image_filename = f'../img/{station_code}_{ch}_image_scaled.png'
    scaled_image.save(image_filename)

    # Optionally, display the image
    scaled_image.show()

    print(f'Saved image for {ch} to {image_filename}')


Saved image for HourlyDryBulbTemperature to ../img/ANC_HourlyDryBulbTemperature_image_scaled.png
Saved image for HourlyPrecipitation to ../img/ANC_HourlyPrecipitation_image_scaled.png
Saved image for HourlyWindSpeed to ../img/ANC_HourlyWindSpeed_image_scaled.png
Saved image for HourlyWindDirection to ../img/ANC_HourlyWindDirection_image_scaled.png
