In [3]:
import os
import pandas as pd
import zipfile

directory_path = "./Humidity"  # Directory containing ZIP files
output_file = os.path.join(directory_path, "humidity.csv")

# List all ZIP files in the directory. os.listdir() returns entries in
# arbitrary order, so sort them to keep the row order of the combined CSV
# reproducible from run to run.
all_files = sorted(
    os.path.join(directory_path, f)
    for f in os.listdir(directory_path)
    if f.endswith('.zip')
)
dataframes = []

# Iterate through each ZIP file
for zip_file in all_files:
    with zipfile.ZipFile(zip_file, 'r') as z:
        # Iterate through the members of the ZIP archive
        for file in z.namelist():
            # Archives created on macOS carry "__MACOSX/._name.csv" resource-fork
            # members: they end in .csv but hold binary metadata, not CSV data.
            if file.startswith('__MACOSX/'):
                continue
            # Read every CSV member, wherever it sits inside the archive
            if file.endswith('.csv'):
                with z.open(file) as f:
                    df = pd.read_csv(f)
                    dataframes.append(df)

# pd.concat() fails with an opaque "No objects to concatenate" on an empty
# list; raise the same exception type with a message naming what was searched.
if not dataframes:
    raise ValueError(
        f"No CSV files found inside the ZIP archives in {directory_path!r}"
    )

# Concatenate all DataFrames into one, renumbering the rows from 0
final_df = pd.concat(dataframes, ignore_index=True)
# Save the combined DataFrame to a CSV file (without the index column)
final_df.to_csv(output_file, index=False)

In [None]:
import pandas as pd

# Load the combined CSV file into a DataFrame. The combining step writes it
# inside the ./Humidity directory, so read it from there rather than from the
# current working directory.
file_path = './Humidity/humidity.csv'  # Replace with your actual file path
df = pd.read_csv(file_path)

# Column names used below (change these to match your actual data)
datetime_column_name = 'Date time'
station_column_name = 'Station'
max_temp_column = 'Maximum Air Temperature Since Midnight(degree Celsius)'
min_temp_column = 'Minimum Air Temperature Since Midnight(degree Celsius)'

# Fail early with one message listing every missing column, instead of a
# KeyError for whichever column happens to be touched first.
required_columns = [datetime_column_name, station_column_name, max_temp_column, min_temp_column]
missing_columns = [column for column in required_columns if column not in df.columns]
if missing_columns:
    raise KeyError(f"{file_path} is missing expected column(s): {missing_columns}")

# Convert the datetime column to datetime format
df[datetime_column_name] = pd.to_datetime(df[datetime_column_name])

# Convert the temperature columns to numeric, forcing unparseable values to NaN
for temp_column in (max_temp_column, min_temp_column):
    df[temp_column] = pd.to_numeric(df[temp_column], errors='coerce')

# Set the datetime column as the index; pd.Grouper(freq=...) below groups on it
df = df.set_index(datetime_column_name)

# Group by station and hour, then average both temperature columns.
# 'h' is the hourly alias; the upper-case 'H' spelling is deprecated in pandas 2.2+.
averaged_data = (
    df.groupby([station_column_name, pd.Grouper(freq='h')])
    .agg(
        Average_Max_Temp=(max_temp_column, 'mean'),
        Average_Min_Temp=(min_temp_column, 'mean'),
    )
    .reset_index()
)

# Renaming the columns for clarity (order: station, hour bucket, max mean, min mean)
averaged_data.columns = ['Station', 'Hour', 'Average Max Temp', 'Average Min Temp']

# Save the averaged data to a new CSV file
output_file_path = 'averaged_temperature.csv'  # Change to your desired output file name
averaged_data.to_csv(output_file_path, index=False)

# Display the result
print("\nAveraged Values by Station and Hour saved to:", output_file_path)
print(averaged_data)