In [2]:
from pathlib import Path

import pandas as pd

# Read the cleaned weather CSV into memory
file_path = "cleaned_weather_data.csv"

# Columns kept for the rest of the notebook: the first group is carried
# through unchanged, the second group is binned and dropped in a later cell.
passthrough_columns = [
    "City",
    "Rain Volume (last 3h) cleaned",
    "Snow Volume (last 3h) cleaned",
    "Month-Day",
    "Time",
]
measurement_columns = ["Temperature", "Humidity", "Pressure", "Wind Speed", "Visibility"]
columns_needed = passthrough_columns + measurement_columns

# Load everything, then narrow to the required columns (KeyError if any is absent)
df = pd.read_csv(file_path).loc[:, columns_needed]

In [3]:
# Define binning functions for numeric columns
def bin_temperature(temp):
    """Map a temperature reading to a categorical label.

    Bins (units presumably degrees Celsius -- TODO confirm against the data source):
        temp < 0          -> "Temp_Cold"
        0 <= temp <= 15   -> "Temp_Cool"
        15 < temp <= 30   -> "Temp_Warm"
        temp > 30         -> "Temp_Hot"

    Parameters:
        temp: a scalar numeric reading, or a missing value (NaN / None / pd.NA).

    Returns:
        The label string, or None when the reading is missing.
    """
    # A NaN fails every comparison below and would otherwise fall through to
    # the final branch and be mislabelled "Temp_Hot"; keep it missing instead.
    if pd.isna(temp):
        return None
    if temp < 0:
        return "Temp_Cold"
    if temp <= 15:
        return "Temp_Cool"
    if temp <= 30:
        return "Temp_Warm"
    return "Temp_Hot"

def bin_humidity(humidity):
    """Map a humidity reading to a categorical label.

    Bins (units presumably percent relative humidity -- TODO confirm):
        humidity < 30          -> "Humidity_Low"
        30 <= humidity <= 70   -> "Humidity_Moderate"
        humidity > 70          -> "Humidity_High"

    Parameters:
        humidity: a scalar numeric reading, or a missing value (NaN / None / pd.NA).

    Returns:
        The label string, or None when the reading is missing.
    """
    # A NaN fails every comparison below and would otherwise fall through to
    # the final branch and be mislabelled "Humidity_High"; keep it missing.
    if pd.isna(humidity):
        return None
    if humidity < 30:
        return "Humidity_Low"
    if humidity <= 70:
        return "Humidity_Moderate"
    return "Humidity_High"

def bin_pressure(pressure):
    """Map a pressure reading to a categorical label.

    Bins (units presumably hPa -- TODO confirm against the data source):
        pressure < 1000            -> "Pressure_Low"
        1000 <= pressure <= 1020   -> "Pressure_Normal"
        pressure > 1020            -> "Pressure_High"

    Parameters:
        pressure: a scalar numeric reading, or a missing value (NaN / None / pd.NA).

    Returns:
        The label string, or None when the reading is missing.
    """
    # A NaN fails every comparison below and would otherwise fall through to
    # the final branch and be mislabelled "Pressure_High"; keep it missing.
    if pd.isna(pressure):
        return None
    if pressure < 1000:
        return "Pressure_Low"
    if pressure <= 1020:
        return "Pressure_Normal"
    return "Pressure_High"

def bin_wind_speed(speed):
    """Map a wind-speed reading to a categorical label.

    Bins (units are not shown in this notebook -- presumably m/s, TODO confirm):
        speed < 5          -> "Wind_Calm"
        5 <= speed <= 15   -> "Wind_Moderate"
        speed > 15         -> "Wind_Strong"

    Parameters:
        speed: a scalar numeric reading, or a missing value (NaN / None / pd.NA).

    Returns:
        The label string, or None when the reading is missing.
    """
    # A NaN fails every comparison below and would otherwise fall through to
    # the final branch and be mislabelled "Wind_Strong"; keep it missing.
    if pd.isna(speed):
        return None
    if speed < 5:
        return "Wind_Calm"
    if speed <= 15:
        return "Wind_Moderate"
    return "Wind_Strong"

def bin_visibility(vis):
    """Map a visibility reading to a categorical label.

    Bins (units presumably metres -- TODO confirm against the data source):
        vis < 2000             -> "Visibility_Low"
        2000 <= vis <= 10000   -> "Visibility_Moderate"
        vis > 10000            -> "Visibility_High"

    Parameters:
        vis: a scalar numeric reading, or a missing value (NaN / None / pd.NA).

    Returns:
        The label string, or None when the reading is missing.
    """
    # A NaN fails every comparison below and would otherwise fall through to
    # the final branch and be mislabelled "Visibility_High"; keep it missing.
    if pd.isna(vis):
        return None
    if vis < 2000:
        return "Visibility_Low"
    if vis <= 10000:
        return "Visibility_Moderate"
    return "Visibility_High"

In [4]:
# Table of (numeric source column, new label column, binning function).
# Order matters: the label columns are appended to df in this order.
binning_plan = [
    ("Temperature", "Temperature_Bin", bin_temperature),
    ("Humidity", "Humidity_Bin", bin_humidity),
    ("Pressure", "Pressure_Bin", bin_pressure),
    ("Wind Speed", "WindSpeed_Bin", bin_wind_speed),
    ("Visibility", "Visibility_Bin", bin_visibility),
]

# Derive each categorical column element-wise from its numeric source
for source_col, label_col, binner in binning_plan:
    df[label_col] = df[source_col].apply(binner)

# The raw numeric columns are no longer needed once their labels exist
df = df.drop(columns=[source_col for source_col, _, _ in binning_plan])

In [5]:
# Convert into transaction format (list of lists): one inner list per row,
# holding that row's values in column order. `.values.tolist()` avoids a
# Python-level call per row and also works when the frame has no rows.
transactions = df.values.tolist()

# Save transactions to a CSV file without headers for use in R
transaction_file = "ARM/transactions.csv"
# to_csv does not create missing folders, so make sure the target directory
# exists; otherwise a fresh checkout fails here with an OSError.
Path(transaction_file).parent.mkdir(parents=True, exist_ok=True)
df.to_csv(transaction_file, index=False, header=False)
print(f"Transaction data saved at: {transaction_file}")

Transaction data saved at: ARM/transactions.csv
