In [None]:
import multiprocessing
import pandas as pd
import numpy as np

# Helper function: Merge two sorted lists
def merge(left, right):
    """Merge two ascending-sorted sequences into a single sorted list.

    The merge is stable: when an element of ``left`` and an element of
    ``right`` compare equal, the one from ``left`` is emitted first, so
    equal items keep the relative order they had in the inputs.

    Args:
        left: Sequence already sorted in ascending order.
        right: Sequence already sorted in ascending order.

    Returns:
        A new list holding every element of ``left`` and ``right`` in
        ascending order. Neither input is modified.
    """
    sorted_list = []
    i = j = 0
    left_len, right_len = len(left), len(right)

    while i < left_len and j < right_len:
        # Take from ``right`` only when it is strictly smaller. Testing
        # ``right < left`` (instead of ``left < right``) keeps ties on the
        # left side, which is what makes the merge stable, and still relies
        # on nothing but the ``<`` operator.
        if right[j] < left[i]:
            sorted_list.append(right[j])
            j += 1
        else:
            sorted_list.append(left[i])
            i += 1

    # At most one of these tails is non-empty; it is already sorted.
    sorted_list.extend(left[i:])
    sorted_list.extend(right[j:])
    return sorted_list

# Parallel Merge Sort function
def parallel_merge_sort(data, processes=2, threshold=1000):
    """Sort ``data`` in ascending order, sorting chunks in worker processes.

    The input is split once into up to ``processes`` contiguous chunks. Each
    chunk is sorted in a pool worker with the built-in ``sorted``, and the
    resulting sorted runs are merged pairwise in the calling process with
    ``merge``.

    Args:
        data: Sliceable sequence of mutually comparable items.
        processes: Number of worker processes (int). Values below 2 disable
            the pool and sort sequentially.
        threshold: Inputs shorter than this are sorted sequentially, since
            starting worker processes costs more than it saves.

    Returns:
        ``data`` itself when it has at most one element; otherwise a new
        sorted list.
    """
    if len(data) <= 1:
        return data

    if len(data) < threshold or processes < 2:
        return sorted(data)

    # All splitting happens here, in the parent. Pool workers are daemonic
    # and are not allowed to start child processes, so a worker must never
    # call back into this function and open a pool of its own.
    chunk_size = -(-len(data) // processes)  # ceiling division
    chunks = [
        data[start:start + chunk_size]
        for start in range(0, len(data), chunk_size)
    ]

    # The workers run the built-in ``sorted``, which is pickled by reference
    # and therefore does not require this module to be importable from the
    # worker process.
    with multiprocessing.Pool(processes=min(processes, len(chunks))) as pool:
        runs = pool.map(sorted, chunks)

    # Merge neighbouring runs until a single sorted run remains.
    while len(runs) > 1:
        merged_runs = [
            merge(runs[k], runs[k + 1]) for k in range(0, len(runs) - 1, 2)
        ]
        if len(runs) % 2:
            # Odd run out: carry it unchanged into the next round.
            merged_runs.append(runs[-1])
        runs = merged_runs

    return runs[0]

# Main function
if __name__ == "__main__":
    # Demo: build a synthetic daily-temperature table, sort its records by
    # temperature with parallel_merge_sort, and write the result to CSV.

    # Fixed seed so every run generates the same temperatures.
    np.random.seed(42)
    num_records = 10000  # Adjust the number of records for testing
    weather_data = {
        "Date": pd.date_range(start="2024-01-01", periods=num_records, freq="D"),
        "Temperature": np.random.uniform(-10, 40, size=num_records)  # Random temperatures in Celsius
    }

    # Convert to DataFrame
    df = pd.DataFrame(weather_data)
    print("Original Data:")
    print(df.head())

    # Sort (temperature, date) pairs rather than the bare temperature
    # column: writing sorted temperatures back over unsorted dates would
    # attach each reading to a day it was not recorded on. Temperature comes
    # first in the tuple so it is the primary sort key.
    records = list(zip(df["Temperature"].tolist(), df["Date"].tolist()))

    # Sort records using Parallel Merge Sort
    print("\nSorting temperatures...")
    sorted_records = parallel_merge_sort(records)

    # Rebuild the DataFrame so each row keeps its original date/temperature
    # pairing, now ordered by ascending temperature.
    df = pd.DataFrame(
        {
            "Date": [date for _, date in sorted_records],
            "Temperature": [temperature for temperature, _ in sorted_records],
        }
    )

    print("\nSorted Data:")
    print(df.head())

    # Save the sorted data to a CSV file
    df.to_csv("sorted_weather_data.csv", index=False)
    print("\nSorted data saved to 'sorted_weather_data.csv'.")


Original Data:
        Date  Temperature
0 2024-01-01     8.727006
1 2024-01-02    37.535715
2 2024-01-03    26.599697
3 2024-01-04    19.932924
4 2024-01-05    -2.199068

Sorting temperatures...
