In [5]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime

# Function to read CSV data
def read_csv(file_path):
    return pd.read_csv(file_path, encoding='utf-8')

# Function to parse date and extract season
def get_season(month):
    if month in [12, 1, 2]:
        return 'Winter'
    elif month in [3, 4, 5]:
        return 'Spring'
    elif month in [6, 7, 8]:
        return 'Summer'
    else:
        return 'Autumn'

# Function to process pollutant data with both date and time
def process_pollutant_data(file_path):
    df = read_csv(file_path)
    # Update to include both date and time for parsing
    df['Date'] = pd.to_datetime(df['Date/heure'], format='%d.%m.%Y %H:%M')
    df['Season'] = df['Date'].apply(lambda x: get_season(x.month))
    return df

# Function to plot scatter plots for pollutants
def plot_scatter(df, pollutant1, pollutant2, season):
    plt.figure(figsize=(10, 6))
    sns.scatterplot(data=df[df['Season'] == season], x=pollutant1, y=pollutant2)
    plt.title(f'Scatter Plot of {pollutant1} vs {pollutant2} ({season})')
    plt.xlabel(pollutant1)
    plt.ylabel(pollutant2)
    plt.show()

# Loading the pollutant data (PM10, O3, NO2, etc.)
pm10_data = process_pollutant_data('PM10.csv')
o3_data = process_pollutant_data('O3.csv')
no2_data = process_pollutant_data('NO2.csv')
prec_data = process_pollutant_data('PREC.csv')
rad_data = process_pollutant_data('RAD.csv')
temp_data = process_pollutant_data('TEMP.csv')

# Loading the wind data
wind_data = process_wind_data('Wind Speed and Direction.csv')

# List of pollutants to compare
pollutants = ['PM10', 'O3', 'NO2', 'PREC', 'RAD', 'TEMP']

# For each season, create scatter plots for combinations of pollutants
seasons = ['Winter', 'Spring', 'Summer', 'Autumn']

for season in seasons:
    for i in range(len(pollutants)):
        for j in range(i+1, len(pollutants)):
            plot_scatter(pm10_data, pollutants[i], pollutants[j], season)

# For wind data, you can also create scatter plots for wind speed over time by season
for season in seasons:
    plt.figure(figsize=(10, 6))
    sns.scatterplot(data=wind_data[wind_data['Season'] == season], x='Date', y='vCHA', label='Wind Speed (CHA)')
    plt.title(f'Wind Speed vs Time ({season})')
    plt.xlabel('Time')
    plt.ylabel('Wind Speed (CHA)')
    plt.show()

ValueError: time data "01.01.2024" doesn't match format "%d.%m.%Y %H:%M", at position 0. You might want to try:
    - passing `format` if your strings have a consistent format;
    - passing `format='ISO8601'` if your strings are all ISO8601 but not necessarily in exactly the same format;
    - passing `format='mixed'`, and the format will be inferred for each element individually. You might want to use `dayfirst` alongside this.