# Data Visualization Notebook

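This notebook loads the processed weather data from MinIO, plots temperature, precipitation, and humidity trends, and uploads the resulting images and an HTML report back to MinIO.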

In [None]:
# Cell 1: Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import boto3
from io import StringIO
import os
from matplotlib.dates import DateFormatter

# Global plot appearance: apply the matplotlib style sheet first, then the
# seaborn colour palette (the order of the two calls is kept as-is).
PLOT_STYLE = 'ggplot'
COLOR_PALETTE = "Set2"
plt.style.use(PLOT_STYLE)
sns.set_palette(COLOR_PALETTE)


In [None]:
# Cell 2: Read processed data from MinIO
#
# Downloads the processed CSV from the MinIO bucket and loads it into `df`,
# converting the `date` column to datetimes. If any step of the download or
# parsing fails, the error is printed and a small hard-coded sample DataFrame
# is used instead so the plotting cells below can still run.

# Object location. Assigned before the `try` so these names exist even when
# the connection attempt fails and later code still refers to `directory`.
directory = 'weather-dataprocessing-0519232703'  # Match the directory in your DAG
input_file = 'processed_weather_data.csv'

# Connection settings can be overridden through environment variables so that
# credentials do not have to be edited into the notebook; the defaults are the
# values that were previously hard-coded here.
# NOTE(review): the MINIO_* variable names are chosen here - confirm they match
# how the deployment injects its secrets.
minio_endpoint = os.environ.get(
    'MINIO_ENDPOINT_URL', 'http://minio.minio-system.svc.cluster.local:9000'
)
minio_access_key = os.environ.get('MINIO_ACCESS_KEY', 'minio')
minio_secret_key = os.environ.get('MINIO_SECRET_KEY', 'minio123')

try:
    # Set up the MinIO/S3 client
    s3_client = boto3.client(
        's3',
        endpoint_url=minio_endpoint,
        aws_access_key_id=minio_access_key,
        aws_secret_access_key=minio_secret_key,
        region_name='us-east-1'  # Can be any region, doesn't matter for MinIO
    )

    # Get the file from MinIO
    print(f"Attempting to read {input_file} from MinIO")
    response = s3_client.get_object(
        Bucket='elyra-airflow',
        Key=f'{directory}/{input_file}'
    )

    # Read the content and convert to DataFrame
    content = response['Body'].read().decode('utf-8')
    df = pd.read_csv(StringIO(content))

    # Convert date to datetime
    df['date'] = pd.to_datetime(df['date'])

    print(f"Successfully loaded {len(df)} days of processed weather data")
    print(df.head())

except Exception as e:
    # Deliberate best-effort: any failure above (connection, missing object,
    # bad CSV, missing `date` column) falls back to sample data.
    print(f"Error reading processed data from MinIO: {e}")

    # Create sample data as fallback
    print("Creating sample data for visualization")
    df = pd.DataFrame({
        'date': pd.date_range(start='2024-01-01', periods=10),
        'avg_temp': [20, 22, 19, 23, 25, 22, 20, 18, 21, 24],
        'min_temp': [15, 17, 14, 18, 20, 18, 16, 14, 17, 19],
        'max_temp': [25, 27, 24, 28, 30, 26, 24, 22, 25, 29],
        'avg_humidity': [65, 70, 75, 60, 55, 65, 70, 75, 70, 60],
        'total_rain': [0, 2.5, 10, 0, 0, 1.5, 5, 8, 0, 0]
    })

In [None]:
# Cell 3: Create temperature visualization
#
# Draws the max / avg / min temperature columns of `df` against `date`,
# shades the band between min and max, and writes the figure to a PNG file.

# One entry per line drawn: (df column, matplotlib format string, legend label).
temperature_series = [
    ('max_temp', 'r-', 'Max Temp (°C)'),
    ('avg_temp', 'g-', 'Avg Temp (°C)'),
    ('min_temp', 'b-', 'Min Temp (°C)'),
]

temp_fig, temp_ax = plt.subplots(figsize=(12, 6))
for column, line_format, legend_label in temperature_series:
    temp_ax.plot(df['date'], df[column], line_format, label=legend_label)

# Shade the range between the daily minimum and maximum.
temp_ax.fill_between(df['date'], df['min_temp'], df['max_temp'], alpha=0.2, color='gray')

temp_ax.set_title('Daily Temperature Range', fontsize=16)
temp_ax.set_xlabel('Date')
temp_ax.set_ylabel('Temperature (°C)')
temp_ax.grid(True, alpha=0.3)
temp_ax.legend()
temp_fig.tight_layout()

# Save the figure
temperature_plot_path = 'temperature_plot.png'
temp_fig.savefig(temperature_plot_path)
print(f"Saved temperature plot to {temperature_plot_path}")


In [None]:
# Cell 4: Create precipitation and humidity visualization
#
# Combines two series on one figure with a shared date axis: rainfall as bars
# on the left y-axis and average humidity as a line on the right y-axis, then
# writes the figure to a PNG file.
rain_color = 'tab:blue'
humidity_color = 'tab:orange'

fig, ax1 = plt.subplots(figsize=(12, 6))

# Left axis: daily rainfall bars, with the axis label and ticks in the bar colour.
ax1.bar(df['date'], df['total_rain'], color=rain_color, alpha=0.7, width=0.8)
ax1.set_xlabel('Date')
ax1.set_ylabel('Precipitation (mm)', color=rain_color)
ax1.tick_params(axis='y', labelcolor=rain_color)

# Right axis: average humidity line on a twin axis sharing the same x-axis.
ax2 = ax1.twinx()
ax2.plot(df['date'], df['avg_humidity'], color=humidity_color, linewidth=2)
ax2.set_ylabel('Humidity (%)', color=humidity_color)
ax2.tick_params(axis='y', labelcolor=humidity_color)

# These pyplot calls act on the current axes (presumably ax2, created last),
# so they are kept as pyplot calls rather than bound to a specific axis.
plt.title('Daily Precipitation and Average Humidity', fontsize=16)
plt.grid(False)
fig.tight_layout()

# Save the figure
precipitation_plot_path = 'precipitation_humidity_plot.png'
fig.savefig(precipitation_plot_path)
print(f"Saved precipitation and humidity plot to {precipitation_plot_path}")

# Cell 5: Upload plots to MinIO
#
# Uploads the two saved PNG files and a small HTML report that embeds them to
# the `elyra-airflow` bucket under `directory`. Any failure is caught and
# printed rather than raised.

# Each entry pairs the wording used in the status message with the local PNG name.
plot_uploads = [
    ('temperature', 'temperature_plot.png'),
    ('precipitation/humidity', 'precipitation_humidity_plot.png'),
]

try:
    for plot_label, plot_filename in plot_uploads:
        with open(plot_filename, 'rb') as image_file:
            object_key = f'{directory}/{plot_filename}'
            s3_client.put_object(
                Bucket='elyra-airflow',
                Key=object_key,
                Body=image_file,
                ContentType='image/png'
            )
        print(f"Successfully uploaded {plot_label} plot to MinIO at {object_key}")

    # Create a simple HTML report with the plots. The images are referenced by
    # bare file name, i.e. relative to wherever the report itself is stored.
    generated_at = pd.Timestamp.now().strftime('%Y-%m-%d %H:%M')
    html_content = f"""
    <!DOCTYPE html>
    <html>
    <head>
        <title>Weather Data Analysis Report</title>
        <style>
            body {{ font-family: Arial, sans-serif; margin: 20px; }}
            h1 {{ color: #2c3e50; }}
            .plot {{ margin: 20px 0; text-align: center; }}
            .plot img {{ max-width: 100%; border: 1px solid #ddd; }}
        </style>
    </head>
    <body>
        <h1>Weather Data Analysis Report</h1>
        <p>Report generated on: {generated_at}</p>
        
        <div class="plot">
            <h2>Temperature Trends</h2>
            <img src="temperature_plot.png" alt="Temperature Plot">
        </div>
        
        <div class="plot">
            <h2>Precipitation and Humidity</h2>
            <img src="precipitation_humidity_plot.png" alt="Precipitation and Humidity Plot">
        </div>
    </body>
    </html>
    """

    # Upload HTML report
    report_key = f'{directory}/weather_report.html'
    s3_client.put_object(
        Bucket='elyra-airflow',
        Key=report_key,
        Body=html_content,
        ContentType='text/html'
    )
    print(f"Successfully uploaded HTML report to MinIO at {report_key}")

except Exception as upload_error:
    print(f"Error uploading visualizations to MinIO: {upload_error}")