In [None]:
# Import necessary libraries
import pandas as pd                 # For reading CSV and data manipulation
import matplotlib.pyplot as plt     # For plotting
from matplotlib.backends.backend_pdf import PdfPages  # To save multiple plots into a single PDF


In [None]:
# ------------------------------
# 1. Load the weather data
# ------------------------------

# Load the CSV; the first column (index 0) is parsed as datetime on read.
df = pd.read_csv(
    r'C:\Users\ViktoriaYavorska\OneDrive - Doscon AS\NPHD\Courses\IND320\IND320_VY\ASSIGNMENTS\PART 1\open-meteo-subset.csv',
    parse_dates=[0],
)

# Give the timestamp column a fixed name so later cells can refer to
# it as 'time' regardless of the header used in the file.
df = df.rename(columns={df.columns[0]: 'time'})

# Quick sanity check: preview the first rows ...
print(df.head())

# ... and confirm how each column was parsed.
print(df.dtypes)

# Every column after the first is treated as a measurement series.
# NOTE: no dtype filtering happens here; the columns are assumed to be
# numeric because the plotting/normalization cell does arithmetic on them.
numeric_columns = df.columns[1:]

In [None]:
# ------------------------------
# 2. Save all plots to a PDF
# ------------------------------
#
# Writes 'weather_report.pdf' with:
#   2a. one page per measurement column (value vs. time),
#   2b. one page with all columns min-max normalized on shared axes,
#   2c. one page with one stacked subplot per column (shared x-axis).
#
# Relies on `df` (with a 'time' column) and `numeric_columns` from the
# loading cell. Each figure is passed to pdf.savefig() explicitly so the
# saved page never depends on which figure happens to be "current".
#
# NOTE: figures are left open on purpose so an interactive/notebook
# session can still render them; add plt.close(fig) after each save if
# this is run as a batch script over many columns.

with PdfPages('weather_report.pdf') as pdf:

    # --------------------------
    # 2a. Plot each column separately
    # --------------------------
    for column in numeric_columns:
        fig, ax = plt.subplots(figsize=(10, 4))
        ax.plot(df['time'], df[column])
        ax.set_title(f'{column} over Time')
        ax.set_xlabel('Date')
        ax.set_ylabel(column)
        ax.grid(True)
        pdf.savefig(fig)

    # --------------------------
    # 2b. Plot all columns normalized together
    # --------------------------

    # Work on a copy so the raw values in `df` stay untouched.
    df_normalized = df.copy()

    # Min-max scale each column to the range 0-1.
    for column in numeric_columns:
        lowest = df[column].min()
        span = df[column].max() - lowest
        # A constant column has zero range; dividing by it would turn the
        # whole series into NaN and the line would vanish from the plot.
        # Dividing by 1 instead maps such a column to a flat line at 0.
        if span == 0:
            span = 1
        df_normalized[column] = (df[column] - lowest) / span

    fig, ax = plt.subplots(figsize=(12, 6))
    for column in numeric_columns:
        ax.plot(df_normalized['time'], df_normalized[column], label=column)

    ax.set_title('All Measurements (Normalized)')
    ax.set_xlabel('Date')
    ax.set_ylabel('Normalized Value')
    ax.legend()
    ax.grid(True)
    pdf.savefig(fig)

    # --------------------------
    # 2c. Plot all columns in subplots
    # --------------------------

    # One row per variable. squeeze=False makes plt.subplots always return
    # a 2-D array of Axes; without it a single-column dataset yields a bare
    # Axes object and the zip()/indexing below would raise TypeError.
    fig, axes = plt.subplots(
        len(numeric_columns),
        1,
        figsize=(12, 3 * len(numeric_columns)),
        sharex=True,
        squeeze=False,
    )
    axes = axes[:, 0]  # single grid column -> flat sequence of Axes

    for ax, column in zip(axes, numeric_columns):
        ax.plot(df['time'], df[column])
        ax.set_ylabel(column)
        ax.grid(True)

    # The x-axis is shared, so only the bottom subplot gets a label.
    axes[-1].set_xlabel('time')

    # Adjust layout to prevent overlapping labels between rows.
    fig.tight_layout()

    pdf.savefig(fig)