In [10]:
import zipfile
import os
import rasterio
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [11]:
# Get the current working directory (location of the notebook/script)
script_dir = os.getcwd()

print(f"Your script is located in: {script_dir}")


Your script is located in: /Users/egeberk/Anaconda/Temperature


In [12]:
# Define directories
base_dir = "/Users/egeberk/Anaconda/Temperature"
input_dir = os.path.join(base_dir, "Zipped")
output_dir = os.path.join(base_dir, "Data and Graphs")
extraction_dir = os.path.join(output_dir, "Extracted")

# Ensure directories exist
os.makedirs(input_dir, exist_ok=True)
os.makedirs(output_dir, exist_ok=True)
os.makedirs(extraction_dir, exist_ok=True)

# Initialize a DataFrame to store results
TemperatureDataFrame = pd.DataFrame()

In [13]:
# List all files in the input directory
if os.path.exists(input_dir):
    input_files = os.listdir(input_dir)
    print(f"Files in '{input_dir}':")
    for file in input_files:
        print(file)
else:
    print(f"The input directory '{input_dir}' does not exist.")

Files in '/Users/egeberk/Anaconda/Temperature/Zipped':
Alice_Arm.zip
Hastings.zip
MountainRatz.zip
Juneau.zip
Hyder.zip
Chickamin.zip
.ipynb_checkpoints
Unuk.zip


In [14]:
def process_uploaded_zip(zip_file_path, region_name):
    """
    Process a ZIP file containing GeoTIFF files.
    
    Parameters:
        zip_file_path (str): Path to the ZIP file.
        region_name (str): Name of the region associated with the ZIP file.
    """
    global TemperatureDataFrame

    # Extract the ZIP file
    try:
        with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
            zip_ref.extractall(extraction_dir)
    except Exception as e:
        print(f"Error extracting {zip_file_path}: {e}")
        return

    # List all extracted files
    extracted_files = [os.path.join(extraction_dir, f) for f in os.listdir(extraction_dir)]
    
    # Process each GeoTIFF file in the extracted directory
    for file_path in extracted_files:
        if not file_path.endswith('.tif') or not os.path.basename(file_path).startswith('clipped_'):
            continue  # Skip non-TIFF files and those not starting with 'clipped_'
        
        # Parse year and month from the file name
        try:
            file_name = os.path.basename(file_path)
            parts = file_name.split("_")
            month = int(parts[-2])  # Second-to-last part is the month
            year = int(parts[-1][:4])  # Last part (first 4 characters) is the year
        except (IndexError, ValueError):
            print(f"Skipping file with unexpected naming pattern: {file_name}")
            continue

        # Process the GeoTIFF file
        try:
            with rasterio.open(file_path) as src:
                data = src.read(1)  # Read the raster data
                nodata = src.meta.get('nodata', -9999)  # Default nodata value if missing

                # Exclude NoData values
                valid_data = data[data != nodata]
                if valid_data.size == 0:
                    print(f"No valid data in file: {file_name}")
                    continue

                # Calculate statistics
                stats = {
                    "Year": year,
                    "Month": month,
                    "Region": region_name,
                    "Mean Temperature (°C)": np.mean(valid_data),
                    "Standard Deviation (°C)": np.std(valid_data),
                    "Min Temperature (°C)": np.min(valid_data),
                    "Max Temperature (°C)": np.max(valid_data),
                    "File Name": file_name
                }
                TemperatureDataFrame = pd.concat([TemperatureDataFrame, pd.DataFrame([stats])], ignore_index=True)
        except Exception as e:
            print(f"Error processing file {file_name}: {e}")

    # Clean up extracted files
    for file_path in extracted_files:
        os.remove(file_path)


In [15]:
print("Columns in TemperatureDataFrame:", TemperatureDataFrame.columns)
print("First few rows of TemperatureDataFrame:")
print(TemperatureDataFrame.head())


Columns in TemperatureDataFrame: RangeIndex(start=0, stop=0, step=1)
First few rows of TemperatureDataFrame:
Empty DataFrame
Columns: []
Index: []


In [21]:
def generate_and_save_graphs():
    """
    Generate and save graphs based on the TemperatureDataFrame, with both individual graphs
    and grid layouts for comparison across regions. Includes readable value labels with an offset.
    """
    global TemperatureDataFrame

    def get_season(month):
        if month in [12, 1, 2]:
            return 'Winter'
        elif month in [3, 4, 5]:
            return 'Spring'
        elif month in [6, 7, 8]:
            return 'Summer'
        else:
            return 'Autumn'

    TemperatureDataFrame['Season'] = TemperatureDataFrame['Month'].apply(get_season)

    def add_value_labels(ax, x_data, y_data, offset=0.03):
        """ Adds value labels above points on the graph."""
        for x, y in zip(x_data, y_data):
            ax.text(x, y + offset, f"{y:.1f}", ha='center', fontsize=8)

    # Grid settings
    n_cols = 3  # 3 columns for the grid

    # Graph settings
    graph_types = [
        {
            "data": TemperatureDataFrame.groupby(['Region', 'Year', 'Season'])['Mean Temperature (°C)']
            .mean()
            .reset_index(),
            "title": "Seasonal Temperature Trends",
            "ylabel": "Mean Temperature (°C)",
            "xlabel": "Year",
            "pivot": ('Year', 'Season', 'Mean Temperature (°C)'),
            "filename": "Seasonal_Temperature_Trends",
        },
        {
            "data": TemperatureDataFrame.groupby(['Region', 'Year'])['Mean Temperature (°C)']
            .mean()
            .reset_index(),
            "title": "Yearly Average Temperature Trends",
            "ylabel": "Mean Temperature (°C)",
            "xlabel": "Year",
            "filename": "Yearly_Average_Temperature_Trends",
        },
    ]

    # Generate individual and grid graphs
    for graph in graph_types:
        data = graph["data"]
        regions = data['Region'].unique()

        # Individual graphs
        for region in regions:
            region_data = data[data['Region'] == region]
            plt.figure(figsize=(8, 5))
            ax = plt.gca()
            if 'pivot' in graph:
                region_pivot = region_data.pivot(
                    index=graph["pivot"][0], columns=graph["pivot"][1], values=graph["pivot"][2]
                )
                region_pivot.plot(marker='o', ax=ax)
                for column in region_pivot.columns:
                    add_value_labels(ax, region_pivot.index, region_pivot[column])
            else:
                x_data = region_data[graph["xlabel"]]
                y_data = region_data[graph["ylabel"]]
                ax.plot(x_data, y_data, marker='o')
                add_value_labels(ax, x_data, y_data)

            plt.title(f"{graph['title']} for {region}")
            plt.ylabel(graph["ylabel"])
            plt.xlabel(graph["xlabel"])
            plt.grid()
            plt.legend()
            plt.savefig(os.path.join(output_dir, f"{region}_{graph['filename']}.png"))
            plt.close()

        # Grid layout for comparison
        n_regions = len(regions)
        fig, axes = plt.subplots(1, n_regions, figsize=(5 * n_regions, 5), sharey=True, sharex=True)

        for ax, region in zip(axes, regions):
            region_data = data[data['Region'] == region]
            if 'pivot' in graph:
                region_pivot = region_data.pivot(
                    index=graph["pivot"][0], columns=graph["pivot"][1], values=graph["pivot"][2]
                )
                region_pivot.plot(marker='o', ax=ax)
                for column in region_pivot.columns:
                    add_value_labels(ax, region_pivot.index, region_pivot[column])
            else:
                x_data = region_data[graph["xlabel"]]
                y_data = region_data[graph["ylabel"]]
                ax.plot(x_data, y_data, marker='o')
                add_value_labels(ax, x_data, y_data)

            ax.set_title(f"{region}")
            ax.set_ylabel(graph["ylabel"])
            ax.set_xlabel(graph["xlabel"])
            if "xticks" in graph:
                ax.set_xticks(graph["xticks"])
                ax.set_xticklabels(graph.get("xticklabels", []))
            ax.grid()

        plt.suptitle(graph["title"])
        plt.tight_layout(rect=[0, 0, 1, 0.96])
        plt.savefig(os.path.join(output_dir, f"Grid_{graph['filename']}.png"))
        plt.close()

    # Generate year-specific monthly temperature trend grid plots
    years = TemperatureDataFrame['Year'].unique()  # Get unique years in the data
    for year in sorted(years):  # Iterate through each year
        yearly_data = TemperatureDataFrame[TemperatureDataFrame['Year'] == year]  # Filter data for the year

        # Prepare data for monthly trends
        data = yearly_data.groupby(['Region', 'Month'])['Mean Temperature (°C)'].mean().reset_index()

        regions = data['Region'].unique()  # Get unique regions
        n_regions = len(regions)

        # Create grid layout
        fig, axes = plt.subplots(1, n_regions, figsize=(5 * n_regions, 5), sharey=True, sharex=True)

        for ax, region in zip(axes, regions):
            region_data = data[data['Region'] == region]
            x_data = region_data['Month']
            y_data = region_data['Mean Temperature (°C)']
            ax.plot(x_data, y_data, marker='o')
            add_value_labels(ax, x_data, y_data)

            ax.set_title(f"{region}")
            ax.set_ylabel("Mean Temperature (°C)")
            ax.set_xlabel("Month")
            ax.set_xticks(range(1, 13))
            ax.set_xticklabels(['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'])
            ax.grid()

        plt.suptitle(f"Monthly Temperature Trends for {year}")
        plt.tight_layout(rect=[0, 0, 1, 0.96])
        plt.savefig(os.path.join(output_dir, f"Grid_Monthly_Temperature_Trends_{year}.png"))
        plt.close()


In [22]:
# Process all ZIP files in the input directory
for zip_file in os.listdir(input_dir):
    if zip_file.endswith(".zip"):  # Ensure only ZIP files are processed
        zip_path = os.path.join(input_dir, zip_file)
        region_name = os.path.splitext(zip_file)[0]  # Use the file name (without extension) as the region name
        process_uploaded_zip(zip_path, region_name)

# Generate and save all graphs if the DataFrame is valid
if 'Month' in TemperatureDataFrame.columns and not TemperatureDataFrame.empty:
    generate_and_save_graphs()
else:
    print("No valid data found. Check input files and processing logic.")

# Save the TemperatureDataFrame to the folder
TemperatureDataFrame.to_csv(os.path.join(output_dir, "TemperatureDataFrame.csv"), index=False)
TemperatureDataFrame.to_excel(os.path.join(output_dir, "TemperatureDataFrame.xlsx"), index=False)

# Confirm saving
print(f"TemperatureDataFrame has been saved as CSV and Excel in the folder: {output_dir}")
print(f"All graphs have been saved in the folder: {output_dir}")

  plt.legend()


TemperatureDataFrame has been saved as CSV and Excel in the folder: /Users/egeberk/Anaconda/Temperature/Data and Graphs
All graphs have been saved in the folder: /Users/egeberk/Anaconda/Temperature/Data and Graphs
