In [None]:
# Install all required packages from requirements.txt
import subprocess
import sys

# Run pip through the kernel's own interpreter so packages land in the
# environment this notebook is executing in. check_call raises
# CalledProcessError if pip exits non-zero, so the success message below is
# only printed after a clean install.
pip_install_cmd = [sys.executable, '-m', 'pip', 'install', '-r', 'requirements.txt']
subprocess.check_call(pip_install_cmd)

print('✅ All packages installed from requirements.txt!')

# 🌍 Regional Temperature Trends Analysis

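This notebook demonstrates a regional temperature analysis on the public BigQuery GSOD sample table (`bigquery-public-data.samples.gsod`), with comprehensive visualizations. Regions are rough groupings derived from station numbers, not precise geography.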

In [None]:
import os
import subprocess
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from google.cloud import bigquery
from google.oauth2 import credentials
import warnings
# Blanket-suppress warnings for the rest of the session.
# NOTE(review): this also hides deprecation and pandas warnings; consider
# narrowing the filter if the notebook is developed further.
warnings.filterwarnings('ignore')

# Plot defaults shared by every figure in the notebook.
plt.style.use('default')
sns.set_palette('husl')
plt.rcParams.update({'figure.figsize': (15, 10)})

print('✅ Libraries imported!')

In [None]:
def get_access_token():
    """Get a GCP access token using Coder external auth.

    Runs ``coder external-auth access-token gcp`` and returns its stdout with
    surrounding whitespace removed.

    Returns:
        str | None: The token, or ``None`` when the command cannot be
        started (e.g. the ``coder`` binary is not on PATH), exits non-zero,
        or prints nothing. Callers only need a truthiness check.
    """
    cmd = ['coder', 'external-auth', 'access-token', 'gcp']
    try:
        result = subprocess.run(cmd, capture_output=True, text=True, check=True)
    except subprocess.CalledProcessError as e:
        print(f'Error: {e}')
        # The CLI's own explanation is on stderr; surface it so the failure
        # is diagnosable from the notebook output.
        if e.stderr:
            print(e.stderr.strip())
        return None
    except OSError as e:
        # Raised when the executable is missing or not runnable. Without this
        # branch the cell dies with a traceback instead of reporting failure.
        print(f'Error: {e}')
        return None

    # Treat an empty token as "no token" so the return contract is str-or-None.
    return result.stdout.strip() or None

# Authenticate with BigQuery
# Reset the client first: if this cell is re-run and authentication fails,
# a client built by an earlier run (with an old token) must not stay in scope
# and be picked up silently by later cells.
client = None
access_token = get_access_token()
if access_token:
    print('✅ Access token obtained')
    # Wrap the raw bearer token; no refresh information is supplied here.
    creds = credentials.Credentials(token=access_token)
    client = bigquery.Client(credentials=creds, project='coder-vertex-demos')
    print('✅ BigQuery client ready')
else:
    print('❌ Failed to get access token')
    print('💡 Make sure external auth is configured: coder external-auth access-token gcp')

In [None]:
# Define function to assign geographic regions
# (first block, last block, label) ranges over the two-digit WMO block number,
# i.e. the leading digits of the six-digit station number. Blocks not listed
# (0, 69, 89, 99 and anything out of range) fall through to 'Other Regions'.
_WMO_BLOCK_REGIONS = (
    (1, 19, 'Europe'),
    (20, 59, 'Asia'),
    (60, 68, 'Africa'),
    (70, 79, 'North America'),
    (80, 88, 'South America'),
    (90, 98, 'Southwest Pacific'),
)


def assign_region(station_number):
    """Assign a rough geographic region from a GSOD station number.

    The station number is treated as a six-digit WMO-style identifier whose
    leading two digits are the WMO block number (leading zeros are lost when
    the id is stored as an integer, so 010010 arrives as 10010). The block is
    mapped to a continent-level label. This is an approximation: block
    boundaries do not follow continental borders exactly.

    Args:
        station_number: Integer-like station id. Missing or non-numeric
            values (None, NaN, pandas NA) are accepted.

    Returns:
        str: One of 'Europe', 'Asia', 'Africa', 'North America',
        'South America', 'Southwest Pacific', or 'Other Regions' when the id
        is missing or its block is not in the table.
    """
    try:
        block = int(station_number) // 10000
    except (TypeError, ValueError):
        # None / pandas NA raise TypeError, NaN raises ValueError.
        return 'Other Regions'

    for first_block, last_block, label in _WMO_BLOCK_REGIONS:
        if first_block <= block <= last_block:
            return label
    return 'Other Regions'

print('✅ Regional classification function defined')

In [None]:
# Query detailed temperature data for regional analysis
# Pulls at most 5000 rows from 1950 onward that have a mean temperature.
# NOTE(review): LIMIT without ORDER BY means the rows returned are not
# guaranteed to be the same between runs.
query = '''
SELECT 
    station_number,
    year,
    month,
    mean_temp,
    max_temperature,
    total_precipitation
FROM `bigquery-public-data.samples.gsod`
WHERE mean_temp IS NOT NULL 
  AND year >= 1950
LIMIT 5000
'''

print('🔍 Executing regional temperature query...')
query_job = client.query(query)
df = query_job.to_dataframe()

# Tag every row with the region derived from its station number.
df['region'] = df['station_number'].apply(assign_region)

print(f'✅ Retrieved {len(df)} temperature records')
print(f'📊 Regions found: {df["region"].nunique()}')

# Display regional distribution
regional_counts = df['region'].value_counts()
print('\n📍 Records by region:')
for region, count in regional_counts.items():
    in_region = df['region'] == region
    avg_temp = df.loc[in_region, 'mean_temp'].mean()
    print(f'   {region}: {count:,} records (avg: {avg_temp:.1f}°F)')

In [None]:
# Create seasonal classification
# Meteorological seasons keyed by calendar month.
# NOTE(review): the same month-to-season mapping is applied to every station
# regardless of hemisphere.
season_by_month = {}
for season_name, season_months in (
    ('Winter', (12, 1, 2)),
    ('Spring', (3, 4, 5)),
    ('Summer', (6, 7, 8)),
    ('Fall', (9, 10, 11)),
):
    for month_number in season_months:
        season_by_month[month_number] = season_name

df['season'] = df['month'].map(season_by_month)

print('✅ Seasonal classification created')

In [None]:
# Create comprehensive regional temperature visualizations
# One 2x3 grid; each numbered section below draws into one cell of `axes`.
fig, axes = plt.subplots(nrows=2, ncols=3, figsize=(20, 12))
fig.suptitle('🌍 Temperature Trends by Geographic Region - Example Analysis', fontsize=18, fontweight='bold')

# 1. Average temperature by region
# Mean per region as bar height, standard deviation as the error bar.
ax_avg = axes[0, 0]
region_temps = df.groupby('region')['mean_temp'].agg(['mean', 'std']).reset_index()
bars = ax_avg.bar(
    region_temps['region'],
    region_temps['mean'],
    yerr=region_temps['std'],
    capsize=5,
    alpha=0.8,
    color=['skyblue', 'lightgreen', 'coral', 'gold', 'lightcoral', 'plum'],
)
ax_avg.set_title('🌡️ Average Temperature by Region')
ax_avg.set_ylabel('Temperature (°F)')
ax_avg.tick_params(axis='x', rotation=45)
ax_avg.grid(axis='y', alpha=0.3)

# Add value labels
# Each label is centred horizontally on its bar, one unit above the bar top.
for bar, temp in zip(bars, region_temps['mean']):
    label_x = bar.get_x() + bar.get_width()/2
    label_y = bar.get_height() + 1
    ax_avg.text(label_x, label_y, f'{temp:.1f}°F',
                ha='center', va='bottom', fontweight='bold')

# 2. Temperature distribution by region (violin plot)
# Limited to the five regions with the most records.
ax_violin = axes[0, 1]
regions_for_violin = df['region'].value_counts().head(5).index
violin_positions = range(len(regions_for_violin))
violin_data = [
    df.loc[df['region'] == region_name, 'mean_temp'].dropna()
    for region_name in regions_for_violin
]
parts = ax_violin.violinplot(violin_data, positions=violin_positions)
ax_violin.set_title('📊 Temperature Distribution by Region')
ax_violin.set_ylabel('Temperature (°F)')
ax_violin.set_xticks(violin_positions)
# Break labels at spaces so multi-word region names stack vertically.
stacked_labels = [region_name.replace(' ', '\n') for region_name in regions_for_violin]
ax_violin.set_xticklabels(stacked_labels, fontsize=9)
ax_violin.grid(axis='y', alpha=0.3)

# 3. Seasonal temperature patterns
# Region x season table of mean temperatures, with columns forced into
# calendar order instead of the alphabetical order unstack() produces.
ax_season = axes[0, 2]
season_order = ['Winter', 'Spring', 'Summer', 'Fall']
seasonal_means = df.groupby(['region', 'season'])['mean_temp'].mean()
seasonal_temps = seasonal_means.unstack()
seasonal_temps = seasonal_temps.reindex(columns=season_order)
seasonal_temps.plot.bar(ax=ax_season, alpha=0.8, width=0.8)
ax_season.set_title('🌿 Seasonal Temperature Patterns')
ax_season.set_ylabel('Temperature (°F)')
ax_season.tick_params(axis='x', rotation=45)
ax_season.legend(title='Season', bbox_to_anchor=(1.05, 1))
ax_season.grid(axis='y', alpha=0.3)

# 4. Temperature trends over time
# One line per region for the four regions with the most records.
# `top_regions` and `colors` are reused by the scatter panel that follows.
ax_trend = axes[1, 0]
yearly_trends = df.groupby(['year', 'region'])['mean_temp'].mean().unstack()
top_regions = df['region'].value_counts().head(4).index
colors = ['blue', 'red', 'green', 'orange']
for region, line_color in zip(top_regions, colors):
    if region not in yearly_trends.columns:
        continue
    ax_trend.plot(
        yearly_trends.index,
        yearly_trends[region],
        marker='o',
        linewidth=2,
        alpha=0.8,
        label=region,
        color=line_color,
    )
ax_trend.set_title('📈 Temperature Trends Over Time')
ax_trend.set_xlabel('Year')
ax_trend.set_ylabel('Temperature (°F)')
ax_trend.legend(bbox_to_anchor=(1.05, 1))
ax_trend.grid(True, alpha=0.3)

# 5. Temperature vs Precipitation scatter
# Only rows that have both measurements can be plotted.
ax_scatter = axes[1, 1]
precip_temp = df.dropna(subset=['total_precipitation', 'mean_temp'])
# Same regions and colour assignment as the trend panel.
for region, point_color in zip(top_regions[:4], colors):
    region_data = precip_temp[precip_temp['region'] == region]
    if len(region_data) == 0:
        continue
    ax_scatter.scatter(
        region_data['total_precipitation'],
        region_data['mean_temp'],
        alpha=0.6,
        label=region,
        s=30,
        color=point_color,
    )
ax_scatter.set_title('🌧️ Temperature vs Precipitation')
ax_scatter.set_xlabel('Precipitation (inches)')
ax_scatter.set_ylabel('Temperature (°F)')
ax_scatter.legend()
ax_scatter.grid(True, alpha=0.3)

# 6. Regional temperature extremes
# Min / max / mean of mean_temp per region, rounded to one decimal.
temp_stats = df.groupby('region')['mean_temp'].agg(['min', 'max', 'mean']).round(1)
temp_stats.columns = ['Min', 'Max', 'Average']
# Show the five regions with the most records, the same selection rule the
# violin panel uses. Taking head(5) of the groupby result instead would keep
# the first five region names alphabetically and silently drop one region.
extreme_regions = df['region'].value_counts().head(5).index
temp_stats = temp_stats.loc[extreme_regions]

# Three bars per region, offset by one bar width around each tick position.
x = np.arange(len(temp_stats))
width = 0.25

axes[1,2].bar(x - width, temp_stats['Min'], width, label='Minimum', alpha=0.8, color='lightblue')
axes[1,2].bar(x, temp_stats['Average'], width, label='Average', alpha=0.8, color='orange')
axes[1,2].bar(x + width, temp_stats['Max'], width, label='Maximum', alpha=0.8, color='red')

axes[1,2].set_title('🔥 Temperature Extremes by Region')
axes[1,2].set_ylabel('Temperature (°F)')
axes[1,2].set_xticks(x)
# Break labels at spaces so multi-word region names stack vertically.
axes[1,2].set_xticklabels([r.replace(' ', '\n') for r in temp_stats.index], fontsize=9)
axes[1,2].legend()
axes[1,2].grid(axis='y', alpha=0.3)

plt.tight_layout()
plt.show()

In [None]:
# Create summary heatmap of regional climate characteristics
# A fresh figure becomes the current one; seaborn and the pyplot calls below
# all draw onto it.
plt.figure(figsize=(12, 8))

# Prepare data for heatmap
# Rows are regions, columns are seasons in calendar order, cells are the
# mean of mean_temp for that region/season pair.
heatmap_season_order = ['Winter', 'Spring', 'Summer', 'Fall']
mean_by_region_season = df.groupby(['region', 'season'])['mean_temp'].mean()
climate_summary = mean_by_region_season.unstack().reindex(columns=heatmap_season_order)

# Create heatmap
heatmap_options = dict(
    annot=True,
    fmt='.1f',
    cmap='RdYlBu_r',
    cbar_kws={'label': 'Temperature (°F)'},
    linewidths=0.5,
    linecolor='white',
)
sns.heatmap(climate_summary, **heatmap_options)
plt.title('🗺️ Regional Climate Heatmap: Example Analysis',
          fontsize=16, fontweight='bold', pad=20)
plt.ylabel('Geographic Region', fontweight='bold')
plt.xlabel('Season', fontweight='bold')
# Keep both sets of tick labels horizontal.
plt.xticks(rotation=0)
plt.yticks(rotation=0)
plt.tight_layout()
plt.show()

print('🎯 Heatmap shows clear seasonal and regional temperature patterns!')

In [None]:
# Print regional analysis summary
print('🌍 REGIONAL TEMPERATURE ANALYSIS SUMMARY')
print('=' * 60)

# Per-region statistics for up to six regions, largest record count first.
for region in df['region'].value_counts().head(6).index:
    region_data = df.loc[df['region'] == region, 'mean_temp'].dropna()
    if region_data.empty:
        continue
    print(f'\n📍 {region.upper()}:')
    print(f'   Records: {len(region_data):,}')
    print(f'   Average Temperature: {region_data.mean():.1f}°F')
    print(f'   Temperature Range: {region_data.min():.1f}°F to {region_data.max():.1f}°F')
    print(f'   Standard Deviation: {region_data.std():.1f}°F')

print('\n🌡️ KEY INSIGHTS:')
print('=' * 40)

# Regional extremes
# Computed from `df` directly so this cell does not depend on a variable that
# only exists after the plotting cell has been run.
region_mean_temps = df.groupby('region')['mean_temp'].mean()
hottest_region = region_mean_temps.idxmax()
coldest_region = region_mean_temps.idxmin()
hottest_temp = region_mean_temps.max()
coldest_temp = region_mean_temps.min()

print(f'🔥 Hottest Region: {hottest_region} ({hottest_temp:.1f}°F average)')
print(f'🧊 Coldest Region: {coldest_region} ({coldest_temp:.1f}°F average)')
print(f'📊 Temperature Range: {hottest_temp - coldest_temp:.1f}°F difference')

print('\n✅ Regional temperature analysis complete!')
print('\n💡 This is an example analysis. Modify regions, queries, and visualizations as needed!')
print('📝 Note: Geographic regions are approximated based on station number patterns.')