In [1]:
import requests
import pandas as pd
from datetime import datetime, timedelta
import time

class NOABlendWeatherAnalyzer:
    """Fetch and summarise hourly 2 m temperature forecasts for four airports.

    Forecasts are requested from the Open-Meteo historical-forecast endpoint
    with the ``gfs_seamless`` model (the stand-in used here for the "NOA
    blend").  Problems are reported with ``print`` and surface to callers as
    ``None`` or an empty result; a failed request never raises.
    """

    # Endpoint and model used for every request.
    API_URL = "https://historical-forecast-api.open-meteo.com/v1/forecast"
    MODEL = 'gfs_seamless'  # This is closest to NOA blend available
    MODEL_LABEL = 'NOA_blend_equivalent'

    # Labels written by create_summary_dataframe and read by print_summary.
    MORNING_LABEL = '6AM Today'
    EVENING_LABEL = '6PM Yesterday'

    def __init__(self):
        # Airport coordinates (exact Kalshi settlement locations)
        self.cities = {
            'denver': {
                'coords': (39.8667, -104.6667),  # Denver International Airport
                'name': 'Denver International Airport',
            },
            'miami': {
                'coords': (25.7833, -80.3167),  # Miami International Airport
                'name': 'Miami International Airport',
            },
            'chicago': {
                'coords': (41.9786, -87.9048),  # O'Hare International Airport
                'name': "Chicago O'Hare Airport",
            },
            'los_angeles': {
                'coords': (33.9425, -118.4081),  # LAX Airport
                'name': 'Los Angeles International Airport',
            },
        }

    @staticmethod
    def _to_fahrenheit(temp_c):
        """Convert Celsius to Fahrenheit; ``None`` (missing value) stays ``None``."""
        return (temp_c * 9/5) + 32 if temp_c is not None else None

    @staticmethod
    def _fmt_temp(value, spec, width):
        """Format a temperature with ``spec``, or a right-aligned ``N/A`` if missing."""
        if pd.isna(value):
            return f"{'N/A':>{width}}"
        return format(value, spec)

    def _pick_hour(self, city, target_date, target_time, times, temperatures):
        """Select the record for ``target_time`` from parallel hourly lists.

        An exact ``<date>T<HH:MM>`` match is tried first; otherwise the first
        entry whose hour equals the requested hour is used.  Returns the
        forecast dict, or ``None`` when no entry matches.
        """
        target_datetime = f"{target_date}T{target_time}"

        for time_str, temp_c in zip(times, temperatures):
            if target_datetime in time_str:
                return {
                    'city': city,
                    'date': target_date,
                    'time': target_time,
                    'datetime': target_datetime,
                    'temp_c': temp_c,
                    'temp_f': self._to_fahrenheit(temp_c),
                    'model': self.MODEL_LABEL,
                }

        # If exact time not found, fall back to the entry with the same hour.
        print(f"⚠️  Exact time {target_time} not found, looking for closest...")
        target_hour = int(target_time.split(':')[0])

        for time_str, temp_c in zip(times, temperatures):
            dt = datetime.fromisoformat(time_str.replace('Z', '+00:00'))
            if dt.hour == target_hour:
                return {
                    'city': city,
                    'date': target_date,
                    'time': f"{dt.hour:02d}:00",
                    'datetime': dt.isoformat(),
                    'temp_c': temp_c,
                    'temp_f': self._to_fahrenheit(temp_c),
                    'model': self.MODEL_LABEL,
                }

        print(f"❌ No forecast value for hour {target_hour:02d} on {target_date} ({city})")
        return None

    def get_noa_blend_forecast(self, city, target_date, target_time):
        """
        Get NOA blend forecast for specific date and time.

        Args:
            city: Key of ``self.cities`` (e.g. ``'denver'``).
            target_date: Date string passed to the API as both ``start_date``
                and ``end_date`` (``'YYYY-MM-DD'``).
            target_time: Time in format 'HH:MM' (e.g., '06:00', '18:00').

        Returns:
            dict with keys ``city``, ``date``, ``time``, ``datetime``,
            ``temp_c``, ``temp_f`` and ``model``, or ``None`` when the city is
            unknown, the request fails, or no matching hour is present.
            ``temp_c``/``temp_f`` are ``None`` when the API reports a null
            temperature for the matched hour.
        """
        if city not in self.cities:
            print(f"❌ City '{city}' not found")
            return None

        lat, lon = self.cities[city]['coords']
        params = {
            'latitude': lat,
            'longitude': lon,
            'start_date': target_date,
            'end_date': target_date,
            'hourly': 'temperature_2m',
            'timezone': 'auto',
            'models': self.MODEL,
        }

        try:
            response = requests.get(self.API_URL, params=params, timeout=30)
            if response.status_code != 200:
                # Report the failure instead of dropping it silently.
                print(f"❌ HTTP {response.status_code} getting forecast for {city} on {target_date}")
                return None

            hourly = response.json().get('hourly') or {}
            times = hourly.get('time') or []
            if not times:
                return None

            return self._pick_hour(city, target_date, target_time,
                                   times, hourly['temperature_2m'])
        except Exception as e:
            # Deliberate best-effort boundary: one bad response must not abort
            # a loop over many cities/dates, so report and return None.
            print(f"❌ Error getting NOA blend forecast for {city}: {e}")

        return None

    def get_target_forecasts(self):
        """Get the 6AM-today and 6PM-yesterday forecasts for every city.

        Returns:
            list of forecast dicts (see ``get_noa_blend_forecast``); requests
            that produced no record are skipped.
        """
        # Take one clock reading so both dates stay consistent around midnight.
        now = datetime.now()
        today = now.strftime('%Y-%m-%d')
        yesterday = (now - timedelta(days=1)).strftime('%Y-%m-%d')

        print(f"🎯 Getting NOA Blend forecasts for:")
        print(f"   📅 Today ({today}) at 6:00 AM")
        print(f"   📅 Yesterday ({yesterday}) at 6:00 PM")
        print()

        targets = (
            ("6AM today", today, '06:00'),
            ("6PM yesterday", yesterday, '18:00'),
        )
        results = []

        for city in self.cities:
            print(f"🏙️  Processing {city.title()}...")

            for label, date_str, time_str in targets:
                forecast = self.get_noa_blend_forecast(city, date_str, time_str)
                if not forecast:
                    print(f"   ❌ {label}: No data")
                    continue

                results.append(forecast)
                if forecast['temp_f'] is None:
                    # The API can return null for an hour; ':.1f' would raise on None.
                    print(f"   ⚠️  {label}: temperature missing in API response")
                else:
                    print(f"   ✅ {label}: {forecast['temp_f']:.1f}°F")

            print()
            time.sleep(0.5)  # Be respectful to API

        return results

    def create_summary_dataframe(self, results):
        """Create a summary DataFrame with a readable ``time_label`` column.

        A row is labelled '6AM Today' when its time is '06:00' and its date is
        the current local date; every other row is labelled '6PM Yesterday'.
        Returns an empty DataFrame when ``results`` is empty.
        """
        if not results:
            return pd.DataFrame()

        df = pd.DataFrame(results)

        # Evaluate "today" once rather than once per row.
        today = datetime.now().strftime('%Y-%m-%d')
        is_morning = (df['time'] == '06:00') & (df['date'] == today)
        df['time_label'] = is_morning.map(
            {True: self.MORNING_LABEL, False: self.EVENING_LABEL}
        )

        return df

    def print_summary(self, df):
        """Print per-city temperatures by time label and the overnight change.

        Missing temperatures are shown as ``N/A`` and excluded from the change
        table.
        """
        if df.empty:
            print("❌ No forecast data available")
            return

        print("\n" + "="*60)
        print("📊 NOA BLEND FORECAST SUMMARY")
        print("="*60)

        # Group by time period
        for time_label in [self.EVENING_LABEL, self.MORNING_LABEL]:
            subset = df[df['time_label'] == time_label]
            if subset.empty:
                continue
            print(f"\n🕐 {time_label}:")
            for _, row in subset.iterrows():
                temp_f = self._fmt_temp(row['temp_f'], '5.1f', 5)
                temp_c = self._fmt_temp(row['temp_c'], '4.1f', 4)
                print(f"   {row['city'].title():12} | {temp_f}°F ({temp_c}°C)")

        # Temperature comparison
        if len(df['time_label'].unique()) == 2:
            print(f"\n🌡️  TEMPERATURE CHANGES:")
            # Coerce to float so an all-None column behaves like NaN.
            numeric = df.assign(temp_f=pd.to_numeric(df['temp_f'], errors='coerce'))

            def first_by_city(label):
                rows = numeric[numeric['time_label'] == label]
                return rows.groupby('city')['temp_f'].first()

            changes = first_by_city(self.MORNING_LABEL) - first_by_city(self.EVENING_LABEL)
            for city, change in changes.dropna().sort_index().items():
                arrow = "📈" if change > 0 else "📉" if change < 0 else "➡️"
                print(f"   {city.title():12} | {arrow} {change:+5.1f}°F change")

# Run the analysis
def main():
    """Run the single-day workflow end to end.

    Fetches the 6AM-today / 6PM-yesterday forecasts, prints the summary and,
    when any data came back, writes it to a timestamped CSV in the current
    directory.  Returns the summary DataFrame (possibly empty).
    """
    weather = NOABlendWeatherAnalyzer()

    for banner_line in (
        "🚀 NOA Blend Weather Forecast Analysis",
        "Getting forecasts for 6AM today and 6PM yesterday...",
        "",
    ):
        print(banner_line)

    # Fetch, tabulate and display in one pass.
    summary = weather.create_summary_dataframe(weather.get_target_forecasts())
    weather.print_summary(summary)

    # Nothing to persist when no forecast was retrieved.
    if summary.empty:
        return summary

    stamp = datetime.now().strftime('%Y%m%d_%H%M')
    out_path = f"noa_blend_forecasts_{stamp}.csv"
    summary.to_csv(out_path, index=False)
    print(f"\n💾 Data saved to: {out_path}")

    return summary

# Entry point: run the single-day analysis and bind its returned DataFrame
# to the module-level name `df`.
if __name__ == "__main__":
    df = main()

🚀 NOA Blend Weather Forecast Analysis
Getting forecasts for 6AM today and 6PM yesterday...

🎯 Getting NOA Blend forecasts for:
   📅 Today (2025-07-12) at 6:00 AM
   📅 Yesterday (2025-07-11) at 6:00 PM

🏙️  Processing Denver...
   ✅ 6AM today: 58.8°F
   ✅ 6PM yesterday: 63.5°F

🏙️  Processing Miami...
   ✅ 6AM today: 84.2°F
   ✅ 6PM yesterday: 90.7°F

🏙️  Processing Chicago...
   ✅ 6AM today: 68.9°F
   ✅ 6PM yesterday: 81.9°F

🏙️  Processing Los_Angeles...
   ✅ 6AM today: 63.9°F
   ✅ 6PM yesterday: 70.3°F


📊 NOA BLEND FORECAST SUMMARY

🕐 6PM Yesterday:
   Denver       |  63.5°F (17.5°C)
   Miami        |  90.7°F (32.6°C)
   Chicago      |  81.9°F (27.7°C)
   Los_Angeles  |  70.3°F (21.3°C)

🕐 6AM Today:
   Denver       |  58.8°F (14.9°C)
   Miami        |  84.2°F (29.0°C)
   Chicago      |  68.9°F (20.5°C)
   Los_Angeles  |  63.9°F (17.7°C)

🌡️  TEMPERATURE CHANGES:
   Chicago      | 📉 -13.0°F change
   Denver       | 📉  -4.7°F change
   Los_Angeles  | 📉  -6.5°F change
   Miami        

In [7]:
import requests
import pandas as pd
from datetime import datetime, timedelta
import time

class NOABlendWeatherAnalyzer:
    """Fetch and analyse hourly 2 m temperature forecasts for four airports.

    Forecasts are requested from the Open-Meteo historical-forecast endpoint
    with the ``gfs_seamless`` model (the stand-in used here for the "NOA
    blend").  Two workflows are supported: a single-day snapshot (6AM today
    vs 6PM yesterday) and a multi-day collection that pairs every 6AM value
    with the previous day's 6PM value.  Problems are reported with ``print``;
    a failed request never raises.
    """

    # Endpoint and model used for every request.
    API_URL = "https://historical-forecast-api.open-meteo.com/v1/forecast"
    MODEL = 'gfs_seamless'  # This is closest to NOA blend available
    MODEL_LABEL = 'NOA_blend_equivalent'

    # Labels written by create_summary_dataframe and read by print_summary.
    MORNING_LABEL = '6AM Today'
    EVENING_LABEL = '6PM Yesterday'

    # Meteorological seasons by month number; any other value maps to 'Fall'.
    _SEASON_BY_MONTH = {
        12: 'Winter', 1: 'Winter', 2: 'Winter',
        3: 'Spring', 4: 'Spring', 5: 'Spring',
        6: 'Summer', 7: 'Summer', 8: 'Summer',
    }

    def __init__(self):
        # Airport coordinates (exact Kalshi settlement locations)
        self.cities = {
            'denver': {
                'coords': (39.8667, -104.6667),  # Denver International Airport
                'name': 'Denver International Airport',
            },
            'miami': {
                'coords': (25.7833, -80.3167),  # Miami International Airport
                'name': 'Miami International Airport',
            },
            'chicago': {
                'coords': (41.9786, -87.9048),  # O'Hare International Airport
                'name': "Chicago O'Hare Airport",
            },
            'los_angeles': {
                'coords': (33.9425, -118.4081),  # LAX Airport
                'name': 'Los Angeles International Airport',
            },
        }

    @staticmethod
    def _to_fahrenheit(temp_c):
        """Convert Celsius to Fahrenheit; ``None`` (missing value) stays ``None``."""
        return (temp_c * 9/5) + 32 if temp_c is not None else None

    @staticmethod
    def _fmt_temp(value, spec, width):
        """Format a temperature with ``spec``, or a right-aligned ``N/A`` if missing."""
        if pd.isna(value):
            return f"{'N/A':>{width}}"
        return format(value, spec)

    @staticmethod
    def _cell_or_na(table, row, column):
        """Return ``table.loc[row, column]``, or ``'N/A'`` if absent or NaN."""
        if column not in table.columns:
            return 'N/A'
        value = table.loc[row, column]
        return 'N/A' if pd.isna(value) else value

    def _pick_hour(self, city, target_date, target_time, times, temperatures):
        """Select the record for ``target_time`` from parallel hourly lists.

        An exact ``<date>T<HH:MM>`` match is tried first; otherwise the first
        entry whose hour equals the requested hour is used.  Returns the
        forecast dict, or ``None`` when no entry matches.
        """
        target_datetime = f"{target_date}T{target_time}"

        for time_str, temp_c in zip(times, temperatures):
            if target_datetime in time_str:
                return {
                    'city': city,
                    'date': target_date,
                    'time': target_time,
                    'datetime': target_datetime,
                    'temp_c': temp_c,
                    'temp_f': self._to_fahrenheit(temp_c),
                    'model': self.MODEL_LABEL,
                }

        # If exact time not found, fall back to the entry with the same hour.
        print(f"⚠️  Exact time {target_time} not found, looking for closest...")
        target_hour = int(target_time.split(':')[0])

        for time_str, temp_c in zip(times, temperatures):
            dt = datetime.fromisoformat(time_str.replace('Z', '+00:00'))
            if dt.hour == target_hour:
                return {
                    'city': city,
                    'date': target_date,
                    'time': f"{dt.hour:02d}:00",
                    'datetime': dt.isoformat(),
                    'temp_c': temp_c,
                    'temp_f': self._to_fahrenheit(temp_c),
                    'model': self.MODEL_LABEL,
                }

        print(f"❌ No forecast value for hour {target_hour:02d} on {target_date} ({city})")
        return None

    def get_noa_blend_forecast(self, city, target_date, target_time):
        """
        Get NOA blend forecast for specific date and time.

        Args:
            city: Key of ``self.cities`` (e.g. ``'denver'``).
            target_date: Date string passed to the API as both ``start_date``
                and ``end_date`` (``'YYYY-MM-DD'``).
            target_time: Time in format 'HH:MM' (e.g., '06:00', '18:00').

        Returns:
            dict with keys ``city``, ``date``, ``time``, ``datetime``,
            ``temp_c``, ``temp_f`` and ``model``, or ``None`` when the city is
            unknown, the request fails, or no matching hour is present.
            ``temp_c``/``temp_f`` are ``None`` when the API reports a null
            temperature for the matched hour.
        """
        if city not in self.cities:
            print(f"❌ City '{city}' not found")
            return None

        lat, lon = self.cities[city]['coords']
        params = {
            'latitude': lat,
            'longitude': lon,
            'start_date': target_date,
            'end_date': target_date,
            'hourly': 'temperature_2m',
            'timezone': 'auto',
            'models': self.MODEL,
        }

        try:
            response = requests.get(self.API_URL, params=params, timeout=30)
            if response.status_code != 200:
                # Report the failure instead of dropping it silently.
                print(f"❌ HTTP {response.status_code} getting forecast for {city} on {target_date}")
                return None

            hourly = response.json().get('hourly') or {}
            times = hourly.get('time') or []
            if not times:
                return None

            return self._pick_hour(city, target_date, target_time,
                                   times, hourly['temperature_2m'])
        except Exception as e:
            # Deliberate best-effort boundary: one bad response must not abort
            # a loop over many cities/dates, so report and return None.
            print(f"❌ Error getting NOA blend forecast for {city}: {e}")

        return None

    def get_year_forecasts(self, start_date='2023-07-01', end_date='2024-06-30', sample_rate=1):
        """Get forecasts for a date range: 6AM each day + 6PM previous day.

        Args:
            start_date: First day of the range, ``'YYYY-MM-DD'``.
            end_date: Last day of the range (inclusive), ``'YYYY-MM-DD'``.
            sample_rate: Process every ``sample_rate``-th day counted from
                ``start_date``; must be at least 1.

        Returns:
            list of forecast dicts.  Each carries ``time_period``
            (``'morning'``/``'evening'``) and ``comparison_pair`` (the date of
            the 6AM record, shared with the preceding evening's record).  No
            evening record is fetched for ``start_date`` itself.

        Raises:
            ValueError: if ``sample_rate`` is smaller than 1.
        """
        if sample_rate < 1:
            raise ValueError(f"sample_rate must be >= 1, got {sample_rate!r}")

        start_dt = datetime.strptime(start_date, '%Y-%m-%d')
        end_dt = datetime.strptime(end_date, '%Y-%m-%d')

        total_days = (end_dt - start_dt).days + 1
        # Ceiling division: day 0 is always sampled.
        sampled_days = max(0, int(-(-total_days // sample_rate)))

        print(f"🚀 Getting NOA Blend forecasts for ENTIRE YEAR")
        print(f"📅 Date range: {start_date} to {end_date}")
        print(f"🏙️  Cities: {', '.join([c.title() for c in self.cities.keys()])}")
        print(f"📊 Total days to process: {sampled_days}")
        print(f"⏱️  Pattern: 6AM current day + 6PM previous day")
        print()

        results = []
        sampled = 0

        for offset in range(max(total_days, 0)):
            if offset % sample_rate != 0:
                continue

            current_dt = start_dt + timedelta(days=offset)
            date_str = current_dt.strftime('%Y-%m-%d')
            yesterday_str = (current_dt - timedelta(days=1)).strftime('%Y-%m-%d')

            # Progress indicator: every 30 processed (sampled) days.
            if sampled % 30 == 0:
                progress = offset / total_days * 100
                print(f"📅 Progress: {progress:.1f}% - Processing {date_str}")
            sampled += 1

            for city in self.cities:
                # 6AM forecast for current day
                forecast_6am = self.get_noa_blend_forecast(city, date_str, '06:00')
                if forecast_6am:
                    forecast_6am['time_period'] = 'morning'
                    forecast_6am['comparison_pair'] = date_str  # This groups the pair
                    results.append(forecast_6am)

                # 6PM forecast for previous day (never reaches before start_date)
                if offset > 0:
                    forecast_6pm = self.get_noa_blend_forecast(city, yesterday_str, '18:00')
                    if forecast_6pm:
                        forecast_6pm['time_period'] = 'evening'
                        forecast_6pm['comparison_pair'] = date_str  # Same pair ID
                        results.append(forecast_6pm)

                time.sleep(0.2)  # Be respectful to API

        print(f"\n✅ Collection complete! Gathered {len(results)} forecast records")
        print(f"📊 Pattern: Each record compares 6AM with 6PM from previous day")
        return results

    def get_target_forecasts(self):
        """Get the 6AM-today and 6PM-yesterday forecasts for every city.

        Returns:
            list of forecast dicts, each tagged with ``time_period``
            (``'morning'`` or ``'evening'``); requests that produced no record
            are skipped.
        """
        # Take one clock reading so both dates stay consistent around midnight.
        now = datetime.now()
        today = now.strftime('%Y-%m-%d')
        yesterday = (now - timedelta(days=1)).strftime('%Y-%m-%d')

        print(f"🎯 Getting NOA Blend forecasts for:")
        print(f"   📅 Today ({today}) at 6:00 AM")
        print(f"   📅 Yesterday ({yesterday}) at 6:00 PM")
        print()

        targets = (
            ("6AM today", today, '06:00', 'morning'),
            ("6PM yesterday", yesterday, '18:00', 'evening'),
        )
        results = []

        for city in self.cities:
            print(f"🏙️  Processing {city.title()}...")

            for label, date_str, time_str, period in targets:
                forecast = self.get_noa_blend_forecast(city, date_str, time_str)
                if not forecast:
                    print(f"   ❌ {label}: No data")
                    continue

                forecast['time_period'] = period
                results.append(forecast)
                if forecast['temp_f'] is None:
                    # The API can return null for an hour; ':.1f' would raise on None.
                    print(f"   ⚠️  {label}: temperature missing in API response")
                else:
                    print(f"   ✅ {label}: {forecast['temp_f']:.1f}°F")

            print()
            time.sleep(0.5)  # Be respectful to API

        return results

    def create_summary_dataframe(self, results):
        """Create a summary DataFrame, labelling single-day results.

        Single-day results (no ``comparison_pair`` column and at most two
        distinct dates) get a ``time_label`` column: '6AM Today' for rows at
        '06:00' on the current local date, '6PM Yesterday' otherwise.  Results
        from ``get_year_forecasts`` are returned unlabelled so that
        ``print_summary`` routes them to ``analyze_year_data`` regardless of
        how many rows were collected.
        """
        if not results:
            return pd.DataFrame()

        df = pd.DataFrame(results)

        # Decide by structure, not row count: a short date range would
        # otherwise be mistaken for a single-day snapshot.
        is_single_day = (
            'comparison_pair' not in df.columns and df['date'].nunique() <= 2
        )
        if is_single_day:
            today = datetime.now().strftime('%Y-%m-%d')
            is_morning = (df['time'] == '06:00') & (df['date'] == today)
            df['time_label'] = is_morning.map(
                {True: self.MORNING_LABEL, False: self.EVENING_LABEL}
            )

        return df

    def print_summary(self, df):
        """Print the single-day summary, or delegate to the full-range analysis.

        Frames with a ``time_label`` column are treated as single-day data;
        anything else is passed to ``analyze_year_data``.  Missing
        temperatures are shown as ``N/A`` and excluded from the change table.
        """
        if df.empty:
            print("❌ No forecast data available")
            return

        if 'time_label' not in df.columns:
            # Full year summary
            self.analyze_year_data(df)
            return

        # Single day summary
        print("\n" + "="*60)
        print("📊 NOA BLEND FORECAST SUMMARY")
        print("="*60)

        # Group by time period
        for time_label in [self.EVENING_LABEL, self.MORNING_LABEL]:
            subset = df[df['time_label'] == time_label]
            if subset.empty:
                continue
            print(f"\n🕐 {time_label}:")
            for _, row in subset.iterrows():
                temp_f = self._fmt_temp(row['temp_f'], '5.1f', 5)
                temp_c = self._fmt_temp(row['temp_c'], '4.1f', 4)
                print(f"   {row['city'].title():12} | {temp_f}°F ({temp_c}°C)")

        # Temperature comparison
        if len(df['time_label'].unique()) == 2:
            print(f"\n🌡️  TEMPERATURE CHANGES:")
            # Coerce to float so an all-None column behaves like NaN.
            numeric = df.assign(temp_f=pd.to_numeric(df['temp_f'], errors='coerce'))

            def first_by_city(label):
                rows = numeric[numeric['time_label'] == label]
                return rows.groupby('city')['temp_f'].first()

            changes = first_by_city(self.MORNING_LABEL) - first_by_city(self.EVENING_LABEL)
            for city, change in changes.dropna().sort_index().items():
                arrow = "📈" if change > 0 else "📉" if change < 0 else "➡️"
                print(f"   {city.title():12} | {arrow} {change:+5.1f}°F change")

    def analyze_year_data(self, df):
        """Print statistics for a multi-day dataset.

        Reports overall, monthly and seasonal temperature statistics per city
        and time period, then the 6PM → 6AM overnight change for each
        ``comparison_pair`` (or, without that column, the same-day
        morning → evening swing).

        Note: ``df`` is modified in place; ``month``, ``month_name`` and
        ``season`` columns are added.

        Returns:
            The same DataFrame, or ``None`` when it is empty.
        """
        if df.empty:
            print("❌ No data to analyze")
            return

        print("\n" + "="*70)
        print("📊 FULL YEAR NOA BLEND ANALYSIS")
        print("="*70)

        # Basic statistics
        print(f"📈 Total records: {len(df)}")
        print(f"📅 Date range: {df['date'].min()} to {df['date'].max()}")
        print(f"🏙️  Cities: {', '.join(df['city'].unique())}")
        print(f"⏰ Time periods: {', '.join(df['time_period'].unique())}")

        # Temperature statistics by city and time
        print("\n🌡️  TEMPERATURE STATISTICS:")
        temp_stats = df.groupby(['city', 'time_period'])['temp_f'].agg(['mean', 'min', 'max', 'std']).round(2)
        print(temp_stats)

        # Monthly patterns
        parsed_dates = pd.to_datetime(df['date'])
        df['month'] = parsed_dates.dt.month
        df['month_name'] = parsed_dates.dt.strftime('%B')

        print("\n📊 MONTHLY AVERAGE TEMPERATURES:")
        monthly_temps = df.groupby(['city', 'month_name', 'time_period'])['temp_f'].mean().round(1)
        for city in df['city'].unique():
            print(f"\n{city.upper()}:")
            # Leave gaps as NaN and drop them: a missing month/period
            # combination must not be reported as 0°F.
            city_monthly = monthly_temps[city].unstack()
            if 'morning' in city_monthly.columns:
                print("  Morning (6AM):", city_monthly['morning'].dropna().to_dict())
            if 'evening' in city_monthly.columns:
                print("  Evening (6PM):", city_monthly['evening'].dropna().to_dict())

        # Seasonal patterns
        season_by_month = self._SEASON_BY_MONTH
        df['season'] = df['month'].apply(lambda month: season_by_month.get(month, 'Fall'))

        print("\n🌍 SEASONAL PATTERNS:")
        seasonal_temps = df.groupby(['city', 'season', 'time_period'])['temp_f'].mean().round(1)
        for city in df['city'].unique():
            print(f"\n{city.upper()}:")
            city_seasonal = seasonal_temps[city].unstack()
            for season in ['Winter', 'Spring', 'Summer', 'Fall']:
                if season in city_seasonal.index:
                    morning = self._cell_or_na(city_seasonal, season, 'morning')
                    evening = self._cell_or_na(city_seasonal, season, 'evening')
                    print(f"  {season:6}: 6AM={morning:5}°F, 6PM={evening:5}°F")

        # Daily temperature transitions (6PM previous day to 6AM current day)
        print("\n🌡️  OVERNIGHT TEMPERATURE TRANSITIONS (6PM yesterday → 6AM today):")
        transition_data = []

        if 'comparison_pair' in df.columns:
            # sort=False keeps groups in order of first appearance, so cities
            # are listed in the order they were collected.
            for (_, city), pair in df.groupby(['comparison_pair', 'city'], sort=False):
                morning_rows = pair[pair['time_period'] == 'morning']
                evening_rows = pair[pair['time_period'] == 'evening']
                if morning_rows.empty or evening_rows.empty:
                    continue

                morning_temp = morning_rows['temp_f'].iloc[0]
                evening_temp = evening_rows['temp_f'].iloc[0]
                transition_data.append({
                    'city': city,
                    'evening_date': evening_rows['date'].iloc[0],
                    'morning_date': morning_rows['date'].iloc[0],
                    'evening_temp': evening_temp,
                    'morning_temp': morning_temp,
                    'overnight_change': morning_temp - evening_temp,
                })
        else:
            # Fallback for older data format - same day transitions
            for (city, date), day in df.groupby(['city', 'date'], sort=False):
                morning_temp = day[day['time_period'] == 'morning']['temp_f'].values
                evening_temp = day[day['time_period'] == 'evening']['temp_f'].values
                if len(morning_temp) == 0 or len(evening_temp) == 0:
                    continue

                transition_data.append({
                    'city': city,
                    'date': date,
                    'morning_temp': morning_temp[0],
                    'evening_temp': evening_temp[0],
                    'swing': evening_temp[0] - morning_temp[0],
                })

        if transition_data:
            transition_df = pd.DataFrame(transition_data)

            if 'overnight_change' in transition_df.columns:
                print("📊 Overnight Temperature Changes (6PM yesterday → 6AM today):")
                transition_stats = transition_df.groupby('city')['overnight_change'].agg(['mean', 'min', 'max', 'std']).round(2)
                print(transition_stats)

                print("\n🌡️  Average Overnight Changes by City:")
                for city in transition_df['city'].unique():
                    avg_change = transition_df[transition_df['city'] == city]['overnight_change'].mean()
                    trend = "cooling" if avg_change < 0 else "warming" if avg_change > 0 else "stable"
                    print(f"  {city.title():12}: {avg_change:+5.1f}°F ({trend})")
            else:
                # Fallback display for same-day swings
                transition_stats = transition_df.groupby('city')['swing'].agg(['mean', 'min', 'max', 'std']).round(2)
                print(transition_stats)

        return df

# Run the analysis
def main():
    """Prompt for an analysis type, run it, and return the resulting DataFrame.

    Entering ``2`` runs the full-year collection (2023-07-01 to 2024-06-30,
    every day); any other input runs the single-day analysis.  Non-empty
    results are written to a timestamped CSV in the current directory.
    """
    weather = NOABlendWeatherAnalyzer()

    for menu_line in (
        "🚀 NOA Blend Weather Forecast Analysis",
        "Choose analysis type:",
        "1. Single day (6AM today, 6PM yesterday)",
        "2. Full year analysis",
    ):
        print(menu_line)

    selection = input("\nEnter choice (1 or 2): ").strip()

    if selection != '2':
        # Single day analysis (default for anything other than '2')
        print("\n📅 Running single day analysis...")

        frame = weather.create_summary_dataframe(weather.get_target_forecasts())
        weather.print_summary(frame)

        if not frame.empty:
            stamp = datetime.now().strftime('%Y%m%d_%H%M')
            out_path = f"noa_blend_forecasts_{stamp}.csv"
            frame.to_csv(out_path, index=False)
            print(f"\n💾 Data saved to: {out_path}")

        return frame

    # Full year analysis
    print("\n🌍 Running FULL YEAR analysis...")

    records = weather.get_year_forecasts(
        start_date='2023-07-01',
        end_date='2024-06-30',
        sample_rate=1,  # Every day
    )
    frame = weather.create_summary_dataframe(records)

    if frame.empty:
        print("❌ No year data collected")
        return frame

    weather.print_summary(frame)

    stamp = datetime.now().strftime('%Y%m%d_%H%M')
    out_path = f"noa_blend_year_analysis_{stamp}.csv"
    frame.to_csv(out_path, index=False)
    print(f"\n💾 Full year data saved to: {out_path}")

    return frame

# Quick function to run full year without prompts
def run_full_year_analysis():
    """Collect and analyse 2023-07-01..2024-06-30 without prompting.

    Returns the collected DataFrame after printing the analysis and saving a
    timestamped CSV, or a new empty DataFrame when nothing was collected.
    """
    weather = NOABlendWeatherAnalyzer()

    for intro_line in (
        "🚀 NOA Blend Weather Forecast - FULL YEAR ANALYSIS",
        "This will collect 6AM and 6PM forecasts for an entire year...",
        "⏱️  Estimated time: 15-20 minutes",
        "",
    ):
        print(intro_line)

    records = weather.get_year_forecasts(
        start_date='2023-07-01',
        end_date='2024-06-30',
        sample_rate=1,  # Every day
    )
    frame = weather.create_summary_dataframe(records)

    # Bail out early when the collection produced nothing.
    if frame.empty:
        print("❌ No year data collected")
        return pd.DataFrame()

    weather.print_summary(frame)

    stamp = datetime.now().strftime('%Y%m%d_%H%M')
    out_path = f"noa_blend_year_analysis_{stamp}.csv"
    frame.to_csv(out_path, index=False)
    print(f"\n💾 Full year data saved to: {out_path}")

    return frame

# Entry point: run the interactive menu and bind the DataFrame returned by
# main() to the module-level name `df`.
if __name__ == "__main__":
    df = main()

🚀 NOA Blend Weather Forecast Analysis
Choose analysis type:
1. Single day (6AM today, 6PM yesterday)
2. Full year analysis



Enter choice (1 or 2):  2



🌍 Running FULL YEAR analysis...
🚀 Getting NOA Blend forecasts for ENTIRE YEAR
📅 Date range: 2023-07-01 to 2024-06-30
🏙️  Cities: Denver, Miami, Chicago, Los_Angeles
📊 Total days to process: 366
⏱️  Pattern: 6AM current day + 6PM previous day

📅 Progress: 0.0% - Processing 2023-07-01
📅 Progress: 8.2% - Processing 2023-07-31
📅 Progress: 16.4% - Processing 2023-08-30
📅 Progress: 24.6% - Processing 2023-09-29
📅 Progress: 32.8% - Processing 2023-10-29
📅 Progress: 41.0% - Processing 2023-11-28
📅 Progress: 49.2% - Processing 2023-12-28
📅 Progress: 57.4% - Processing 2024-01-27
📅 Progress: 65.6% - Processing 2024-02-26
📅 Progress: 73.8% - Processing 2024-03-27
📅 Progress: 82.0% - Processing 2024-04-26
📅 Progress: 90.2% - Processing 2024-05-26
📅 Progress: 98.4% - Processing 2024-06-25

✅ Collection complete! Gathered 2924 forecast records
📊 Pattern: Each record compares 6AM with 6PM from previous day

📊 FULL YEAR NOA BLEND ANALYSIS
📈 Total records: 2924
📅 Date range: 2023-07-01 to 2024-06-30
🏙

In [9]:
import pandas as pd
import numpy as np
from datetime import datetime

def extract_actual_temps_from_kalshi(kalshi_file_path):
    """
    Extract actual temperatures from Kalshi timing data.

    The input has several rows (one per timing type) for each date/city, so
    the result is reduced to exactly one row per date/city: rows without an
    actual temperature are discarded, exact duplicates are collapsed, and if
    a date/city still carries conflicting values the first one is kept and a
    warning is printed.  One row per key keeps a later merge on
    ``['date', 'city']`` from multiplying rows.

    Args:
        kalshi_file_path (str): Path to the Kalshi timing CSV file (anything
            ``pd.read_csv`` accepts).  Must contain the columns
            ``target_date``, ``city`` and ``actual_temp_f``.

    Returns:
        pd.DataFrame: Columns ``date``, ``city``, ``actual_temp_f`` with one
        row per date/city combination.
    """
    print("Loading Kalshi timing data...")

    # Read the Kalshi data
    kalshi_df = pd.read_csv(kalshi_file_path)

    print(f"✅ Loaded {len(kalshi_df)} rows from Kalshi data")

    actual_temps = (
        kalshi_df[['target_date', 'city', 'actual_temp_f']]
        .rename(columns={'target_date': 'date'})
        # A row without a temperature carries no information and would
        # otherwise sit next to the real value as a second "unique" row.
        .dropna(subset=['actual_temp_f'])
        .drop_duplicates()
    )

    # Anything still duplicated on (date, city) has conflicting temperatures.
    conflicting = actual_temps.duplicated(subset=['date', 'city'], keep='first')
    if conflicting.any():
        print(f"⚠️  {int(conflicting.sum())} conflicting date-city rows dropped (kept first value)")
        actual_temps = actual_temps[~conflicting]

    print(f"✅ Extracted {len(actual_temps)} unique date-city temperature combinations")

    # Show coverage by city (rows with a missing city are skipped by groupby)
    print("\nActual temperature data by city:")
    for city, city_data in actual_temps.groupby('city', sort=False):
        first_date = city_data['date'].min()
        last_date = city_data['date'].max()
        print(f"{city}: {len(city_data)} dates from {first_date} to {last_date}")

    return actual_temps

def merge_with_noa_blend(noa_file_path, actual_temps_df):
    """
    Merge actual temperatures with Noa Blend analysis data.

    Every forecast row is kept (left join on 'date' and 'city'); rows with
    no matching actual temperature get NaN in 'actual_temp_f' and False in
    the added 'has_actual_temp' column.

    Args:
        noa_file_path (str): Path to the Noa Blend CSV file. It must contain
            'date' and 'city' columns.
        actual_temps_df (pd.DataFrame): DataFrame with actual temperatures in
            the columns 'date', 'city' and 'actual_temp_f'.

    Returns:
        pd.DataFrame: Merged DataFrame with actual temperatures and the
            boolean 'has_actual_temp' flag.
    """
    print(f"\nLoading Noa Blend data...")

    # Read the Noa Blend data
    noa_df = pd.read_csv(noa_file_path)

    print(f"✅ Loaded {len(noa_df)} rows from Noa Blend data")

    # Left join so forecasts without an observed temperature are retained
    merged_df = noa_df.merge(
        actual_temps_df,
        on=['date', 'city'],
        how='left'
    )

    # Add a flag to track which rows have actual temperature data
    merged_df['has_actual_temp'] = merged_df['actual_temp_f'].notna()

    # Summary statistics
    total_rows = len(merged_df)
    matched_rows = int(merged_df['has_actual_temp'].sum())
    unmatched_rows = total_rows - matched_rows
    # An empty forecast file would otherwise raise ZeroDivisionError here
    match_rate = matched_rows / total_rows * 100 if total_rows else 0.0

    print(f"\n📊 Merge Results:")
    print(f"Rows with actual temperature: {matched_rows}")
    print(f"Rows without actual temperature: {unmatched_rows}")
    print(f"Match rate: {match_rate:.1f}%")

    return merged_df

def analyze_predictions(merged_df):
    """
    Print an accuracy report comparing Noa predictions with actual temperatures.

    Only rows that carry both 'temp_f' (prediction) and 'actual_temp_f'
    (observation) take part. The error is prediction minus observation, so a
    negative value means the prediction was lower than the actual reading.
    The report covers overall statistics, a per-city breakdown and the first
    ten compared rows. Nothing is returned and the input frame is not modified.

    Args:
        merged_df (pd.DataFrame): Merged DataFrame with both predicted and actual temps
    """
    # Work on a private copy restricted to rows where both values are present
    paired = merged_df.dropna(subset=['temp_f', 'actual_temp_f']).copy()

    if paired.empty:
        print("❌ No valid comparisons found!")
        return

    # Signed and absolute error per row
    paired['error'] = paired['temp_f'] - paired['actual_temp_f']
    paired['abs_error'] = paired['error'].abs()
    signed = paired['error']
    magnitude = paired['abs_error']

    print("\n📈 Noa Blend vs Actual Temperature Analysis:")
    print(f"Valid comparisons: {len(paired)}")
    print(f"Mean error: {signed.mean():.2f}°F")
    print(f"Mean absolute error: {magnitude.mean():.2f}°F")
    print(f"Max error: {signed.max():.1f}°F")
    print(f"Min error: {signed.min():.1f}°F")
    print(f"Standard deviation: {signed.std():.2f}°F")

    # Per-city breakdown, rounded to two decimals before printing
    print("\n🏙️ Performance by City:")
    per_city = (
        paired.groupby('city')
        .agg(
            n_obs=('error', 'count'),
            bias=('error', 'mean'),
            spread=('error', 'std'),
            mae=('abs_error', 'mean'),
        )
        .round(2)
    )
    for stats in per_city.itertuples():
        print(f"{stats.Index}: {stats.n_obs} comparisons, Mean Error: {stats.bias:.2f}°F, MAE: {stats.mae:.2f}°F")

    # First ten compared rows as a quick sanity check
    print("\n🔍 Sample comparisons:")
    preview = paired.head(10)[['city', 'date', 'temp_f', 'actual_temp_f', 'error']]
    for rec in preview.itertuples(index=False):
        print(f"{rec.city} {rec.date} - Noa: {rec.temp_f:.1f}°F, Actual: {rec.actual_temp_f:.1f}°F, Diff: {rec.error:.1f}°F")

def main(kalshi_file=None, noa_file=None, output_file=None):
    """
    Main function to run the complete analysis.

    Extracts the actual temperatures from the Kalshi timing file, merges them
    onto the Noa Blend forecasts, prints the accuracy analysis and writes the
    merged table to CSV.

    Args:
        kalshi_file (str, optional): Path to the Kalshi timing CSV file.
            Defaults to the original hard-coded location.
        noa_file (str, optional): Path to the Noa Blend CSV file.
            Defaults to the original hard-coded location.
        output_file (str, optional): Destination path for the merged CSV.
            Defaults to 'merged_noa_blend_with_actual_temps.csv'.

    Returns:
        pd.DataFrame or None: The merged DataFrame, or None when any step
            failed (the error message is printed rather than raised).
    """
    # File paths - fall back to the original locations when not supplied
    if kalshi_file is None:
        kalshi_file = '/Users/bentodd/full_year_kalshi_timing_2023-07-01_to_2024-06-30.csv'
    if noa_file is None:
        noa_file = '/Users/bentodd/noa_blend_year_analysis_20250713_1209.csv'
    if output_file is None:
        output_file = 'merged_noa_blend_with_actual_temps.csv'

    try:
        # Step 1: Extract actual temperatures from Kalshi data
        actual_temps = extract_actual_temps_from_kalshi(kalshi_file)

        # Step 2: Merge with Noa Blend data
        merged_data = merge_with_noa_blend(noa_file, actual_temps)

        # Step 3: Analyze the results
        analyze_predictions(merged_data)

        # Step 4: Save the merged data
        merged_data.to_csv(output_file, index=False)
        print(f"\n✅ SUCCESS: Merged data saved to '{output_file}'")
        print(f"Dataset now includes 'actual_temp_f' and 'has_actual_temp' columns")
        print(f"Total rows: {len(merged_data)}")
        print(f"Total columns: {len(merged_data.columns)}")

        return merged_data

    except Exception as e:
        # Top-level boundary of the script: report the failure and signal it
        # to the caller with None instead of propagating the exception.
        print(f"❌ Error: {str(e)}")
        return None

if __name__ == "__main__":
    # Run the full pipeline; main() returns the merged DataFrame, or None
    # if any step raised (the error has already been printed in that case).
    merged_data = main()
    
    # Optional: display basic info about the merged dataset.
    # Skipped when main() failed, since there is nothing to describe.
    if merged_data is not None:
        print(f"\n📋 Final Dataset Info:")
        print(f"Columns: {list(merged_data.columns)}")
        print(f"Date range: {merged_data['date'].min()} to {merged_data['date'].max()}")
        print(f"Cities: {list(merged_data['city'].unique())}")

Loading Kalshi timing data...
✅ Loaded 6489 rows from Kalshi data
✅ Extracted 1082 unique date-city temperature combinations

Actual temperature data by city:
denver: 271 dates from 2023-07-01 to 2024-03-27
miami: 271 dates from 2023-07-01 to 2024-03-27
chicago: 270 dates from 2023-07-01 to 2024-03-26
los_angeles: 270 dates from 2023-07-01 to 2024-03-26

Loading Noa Blend data...
✅ Loaded 2924 rows from Noa Blend data

📊 Merge Results:
Rows with actual temperature: 2164
Rows without actual temperature: 760
Match rate: 74.0%

📈 Noa Blend vs Actual Temperature Analysis:
Valid comparisons: 2164
Mean error: -8.87°F
Mean absolute error: 9.02°F
Max error: 9.7°F
Min error: -38.3°F
Standard deviation: 8.02°F

🏙️ Performance by City:
chicago: 540 comparisons, Mean Error: -7.30°F, MAE: 7.56°F
denver: 542 comparisons, Mean Error: -13.24°F, MAE: 13.44°F
los_angeles: 540 comparisons, Mean Error: -8.75°F, MAE: 8.78°F
miami: 542 comparisons, Mean Error: -6.17°F, MAE: 6.30°F

🔍 Sample comparisons:
den