# Phenology Analysis of Google Search Trends
This notebook analyzes seasonal patterns in weekly Google Trends data.

It extracts:
- Season start (first non-zero week)
- Season end (last non-zero week)
- Peak date & peak value
- Duration of activity (weeks and days between season start and season end)
- Active-week statistics
- Median crossings (number of times the weekly count moves across the median of all weeks)
- Total searches

In [None]:
import pandas as pd
import numpy as np
from datetime import datetime

def analyze_phenology(df):
    """
    Summarise search phenology for every (location, search_term, year) group.

    The input frame is not modified: the ``date`` and ``search_count`` columns
    are normalised on a new frame, so the caller keeps its original dtypes.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw weekly data. The columns read here are ``location``,
        ``search_term``, ``year``, ``date``, ``search_count``, ``geo_code``,
        ``state``, ``country``, ``latitude`` and ``longitude``.
        ``date`` must be parseable by ``pd.to_datetime``; ``search_count``
        values that cannot be parsed as numbers are counted as 0.

    Returns
    -------
    pandas.DataFrame
        One row per group with the season start/end/peak dates (formatted as
        ``YYYY-MM-DD``), duration, active-week statistics, medians, median
        crossings and total searches. A "week" is one row of the group, and an
        "active" week is a row whose count is greater than zero.

        For a group with no active week the season start/end dates are
        ``'N/A'``, durations and active-week statistics are 0, and the peak
        fields refer to the group's first row (count 0).
    """
    # assign() builds a new frame, so the caller's DataFrame keeps its
    # original columns instead of being overwritten in place.
    df = df.assign(
        date=pd.to_datetime(df['date']),
        search_count=pd.to_numeric(df['search_count'], errors='coerce').fillna(0).astype(int),
    )

    date_format = '%Y-%m-%d'
    results = []

    for (location, search_term, year), group in df.groupby(['location', 'search_term', 'year']):
        group = group.sort_values('date').reset_index(drop=True)
        if group.empty:
            continue

        dates = group['date'].values
        search_counts = group['search_count'].values

        # Positions (in chronological order) of the weeks with any searches.
        non_zero_mask = search_counts > 0
        non_zero_indices = np.flatnonzero(non_zero_mask)
        non_zero_counts = search_counts[non_zero_mask]
        has_activity = non_zero_indices.size > 0

        if has_activity:
            start_idx = int(non_zero_indices[0])
            end_idx = int(non_zero_indices[-1])
            start_ts = pd.to_datetime(dates[start_idx])
            end_ts = pd.to_datetime(dates[end_idx])

            season_start_date = start_ts.strftime(date_format)
            season_start_count = int(search_counts[start_idx])
            season_end_date = end_ts.strftime(date_format)
            season_end_count = int(search_counts[end_idx])

            # Weeks are counted as row offsets between the first and last
            # active rows; days come from the actual dates.
            duration_weeks = end_idx - start_idx
            duration_days = (end_ts - start_ts).days

            avg_count_active = float(np.mean(non_zero_counts))
            min_count_active = int(np.min(non_zero_counts))
            max_count_active = int(np.max(non_zero_counts))
            median_active_weeks = float(np.median(non_zero_counts))
        else:
            season_start_date = 'N/A'
            season_start_count = 0
            season_end_date = 'N/A'
            season_end_count = 0
            duration_weeks = 0
            duration_days = 0
            avg_count_active = 0.0
            min_count_active = 0
            max_count_active = 0
            median_active_weeks = 0.0

        # Peak: argmax returns the earliest row holding the maximum count.
        peak_idx = int(np.argmax(search_counts))
        peak_date = pd.to_datetime(dates[peak_idx]).strftime(date_format)
        peak_count = int(search_counts[peak_idx])

        median_all_weeks = float(np.median(search_counts))

        # A crossing is any pair of consecutive weeks on opposite sides of the
        # all-weeks median, where a count equal to the median is treated as
        # being on the upper side.
        at_or_above = search_counts >= median_all_weeks
        median_crossings = int(np.count_nonzero(at_or_above[1:] != at_or_above[:-1]))

        results.append({
            'location': location,
            'latitude': group['latitude'].iloc[0],
            'longitude': group['longitude'].iloc[0],
            'geo_code': group['geo_code'].iloc[0],
            'state': group['state'].iloc[0],
            'country': group['country'].iloc[0],
            'search_term': search_term,
            'year': int(year),
            'season_start_date': season_start_date,
            'season_start_count': season_start_count,
            'peak_date': peak_date,
            'peak_count': peak_count,
            'season_end_date': season_end_date,
            'season_end_count': season_end_count,
            'duration_weeks': duration_weeks,
            'duration_days': duration_days,
            'num_active_weeks': int(non_zero_counts.size),
            'total_weeks': int(search_counts.size),
            'median_all_weeks': round(median_all_weeks, 1),
            'median_active_weeks': round(median_active_weeks, 1),
            'median_crossings': median_crossings,
            'avg_count_active_weeks': round(avg_count_active, 1),
            'min_count_active_weeks': min_count_active,
            'max_count_active_weeks': max_count_active,
            'total_searches': int(np.sum(search_counts)),
        })

    return pd.DataFrame(results)