In [None]:
from typing import Dict, Optional, Tuple, Union

import pandas as pd

from technical_interview_solution import process_sales_batch
from test_data import create_sample_data, get_weekly_sales_data

Technical Interview Question 2:

2. For both `combine_sales_with_asset_data()` and `create_consolidated_weekly_report()` functions:
   - Explain the purpose of the function and its parameters to us.
   - Write a docstring and type hints for these functions.

In [None]:
# Build the sample inputs; create_sample_data() is unpacked as a (sales, assets) pair.
sales_data, asset_data = create_sample_data()
# Split the sales data by week. Presumably a mapping keyed by year-week labels
# (it is indexed with '2023-01' in a later cell) -- TODO confirm against test_data.
weekly_results = get_weekly_sales_data(sales_data)

In [None]:
# Run the batch processor on one week's entry (key '2023-01') as a smoke check.
result = process_sales_batch(weekly_results['2023-01'])

In [None]:
def _most_common(values: pd.Series):
    """Return the most frequent value of *values*, or None if there is none."""
    modes = values.mode()
    return modes.iloc[0] if not modes.empty else None


def _unique_list(values: pd.Series) -> list:
    """Return the distinct values of *values* as a list, in order of appearance."""
    return list(values.unique())


def combine_sales_with_asset_data(
    sales_results: pd.DataFrame,
    asset_data: pd.DataFrame,
    target_tz: Optional[str] = None,
) -> pd.DataFrame:
    """Enrich sales rows with a per-portfolio asset summary and derived ratios.

    The function:

    1. Normalises every datetime column of ``sales_results`` to UTC (naive
       values are treated as UTC, aware values are converted).
    2. Collapses ``asset_data`` to one row per ``portfolio_id`` (asset count,
       most common geography, distinct ISO regions and timezones, oldest and
       newest operational date, portfolio age in years).
    3. Left-joins that summary onto the sales rows via ``portfolio_id``, so
       sales rows without a matching portfolio are kept with missing summary
       values.
    4. Adds ``mwh_per_asset``, ``revenue_per_asset`` and ``revenue_per_mwh``.
    5. Optionally converts the datetime columns to ``target_tz``.

    Args:
        sales_results: Sales data. Must contain ``portfolio_id``, ``MWh`` and
            ``sales_amount``; any datetime columns are timezone-normalised.
            The frame passed in is not modified.
        asset_data: Asset master data. Must contain ``portfolio_id``,
            ``asset_id``, ``geography``, ``ISO``, ``operational_date`` and
            ``timezone``.
        target_tz: Optional timezone name. When truthy, the datetime columns
            of the result are converted to it and a ``target_timezone``
            column holding this value is added.

    Returns:
        A new DataFrame with one row per input sales row, the asset summary
        columns and the derived metrics. Divisions are not guarded: a zero
        ``MWh`` or a missing ``asset_count`` yields inf/NaN per pandas rules.
    """
    # Copy first: the tz normalisation below must not leak into the caller's frame.
    sales = sales_results.copy()

    # Detect datetime columns by kind rather than by the exact 'datetime64[ns]'
    # dtype string, so tz-aware and non-nanosecond columns are included too.
    datetime_cols = [
        col for col in sales.columns
        if pd.api.types.is_datetime64_any_dtype(sales[col])
    ]
    for col in datetime_cols:
        if sales[col].dt.tz is None:
            sales[col] = sales[col].dt.tz_localize('UTC')
        else:
            sales[col] = sales[col].dt.tz_convert('UTC')

    # One summary row per portfolio. Named aggregation ties each output name
    # to its source column instead of relying on positional renaming.
    asset_portfolio_summary = asset_data.groupby('portfolio_id').agg(
        asset_count=('asset_id', 'count'),
        primary_geography=('geography', _most_common),
        iso_regions=('ISO', _unique_list),
        oldest_asset_date=('operational_date', 'min'),
        newest_asset_date=('operational_date', 'max'),
        timezones=('timezone', _unique_list),
    ).reset_index()

    # Portfolio age, computed vectorised. Converting through pd.to_datetime
    # makes this work for both datetime64 columns and columns of date objects
    # (subtracting a Timestamp from a datetime.date raises TypeError).
    today = pd.Timestamp.now().normalize()
    oldest = pd.to_datetime(asset_portfolio_summary['oldest_asset_date'])
    if oldest.dt.tz is not None:
        # Drop tz info so the subtraction from the naive "today" is valid.
        oldest = oldest.dt.tz_localize(None)
    asset_portfolio_summary['portfolio_age_years'] = (
        (today - oldest).dt.days / 365.25
    ).round(1)

    combined_data = pd.merge(
        sales,
        asset_portfolio_summary,
        on='portfolio_id',
        how='left',
    )

    combined_data['mwh_per_asset'] = combined_data['MWh'] / combined_data['asset_count']
    combined_data['revenue_per_asset'] = combined_data['sales_amount'] / combined_data['asset_count']
    combined_data['revenue_per_mwh'] = combined_data['sales_amount'] / combined_data['MWh']

    # All datetime columns are UTC-aware at this point, so tz_convert is valid.
    if target_tz:
        for col in datetime_cols:
            combined_data[col] = combined_data[col].dt.tz_convert(target_tz)
        combined_data['target_timezone'] = target_tz

    return combined_data

In [None]:
def create_consolidated_weekly_report(
    weekly_results: Dict[str, pd.DataFrame],
) -> Union[Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame], Dict[str, str]]:
    """Consolidate per-week result frames into comparison and summary tables.

    Args:
        weekly_results: Mapping from a week label to that week's DataFrame.
            Each frame must contain ``portfolio_id``, ``asset_type``,
            ``year_week``, ``MWh``, ``sales_amount``, ``price``,
            ``revenue_per_mwh``, ``transaction_count``, ``asset_count``,
            ``primary_geography`` and ``portfolio_age_years``.

    Returns:
        For non-empty input, a 3-tuple ``(weekly_comparison, summary,
        all_weeks_data)``:

        * ``weekly_comparison`` -- pivot table with one row per
          (``portfolio_id``, ``asset_type``) and one column per metric and
          ``year_week`` (pandas' default mean aggregation), index reset.
        * ``summary`` -- one row per (``portfolio_id``, ``asset_type``) with
          summed ``MWh``, ``sales_amount`` and ``transaction_count``, mean
          ``price``, the first-seen asset attributes, plus
          ``avg_weekly_revenue`` and ``avg_revenue_per_mwh``.
        * ``all_weeks_data`` -- all weekly frames concatenated row-wise.

        For empty input, the dict ``{"error": "No weekly data provided"}``.
        This sentinel is kept for backward compatibility, so callers must
        check for it before unpacking the result.
    """
    if not weekly_results:
        return {"error": "No weekly data provided"}

    all_weeks_data = pd.concat(list(weekly_results.values()), ignore_index=True)

    group_keys = ['portfolio_id', 'asset_type']

    # Week-over-week comparison: metrics spread across year_week columns.
    weekly_comparison = pd.pivot_table(
        all_weeks_data,
        values=['MWh', 'sales_amount', 'price', 'revenue_per_mwh'],
        index=group_keys,
        columns='year_week',
    ).reset_index()

    # Totals over all supplied weeks for each portfolio / asset type.
    summary = all_weeks_data.groupby(group_keys).agg({
        'MWh': 'sum',
        'sales_amount': 'sum',
        'transaction_count': 'sum',
        'price': 'mean',
        'asset_count': 'first',
        'primary_geography': 'first',
        'portfolio_age_years': 'first',
    }).reset_index()

    # The average divides by the number of weeks supplied, not by the number
    # of weeks in which a given group actually has rows.
    summary['avg_weekly_revenue'] = summary['sales_amount'] / len(weekly_results)
    summary['avg_revenue_per_mwh'] = summary['sales_amount'] / summary['MWh']

    return weekly_comparison, summary, all_weeks_data