In [None]:
import pandas as pd
import numpy as np
from typing import Dict
import warnings
from sklearn.model_selection import train_test_split



def run_analysis(df: pd.DataFrame) -> None:
    """
    Run the complete regression analysis pipeline and print the results.

    The pipeline preprocesses ``df``, randomly picks one non-time column as
    the regression target (every other non-time column becomes a feature),
    holds out 20% of the rows, and then fits a robust regression on the full
    training split as well as period-specific regressions.

    Args:
        df (pd.DataFrame): Input DataFrame with your movie data. Must contain
            a 'year' column plus at least one other column.

    Raises:
        ValueError: If ``df`` has no 'year' column, or has no column other
            than 'year' to use as a target.
    """
    time_col = 'year'

    # Step 1: Preprocess the data
    print("Starting data preprocessing...")
    # NOTE(review): the preprocessing result is not used by any later step
    # (the regressions below run on the raw ``df``). The call is kept in case
    # it has side effects -- confirm whether its output should feed Step 3/4.
    regression_preprocessing(df)

    # Step 2: Randomly select features and target variable
    if time_col not in df.columns:
        raise ValueError(
            f"Input DataFrame must contain a '{time_col}' column"
        )
    # Ensure 'year' is not selected as a feature or target
    candidate_cols = [col for col in df.columns if col != time_col]
    if not candidate_cols:
        raise ValueError(
            f"Input DataFrame needs at least one column besides '{time_col}'"
        )
    # Draw an index rather than the label itself: passing the labels through
    # np.random.choice would coerce them into a NumPy array, which can change
    # their type (e.g. mixed int/str labels all become strings) so the chosen
    # value would no longer match the DataFrame's column label.
    y_col = candidate_cols[np.random.randint(len(candidate_cols))]
    X_cols = [col for col in candidate_cols if col != y_col]

    print(f"Selected target column: {y_col}")
    print(f"Selected feature columns: {X_cols}")

    # Step 3: Split the data into training and test sets
    # Only the training split is used below; the held-out 20% is discarded.
    train_df, _ = train_test_split(df, test_size=0.2, random_state=42)

    # Step 4: Run both types of regression

    # 4.1: Full dataset robust regression on training data
    print("\n=== Full Dataset Robust Regression (Training Data) ===")
    full_results = robust_regression_model(train_df[X_cols], train_df[y_col])
    print("\nFull Dataset Results (Training Data):")
    print(full_results['summary'])
    print(f"R-squared: {full_results['rsquared']:.4f}")
    print(f"Number of observations: {full_results['nobs']}")

    # 4.2: Period-specific regressions on training data
    print("\n=== Period-Specific Regressions (Training Data) ===")
    period_results = time_period_regressions(
        df=train_df,
        X_cols=X_cols,
        y_col=y_col,
        time_col=time_col,
    )

    # Print results for each period; nothing is printed when no summary
    # entry is present in the results.
    if 'summary' not in period_results:
        return

    summary = period_results['summary']
    for period in summary['period'].unique():
        period_data = summary[summary['period'] == period]
        print(f"\nPeriod: {period}")
        print("-" * 50)
        print(period_data.drop('period', axis=1).to_string())
        # R-squared and observation count are read from the first row of
        # the period's slice.
        print(f"R-squared: {period_data['r_squared'].iloc[0]:.4f}")
        print(f"Observations: {period_data['n_observations'].iloc[0]}")


ImportError: attempted relative import with no known parent package