In [None]:
!pip install -q scikit-learn
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from sklearn.multioutput import MultiOutputRegressor
import warnings
warnings.filterwarnings('ignore')

In [None]:
class SmartphoneBenchmarkPredictor:
    """Predict smartphone benchmark scores from hardware specifications.

    The class can generate a synthetic training set, engineer features,
    train several multi-output regressors (keeping the one with the best
    R² on the held-out split) and predict scores for new devices.
    """

    def __init__(self):
        # Best fitted multi-output regressor; set by train_model().
        self.model = None
        # Scaler fitted on the training features in train_model().
        self.scaler = StandardScaler()
        # Maps categorical column name -> fitted LabelEncoder.
        self.label_encoders = {}
        # Feature column order used at fit time; reused for prediction.
        self.feature_names = []
        self.target_names = ['cpu_score', 'gpu_score', 'memory_score', 'ux_score',
                             'single_core_score', 'multi_core_score', 'overall_score']

    def generate_synthetic_dataset(self, n_samples=5000, random_state=42):
        """Generate a synthetic smartphone dataset with specs and benchmark scores.

        Args:
            n_samples: Number of device rows to generate.
            random_state: Seed for the private random generator. The default
                (42) reproduces the data that seeding the global NumPy RNG
                with 42 would give; pass ``None`` for a non-deterministic
                dataset.

        Returns:
            pandas.DataFrame with one row per device, containing the
            specification columns followed by the seven score columns
            listed in ``self.target_names``.
        """
        # A private RandomState yields the same stream as the legacy
        # np.random.seed()/np.random.* functions without clobbering the
        # process-wide RNG state that other code may rely on.
        rng = np.random.RandomState(random_state)

        # Define realistic ranges for smartphone specifications
        processors = {
            'Snapdragon 8 Gen 3': {'base_freq': 3.3, 'cores': 8, 'nm': 4, 'performance_factor': 1.0},
            'Snapdragon 8 Gen 2': {'base_freq': 3.2, 'cores': 8, 'nm': 4, 'performance_factor': 0.95},
            'Snapdragon 8 Gen 1': {'base_freq': 3.0, 'cores': 8, 'nm': 4, 'performance_factor': 0.90},
            'Apple A17 Pro': {'base_freq': 3.8, 'cores': 6, 'nm': 3, 'performance_factor': 1.1},
            'Apple A16 Bionic': {'base_freq': 3.5, 'cores': 6, 'nm': 4, 'performance_factor': 1.05},
            'Apple A15 Bionic': {'base_freq': 3.2, 'cores': 6, 'nm': 5, 'performance_factor': 1.0},
            'Dimensity 9300': {'base_freq': 3.25, 'cores': 8, 'nm': 4, 'performance_factor': 0.98},
            'Exynos 2400': {'base_freq': 3.2, 'cores': 10, 'nm': 4, 'performance_factor': 0.92},
            'Tensor G4': {'base_freq': 3.1, 'cores': 8, 'nm': 4, 'performance_factor': 0.88}
        }

        gpus = {
            'Adreno 750': {'performance_factor': 1.0, 'memory_bandwidth': 100},
            'Adreno 740': {'performance_factor': 0.9, 'memory_bandwidth': 90},
            'Apple GPU (6-core)': {'performance_factor': 1.1, 'memory_bandwidth': 120},
            'Apple GPU (5-core)': {'performance_factor': 1.0, 'memory_bandwidth': 100},
            'Mali-G720 MP12': {'performance_factor': 0.85, 'memory_bandwidth': 85},
            'Xclipse 940': {'performance_factor': 0.88, 'memory_bandwidth': 88}
        }

        # Loop-invariant lookups hoisted out of the sampling loop.
        processor_names = list(processors)
        gpu_names = list(gpus)
        android_vendors = ('Snapdragon', 'Dimensity', 'Exynos', 'Tensor')

        data = []

        # NOTE: the order of the rng calls below defines the generated
        # values for a given seed; do not reorder them.
        for _ in range(n_samples):
            # Random device specifications (processor and GPU are drawn independently)
            processor = rng.choice(processor_names)
            gpu = rng.choice(gpu_names)

            # Basic specs
            ram_gb = rng.choice([4, 6, 8, 12, 16, 18, 24], p=[0.05, 0.15, 0.25, 0.25, 0.20, 0.08, 0.02])
            storage_gb = rng.choice([64, 128, 256, 512, 1024], p=[0.05, 0.25, 0.35, 0.25, 0.10])
            screen_size = rng.uniform(5.0, 7.5)
            resolution_width = rng.choice([1080, 1440, 1644, 2160, 2400])
            resolution_height = int(resolution_width * rng.uniform(1.8, 2.4))
            refresh_rate = rng.choice([60, 90, 120, 144], p=[0.3, 0.2, 0.4, 0.1])
            battery_mah = rng.randint(3000, 6000)
            os_name = 'Android' if any(vendor in processor for vendor in android_vendors) else 'iOS'

            # Calculate synthetic benchmark scores based on specifications
            proc_specs = processors[processor]
            gpu_specs = gpus[gpu]

            # CPU Score (influenced by processor, cores, frequency)
            cpu_base = proc_specs['base_freq'] * proc_specs['cores'] * proc_specs['performance_factor'] * 50000
            cpu_score = int(cpu_base * rng.uniform(0.85, 1.15))

            # Single-core score
            single_core_score = int(proc_specs['base_freq'] * proc_specs['performance_factor'] * 1000 * rng.uniform(0.8, 1.2))

            # Multi-core score
            multi_core_score = int(single_core_score * proc_specs['cores'] * 0.8 * rng.uniform(0.85, 1.15))

            # GPU Score (influenced by GPU performance and resolution;
            # more pixels relative to 1920x1080 lowers the score)
            gpu_base = gpu_specs['performance_factor'] * gpu_specs['memory_bandwidth'] * 800
            resolution_factor = (resolution_width * resolution_height) / (1920 * 1080)
            gpu_score = int(gpu_base / resolution_factor * rng.uniform(0.8, 1.2))

            # Memory Score (influenced by RAM and storage)
            memory_base = (ram_gb * 5000) + (storage_gb * 10)
            memory_score = int(memory_base * rng.uniform(0.9, 1.1))

            # UX Score (influenced by overall system performance, refresh rate)
            ux_base = (cpu_score * 0.3 + gpu_score * 0.2 + memory_score * 0.3) / 1000
            ux_base *= (refresh_rate / 60) * rng.uniform(0.85, 1.15)
            ux_score = int(ux_base)

            # Overall Score (weighted sum of all scores)
            overall_score = int((cpu_score * 0.25 + gpu_score * 0.25 + memory_score * 0.15 +
                                 ux_score * 0.15 + single_core_score * 0.1 + multi_core_score * 0.1))

            data.append({
                'processor': processor,
                'gpu': gpu,
                'ram_gb': ram_gb,
                'storage_gb': storage_gb,
                'screen_size_inches': screen_size,
                'resolution_width': resolution_width,
                'resolution_height': resolution_height,
                'refresh_rate_hz': refresh_rate,
                'battery_mah': battery_mah,
                'os': os_name,
                'processor_cores': proc_specs['cores'],
                'processor_base_freq_ghz': proc_specs['base_freq'],
                'processor_nm': proc_specs['nm'],
                'cpu_score': cpu_score,
                'gpu_score': gpu_score,
                'memory_score': memory_score,
                'ux_score': ux_score,
                'single_core_score': single_core_score,
                'multi_core_score': multi_core_score,
                'overall_score': overall_score
            })

        return pd.DataFrame(data)

    def preprocess_data(self, df, fit=False):
        """Encode categorical columns and add engineered features.

        Args:
            df: DataFrame holding at least the specification columns
                (``processor``, ``gpu``, ``os``, resolution, screen size,
                RAM, storage, battery, processor cores/frequency).
            fit: When True, always (re)fit the label encoders on ``df``;
                used for training so that a retrain on a new dataset does
                not reuse stale encoders. When False (default), an encoder
                is fitted only if none exists yet for the column, otherwise
                the existing one is applied.

        Returns:
            A processed copy of ``df``; the input is not modified.

        Raises:
            ValueError: If a categorical column contains a value that the
                already-fitted encoder has not seen.
        """
        df_processed = df.copy()

        # Encode categorical variables
        for col in ('processor', 'gpu', 'os'):
            encoder = self.label_encoders.get(col)
            if fit or encoder is None:
                encoder = LabelEncoder()
                df_processed[col] = encoder.fit_transform(df_processed[col])
                self.label_encoders[col] = encoder
            else:
                # Fail with a message naming the column instead of the
                # encoder's generic "unseen labels" error.
                unseen = sorted(set(df_processed[col]) - set(encoder.classes_), key=str)
                if unseen:
                    raise ValueError(
                        f"Unknown value(s) {unseen} in column '{col}'; "
                        f"known values: {list(encoder.classes_)}"
                    )
                df_processed[col] = encoder.transform(df_processed[col])

        # Create additional features
        df_processed['total_pixels'] = df_processed['resolution_width'] * df_processed['resolution_height']
        df_processed['pixel_density'] = df_processed['total_pixels'] / (df_processed['screen_size_inches'] ** 2)
        df_processed['performance_per_core'] = df_processed['processor_base_freq_ghz'] / df_processed['processor_cores']
        df_processed['ram_to_storage_ratio'] = df_processed['ram_gb'] / df_processed['storage_gb']
        df_processed['battery_per_pixel'] = df_processed['battery_mah'] / df_processed['total_pixels']

        return df_processed

    def train_model(self, df, test_size=0.2):
        """Train candidate models and keep the one with the best test R².

        Fits a random forest, a gradient boosting and a linear regression
        model (each wrapped in ``MultiOutputRegressor``) on a scaled
        train split, prints their metrics, and stores the best one in
        ``self.model``.

        Args:
            df: DataFrame with specification columns and all columns named
                in ``self.target_names``.
            test_size: Fraction of rows held out for evaluation.

        Returns:
            Tuple ``(results, X_test_scaled, y_test)`` where ``results``
            maps model name to a dict with keys ``'R2'``, ``'MSE'`` and
            ``'MAE'``.
        """
        # Refit the encoders so repeated training never reuses stale ones.
        df_processed = self.preprocess_data(df, fit=True)

        # Separate features and targets
        feature_columns = [col for col in df_processed.columns if col not in self.target_names]
        X = df_processed[feature_columns]
        y = df_processed[self.target_names]

        self.feature_names = feature_columns

        # Split data
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=42)

        # Scale features (scaler is fitted on the training split only)
        X_train_scaled = self.scaler.fit_transform(X_train)
        X_test_scaled = self.scaler.transform(X_test)

        # Train multiple models and select the best one
        models = {
            'RandomForest': MultiOutputRegressor(RandomForestRegressor(n_estimators=100, random_state=42)),
            'GradientBoosting': MultiOutputRegressor(GradientBoostingRegressor(n_estimators=100, random_state=42)),
            'LinearRegression': MultiOutputRegressor(LinearRegression())
        }

        best_name = None
        best_score = -np.inf
        results = {}

        print("Training and evaluating models...")
        for name, model in models.items():
            model.fit(X_train_scaled, y_train)
            y_pred = model.predict(X_test_scaled)

            # Metrics are computed over all targets at once.
            r2 = r2_score(y_test, y_pred)
            mse = mean_squared_error(y_test, y_pred)
            mae = mean_absolute_error(y_test, y_pred)

            results[name] = {'R2': r2, 'MSE': mse, 'MAE': mae}

            print(f"{name}: R² = {r2:.4f}, MSE = {mse:.2f}, MAE = {mae:.2f}")

            # Strict comparison: on ties the earlier model is kept.
            if r2 > best_score:
                best_score = r2
                best_name = name
                self.model = model

        print(f"\nBest model: {best_name} with R² = {best_score:.4f}")

        # Feature importance (if available), averaged over the per-target estimators
        if hasattr(self.model.estimators_[0], 'feature_importances_'):
            feature_importance = np.mean([est.feature_importances_ for est in self.model.estimators_], axis=0)
            importance_df = pd.DataFrame({
                'feature': self.feature_names,
                'importance': feature_importance
            }).sort_values('importance', ascending=False)

            print("\nTop 10 Most Important Features:")
            print(importance_df.head(10))

        return results, X_test_scaled, y_test

    def predict_benchmark_scores(self, device_specs):
        """Predict benchmark scores for a new device.

        Args:
            device_specs: Dict of specification values for one device, or
                a DataFrame of specifications. For a DataFrame only the
                first row's prediction is returned.

        Returns:
            Dict mapping each name in ``self.target_names`` to the
            predicted score truncated to ``int``.

        Raises:
            ValueError: If no model has been trained yet, or if a
                categorical value was not seen during training.
        """
        if self.model is None:
            raise ValueError("Model not trained. Please train the model first.")

        # Convert to DataFrame
        if isinstance(device_specs, dict):
            device_specs = pd.DataFrame([device_specs])

        # Preprocess and select columns in the order used at fit time
        device_processed = self.preprocess_data(device_specs)
        device_features = device_processed[self.feature_names]

        # Scale features
        device_scaled = self.scaler.transform(device_features)

        # Predict
        predictions = self.model.predict(device_scaled)

        # Return the first row as a dictionary
        first_row = predictions[0]
        return {target: int(first_row[i]) for i, target in enumerate(self.target_names)}

    def visualize_results(self, X_test, y_test):
        """Plot predicted vs. actual values for every target.

        Does nothing if no model has been trained.

        Args:
            X_test: Scaled test features, as returned by ``train_model``.
            y_test: DataFrame of true target values whose columns are in
                the order of ``self.target_names``.
        """
        if self.model is None:
            return

        y_pred = self.model.predict(X_test)

        # Size the grid from the number of targets (4 columns, 5 inches
        # per row) so it still works if target_names changes.
        n_cols = 4
        n_rows = max(1, int(np.ceil(len(self.target_names) / n_cols)))
        fig, axes = plt.subplots(n_rows, n_cols, figsize=(20, 5 * n_rows), squeeze=False)
        axes = axes.flatten()

        for i, target in enumerate(self.target_names):
            ax = axes[i]
            actual = y_test.iloc[:, i]
            predicted = y_pred[:, i]
            title = target.replace("_", " ").title()

            ax.scatter(actual, predicted, alpha=0.6)
            # Red dashed diagonal marks a perfect prediction.
            ax.plot([actual.min(), actual.max()],
                    [actual.min(), actual.max()], 'r--', lw=2)
            ax.set_xlabel(f'Actual {title}')
            ax.set_ylabel(f'Predicted {title}')
            ax.set_title(f'{title} Prediction')

            # Calculate R²
            r2 = r2_score(actual, predicted)
            ax.text(0.05, 0.95, f'R² = {r2:.3f}', transform=ax.transAxes,
                    bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5))

        # Hide every subplot that has no target assigned
        for unused_ax in axes[len(self.target_names):]:
            unused_ax.set_visible(False)

        plt.tight_layout()
        plt.show()

In [None]:
def main():
    """Run the end-to-end demo.

    Generates a synthetic dataset, trains and evaluates the models, plots
    predicted vs. actual scores, and prints predictions for two example
    devices together with a comparison of their overall scores.
    """
    def print_banner(title):
        # Section header: blank line, rule, title, rule.
        print("\n" + "="*50)
        print(title)
        print("="*50)

    def print_scores(scores):
        # One indented line per score with thousands separators.
        for score_type, score in scores.items():
            print(f"  {score_type.replace('_', ' ').title()}: {score:,}")

    # Initialize the predictor
    predictor = SmartphoneBenchmarkPredictor()

    # Generate synthetic dataset
    print("Generating synthetic dataset...")
    df = predictor.generate_synthetic_dataset(n_samples=5000)
    print(f"Dataset created with {len(df)} samples")
    print("\nDataset Info:")
    # DataFrame.info() writes its report to stdout itself and returns None,
    # so wrapping it in print() would emit a stray "None" line.
    df.info()
    print("\nFirst few rows:")
    print(df.head())

    # Train the model
    print_banner("TRAINING MODEL")
    results, X_test, y_test = predictor.train_model(df)

    # Visualize results
    print_banner("VISUALIZING RESULTS")
    predictor.visualize_results(X_test, y_test)

    # Example predictions
    print_banner("EXAMPLE PREDICTIONS")

    # Example device 1: High-end Android
    device1 = {
        'processor': 'Snapdragon 8 Gen 3',
        'gpu': 'Adreno 750',
        'ram_gb': 12,
        'storage_gb': 256,
        'screen_size_inches': 6.7,
        'resolution_width': 1440,
        'resolution_height': 3120,
        'refresh_rate_hz': 120,
        'battery_mah': 4500,
        'os': 'Android',
        'processor_cores': 8,
        'processor_base_freq_ghz': 3.3,
        'processor_nm': 4
    }

    # Example device 2: iPhone
    device2 = {
        'processor': 'Apple A17 Pro',
        'gpu': 'Apple GPU (6-core)',
        'ram_gb': 8,
        'storage_gb': 512,
        'screen_size_inches': 6.1,
        'resolution_width': 1179,
        'resolution_height': 2556,
        'refresh_rate_hz': 120,
        'battery_mah': 3274,
        'os': 'iOS',
        'processor_cores': 6,
        'processor_base_freq_ghz': 3.8,
        'processor_nm': 3
    }

    print("Device 1 (High-end Android):")
    pred1 = predictor.predict_benchmark_scores(device1)
    print_scores(pred1)

    print("\nDevice 2 (iPhone):")
    pred2 = predictor.predict_benchmark_scores(device2)
    print_scores(pred2)

    # Compare overall scores
    print("\nComparison:")
    print(f"Android Overall Score: {pred1['overall_score']:,}")
    print(f"iPhone Overall Score: {pred2['overall_score']:,}")
    print(f"Difference: {abs(pred1['overall_score'] - pred2['overall_score']):,} points")

# Run the demo only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()