In [40]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pandas import ExcelWriter

# Load dataset
# NOTE(review): absolute, machine-specific Windows path; the notebook only runs
# where this exact file exists. Consider a configurable, relative data directory.
file_path = r'C:/Users/Revathi/Documents/Dataset/smart_home_device_usage_data.csv'
# Read the CSV into a module-level DataFrame. main() further down reads the same
# file again on its own instead of using this `df`.
df = pd.read_csv(file_path)


In [41]:
# Calculate descriptive statistics for each DeviceType
def calculate_statistics(
    df,
    features=(
        'EnergyConsumption',
        'UsageHoursPerDay',
        'MalfunctionIncidents',
        'DeviceAgeMonths',
    ),
):
    """Summarise numeric feature columns separately for every device type.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'DeviceType' column plus every column named in
        ``features``.
    features : iterable of str, optional
        Columns to summarise. Defaults to the four columns this notebook
        reports on, in the order they are reported.

    Returns
    -------
    list of dict
        One record per (device type, feature) pair. Device types appear in
        order of first occurrence in ``df``; within a device type, features
        follow the order of ``features``. Each record has the keys
        'DeviceType', 'Feature', 'Mean', 'Standard Deviation', 'Median',
        '25th Percentile' and '75th Percentile'.

    Raises
    ------
    KeyError
        If 'DeviceType' or one of the requested feature columns is missing.
    """
    stats = []

    # unique() keeps first-appearance order, which fixes the order of the output.
    # Note: a missing (NaN) DeviceType never compares equal to itself, so such a
    # label selects no rows and its statistics come out as NaN.
    for device in df['DeviceType'].unique():
        device_df = df[df['DeviceType'] == device]

        for feature in features:
            values = device_df[feature]
            stats.append({
                'DeviceType': device,
                'Feature': feature,
                'Mean': values.mean(),
                # pandas default: sample standard deviation (ddof=1)
                'Standard Deviation': values.std(),
                'Median': values.median(),
                '25th Percentile': values.quantile(0.25),
                '75th Percentile': values.quantile(0.75),
            })

    return stats


In [42]:
# Store statistics in an Excel sheet
def store_statistics_in_excel(stats, filename='statistics_by_device_type.xlsx'):
    """Write the statistics records to a single-sheet Excel workbook.

    Parameters
    ----------
    stats : list of dict
        Records to tabulate; they are passed straight to ``pd.DataFrame``.
    filename : str, optional
        Destination workbook path.

    The table is written with the openpyxl engine to a sheet named
    'Statistics', without the DataFrame index column.
    """
    # Passing a path lets to_excel open, write and close the workbook itself.
    pd.DataFrame(stats).to_excel(
        filename,
        sheet_name='Statistics',
        index=False,
        engine='openpyxl',
    )


In [43]:
def main(input_path=None, output_path='statistics_by_device_type.xlsx'):
    """Load the dataset, print per-device statistics and save them to Excel.

    Parameters
    ----------
    input_path : str, optional
        CSV file to analyse. When omitted, the module-level ``file_path``
        is used.
    output_path : str, optional
        Workbook the statistics are written to. The confirmation message
        reports this same path, so the two cannot drift apart.
    """
    if input_path is None:
        input_path = file_path

    # Load dataset
    df = pd.read_csv(input_path)

    # Calculate statistics
    stats = calculate_statistics(df)
    print("Descriptive Statistics:")
    for stat in stats:
        print(f"\nDevice Type: {stat['DeviceType']}")
        print(f"  Feature: {stat['Feature']}")
        print(f"    Mean: {stat['Mean']:.2f}")
        print(f"    Standard Deviation: {stat['Standard Deviation']:.2f}")
        print(f"    Median: {stat['Median']:.2f}")
        print(f"    25th Percentile: {stat['25th Percentile']:.2f}")
        print(f"    75th Percentile: {stat['75th Percentile']:.2f}")

    # Store the statistics in an Excel sheet
    store_statistics_in_excel(stats, output_path)

    print(f"\nStatistics have been saved to {output_path}")

# Run the main function
if __name__ == "__main__":
    main()


Descriptive Statistics:

Device Type: Smart Speaker
  Feature: EnergyConsumption
    Mean: 4.87
    Standard Deviation: 2.89
    Median: 4.74
    25th Percentile: 2.36
    75th Percentile: 7.41

Device Type: Smart Speaker
  Feature: UsageHoursPerDay
    Mean: 11.98
    Standard Deviation: 6.68
    Median: 11.83
    25th Percentile: 6.18
    75th Percentile: 17.71

Device Type: Smart Speaker
  Feature: MalfunctionIncidents
    Mean: 2.04
    Standard Deviation: 1.42
    Median: 2.00
    25th Percentile: 1.00
    75th Percentile: 3.00

Device Type: Smart Speaker
  Feature: DeviceAgeMonths
    Mean: 30.59
    Standard Deviation: 17.08
    Median: 31.00
    25th Percentile: 16.00
    75th Percentile: 45.00

Device Type: Camera
  Feature: EnergyConsumption
    Mean: 5.08
    Standard Deviation: 2.93
    Median: 4.96
    25th Percentile: 2.53
    75th Percentile: 7.74

Device Type: Camera
  Feature: UsageHoursPerDay
    Mean: 12.11
    Standard Deviation: 6.85
    Median: 12.33
    25th Perc