In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the dataset from a CSV file located in the working directory.
data = pd.read_csv('user_behavior_dataset.csv')

# info() prints its report (dtypes, non-null counts) to stdout, so it shows
# up regardless of where it sits in the cell.
data.info()

# head() only renders when it is the cell's final expression; placed mid-cell
# its result is silently discarded, so it must come last.
data.head()

In [None]:
# Columns treated as numeric; this list is reused by later plotting cells.
numerical_columns = ['App Usage Time (min/day)', 'Screen On Time (hours/day)',
                     'Battery Drain (mAh/day)', 'Number of Apps Installed',
                     'Data Usage (MB/day)', 'Age', 'User Behavior Class']

# Slice once instead of re-indexing the frame for every statistic.
numeric_data = data[numerical_columns]

# count / mean / std / min / quartiles / max for each numeric column.
desc_stats = numeric_data.describe()

# IQR rule: a value is flagged when it lies more than 1.5 * IQR below the
# first quartile or above the third quartile of its column.
Q1 = numeric_data.quantile(0.25)
Q3 = numeric_data.quantile(0.75)
IQR = Q3 - Q1
outliers = (numeric_data < (Q1 - 1.5 * IQR)) | (numeric_data > (Q3 + 1.5 * IQR))

# A bare `desc_stats` in the middle of a cell is never rendered, so the
# statistics and the per-column outlier counts are combined into one table
# that is the cell's final (and therefore displayed) expression.
desc_stats.T.assign(**{'Outlier Count': outliers.sum()})

In [None]:
# Draw one 20-bin histogram panel per numeric column on a 14x10 inch figure,
# then title the figure as a whole.
data.loc[:, numerical_columns].hist(figsize=(14, 10), bins=20)
plt.gcf().suptitle("Histograms for Numerical Variables")
plt.show()

# Columns holding category labels; each gets its own frequency bar chart.
categorical_columns = ['Device Model', 'Operating System', 'Gender']
for col in categorical_columns:
    # Pass the frame and column by keyword: the first positional parameter of
    # countplot changed from `x` to `data` across seaborn releases, so a
    # positional Series is no longer reliably interpreted as the category axis.
    sns.countplot(data=data, x=col)
    plt.title(f"Bar Chart for {col}")
    # Show inside the loop so every column is rendered as a separate figure.
    plt.show()

In [None]:
# Side-by-side box plots of every numeric column in the dataset.
fig, ax = plt.subplots(figsize=(10, 6))

# 'number' selects all numeric dtypes; listing only int64/float64 would
# silently drop columns stored with any other integer or float width.
sns.boxplot(data=data.select_dtypes(include='number'), ax=ax)

# Column names are long, so tilt the tick labels to keep them legible.
ax.tick_params(axis='x', labelrotation=45)
ax.set_xlabel('Numerical Variables')
ax.set_ylabel('Value Distribution')
ax.set_title('Box Plot of All Numerical Variables')

plt.show()

In [None]:
# Scatter-plot matrix over every pair of numeric columns.
pair_source = data[numerical_columns]
sns.pairplot(pair_source)
# y > 1 lifts the figure title above the top row of panels.
plt.gcf().suptitle("Pairwise Scatter Plot Matrix", y=1.02)
plt.show()

In [None]:
# Contingency table: rows are device models, columns are operating systems,
# and each cell holds the number of rows with that combination.
cross_tab = pd.crosstab(data['Device Model'], data['Operating System'])

# fmt='d' prints the counts as plain integers; the default annotation format
# ('.2g') would render any count of 100 or more in scientific notation.
sns.heatmap(cross_tab, annot=True, fmt='d', cmap='Blues')
plt.title('Device Model vs Operating System')
plt.show()

In [None]:
# App usage time against data usage, one point per row, coloured by gender.
scatter_ax = sns.scatterplot(
    data=data,
    x='App Usage Time (min/day)',
    y='Data Usage (MB/day)',
    hue='Gender',
)
scatter_ax.set_title('App Usage vs Data Usage (colored by Gender)')
plt.show()