In [1]:
# Import Statements

import pandas as pd

In [2]:
# Loading Data: read the raw dataset from disk and preview its first rows.

DATA_PATH = "main_data.csv"
df = pd.read_csv(DATA_PATH)
df.head(n=5)

Unnamed: 0.1,Unnamed: 0,Age Group,Gender,Education Level,Occupation,Average Screen Time,Device,Screen Activity,App Category,Screen Time Period,Environment,Productivity,Attention Span,Work Strategy,Notification Handling,Usage of Productivity Apps
0,0,18–24,Male,Undergraduate,Student,More than 10,Smartphone,"Entertainment (gaming, streaming, social media...","Social Media (e.g., Facebook, Instagram, Linke...",Evening (6 PM–10 PM),Quite workplace,Moderately productive,10–30 minutes,Take regular breaks,Check them briefly and resume my work,"Yes, but i did not find them of any help"
1,1,18–24,Male,Undergraduate,Professional,8-10,Smartphone,"Entertainment (gaming, streaming, social media...","Streaming (e.g., YouTube, Netflix)",Late night (10 PM–6 AM),Quite workplace,Moderately productive,More than 1 hour,"None, i prefer to work without any strategies",Ignore them until my task is completed,"No, i do not use them"
2,2,45 and above,Female,Graduate,Professional,4–6,Smartphone,Academic/Work-related,"Social Media (e.g., Facebook, Instagram, Linke...",Afternoon (12 PM–6 PM),I can work in any environment,Moderately productive,10–30 minutes,Take regular breaks,Check them briefly and resume my work,"No, i do not use them"
3,3,25–34,Male,Undergraduate,Professional,8-10,Laptop/PC,Academic/Work-related,"Social Media (e.g., Facebook, Instagram, Linke...",Afternoon (12 PM–6 PM),Quite workplace,"Extremely productive, i efficiently complete m...",10–30 minutes,"None, i prefer to work without any strategies",Ignore them until my task is completed,"No, i do not use them"
4,4,45 and above,Male,Graduate,Professional,8-10,Laptop/PC,Academic/Work-related,"Productivity (e.g., Microsoft Office, Notion)",Afternoon (12 PM–6 PM),I can work in any environment,"Extremely productive, i efficiently complete m...",30–60 minutes,"None, i prefer to work without any strategies",Check them briefly and resume my work,"No, i do not use them"


In [3]:
# Feature Selection: keep only the columns that the following cells work with.

selected_columns = [
    'Average Screen Time',
    'App Category',
    'Screen Activity',
    'Notification Handling',
    'Usage of Productivity Apps',
    'Attention Span',
]
df = df[selected_columns]
df.head()

Unnamed: 0,Average Screen Time,App Category,Screen Activity,Notification Handling,Usage of Productivity Apps,Attention Span
0,More than 10,"Social Media (e.g., Facebook, Instagram, Linke...","Entertainment (gaming, streaming, social media...",Check them briefly and resume my work,"Yes, but i did not find them of any help",10–30 minutes
1,8-10,"Streaming (e.g., YouTube, Netflix)","Entertainment (gaming, streaming, social media...",Ignore them until my task is completed,"No, i do not use them",More than 1 hour
2,4–6,"Social Media (e.g., Facebook, Instagram, Linke...",Academic/Work-related,Check them briefly and resume my work,"No, i do not use them",10–30 minutes
3,8-10,"Social Media (e.g., Facebook, Instagram, Linke...",Academic/Work-related,Ignore them until my task is completed,"No, i do not use them",10–30 minutes
4,8-10,"Productivity (e.g., Microsoft Office, Notion)",Academic/Work-related,Check them briefly and resume my work,"No, i do not use them",30–60 minutes


In [4]:
# Handling Missing Values: report per-column null counts, drop every row that
# has at least one null, then report the counts again to confirm none remain.

missing_before = df.isna().sum()
df = df.dropna()
missing_after = df.isna().sum()

# One print call; the empty string produces the blank separator line.
print(missing_before, "", missing_after, sep="\n")

Average Screen Time           0
App Category                  0
Screen Activity               0
Notification Handling         1
Usage of Productivity Apps    0
Attention Span                0
dtype: int64

Average Screen Time           0
App Category                  0
Screen Activity               0
Notification Handling         0
Usage of Productivity Apps    0
Attention Span                0
dtype: int64


In [5]:
# Analysing the data type: show the dtype pandas holds for each remaining
# column (displayed as the cell's output).

df.dtypes

Average Screen Time           object
App Category                  object
Screen Activity               object
Notification Handling         object
Usage of Productivity Apps    object
Attention Span                object
dtype: object

In [6]:
# Data Simplification: replace the free-text answer with a small numeric code.
# Reduced dimensionality helps generalize better and enables clearer behaviour
# grouping.
#   0 = uses productivity apps and finds them extremely helpful
#   1 = uses them but did not find them helpful
#   2 = does not use them
# Answers that are not listed in the mapping become NaN.

productivity_app_codes = {
    'Yes, they are extremely helpful': 0,
    'Yes, but i did not find them of any help': 1,
    'No, i do not use them': 2,
}
df = (
    df
    .assign(uses_productivity_apps=df['Usage of Productivity Apps'].map(productivity_app_codes))
    .drop(columns='Usage of Productivity Apps')
)

In [7]:
# Encode the notification-handling answer as a code from 0 (notifications
# turned off) to 3 (spends time interacting with them). Answers that are not
# listed in the mapping become NaN.
notification_codes = {
    'Turn off notifications altogether': 0,
    'Ignore them until my task is completed': 1,
    'Check them briefly and resume my work': 2,
    'Spend time interacting with the notifications': 3,
}
encoded_notifications = df['Notification Handling'].map(notification_codes)

# Swap the text column for its numeric counterpart (appended as the last column).
df = (
    df
    .drop(columns='Notification Handling')
    .assign(notification_response=encoded_notifications)
)

In [8]:
# The data has a symbol issue: some ranges use an en dash ('–') and others a
# plain hyphen ('-'). Normalise to '-' so one set of keys matches every row.
screen_time_text = df['Average Screen Time'].str.replace('–', '-', regex=False)

# Represent each bucket by a single number: the midpoint of a closed range,
# with 1 and 11 standing in for the two open-ended buckets.
# Labels that are not listed in the mapping become NaN.
screen_time_values = {
    'Less than 2': 1,
    '2-4': 3,
    '4-6': 5,
    '6-8': 7,
    '8-10': 9,
    'More than 10': 11,
}
df = (
    df
    .assign(average_screen_time=screen_time_text.map(screen_time_values))
    .drop(columns='Average Screen Time')
)

In [9]:
# Binary flag for the screen activity: 0 = entertainment, 1 = academic/work.
# Any other value in the column becomes NaN.
screen_activity_codes = {
    'Entertainment (gaming, streaming, social media, etc.)': 0,
    'Academic/Work-related': 1,
}
df = (
    df
    .assign(screen_activity_numeric=df['Screen Activity'].map(screen_activity_codes))
    .drop(columns='Screen Activity')
)

In [10]:
# Collapse the individual app categories into two broad groups.
app_group_by_category = {
    'Social Media (e.g., Facebook, Instagram, LinkedIn, Twitter)': 'Leisure',
    'Streaming (e.g., YouTube, Netflix)': 'Leisure',
    'Gaming': 'Leisure',
    'Messaging (e.g., WhatsApp, Messenger)': 'Utility',
    'Productivity (e.g., Microsoft Office, Notion)': 'Utility',
}
app_group = df['App Category'].map(app_group_by_category)

# Build the 0/1 indicator directly rather than through
# pd.get_dummies(..., drop_first=True). The dummy-based version only produced
# an 'app_group_Utility' column when both 'Leisure' and 'Utility' occurred in
# the data (otherwise the follow-up astype raised KeyError), and it relied on
# 'Leisure' sorting alphabetically before 'Utility' to be the dropped level.
# As before, 1 means Utility and 0 means Leisure; categories missing from the
# mapping (NaN group) are also encoded as 0.
df['app_group_Utility'] = (app_group == 'Utility').astype(int)
df = df.drop('App Category', axis=1)

In [11]:
# Encode the attention-span buckets from shortest (0) to longest (3).
# The two range labels are spelled with an en dash ('–'), matching the keys
# below; labels that are not listed in the mapping become NaN.
attention_span_codes = {
    'Less than 10 minutes': 0,
    '10–30 minutes': 1,
    '30–60 minutes': 2,
    'More than 1 hour': 3,
}
df = (
    df
    .assign(attention_span=df['Attention Span'].map(attention_span_codes))
    .drop(columns='Attention Span')
)

In [12]:
# Preview the first rows of the encoded frame before it is written to disk.
df.head()

Unnamed: 0,uses_productivity_apps,notification_response,average_screen_time,screen_activity_numeric,app_group_Utility,attention_span
0,1,2,11,0,0,1
1,2,1,9,0,0,3
2,2,2,5,1,0,1
3,2,1,9,1,0,1
4,2,2,9,1,1,2


In [13]:
# Write the encoded frame to CSV; index=False leaves the row index out of the file.
OUTPUT_PATH = "prepared_data.csv"
df.to_csv(OUTPUT_PATH, index=False)