<a href="https://colab.research.google.com/github/arpita0911/student-daily-routine-model/blob/main/Arpita_Singh_aiml_assignment3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Student's daily routine analysis**

## Importing Libraries



In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
import joblib




## Loading Dataset

In [7]:
data = pd.read_csv('/content/student_routine_data.csv')

### First 5 rows

In [8]:
print("Original Data:")
print(data.head())

Original Data:
  sleep_hours  sleep_time wake_up_time   exam_sleep_hours study_hours  \
0   6-8 hours     12-2 am       6-8 am          4-6 hours   1-2 hours   
1   6-8 hours  after 2 am      8-10 am  less than 4 hours   2-4 hours   
2   6-8 hours     12-2 am      8-10 am          6-8 hours   4-6 hours   
3   4-6 hours     12-2 am       6-8 am  less than 4 hours   4-6 hours   
4   6-8 hours     12-2 am      8-10 am          4-6 hours   2-4 hours   

       schedule_type class_hours  assignment_hours social_media_hours  \
0  flexible schedule   5-7 hours         1-2 hours          1-2 hours   
1  flexible schedule   7-9 hours  less than 1 hour          1-2 hours   
2  flexible schedule   5-7 hours         1-2 hours          2-4 hours   
3  flexible schedule   5-7 hours         1-2 hours          1-2 hours   
4  flexible schedule   5-7 hours         1-2 hours          1-2 hours   

      hobbies_hours exercise_participation exercise_hours   break_frequency  \
0  less than 1 hour         

## Data Preprocessing

### Converting Categorical Values to Numerical Values

In [9]:
expected_columns = ['sleep_hours', 'class_hours', 'assignment_hours', 'exam_sleep_hours',
                    'study_hours', 'social_media_hours', 'hobbies_hours',
                    'exercise_hours', 'mental_hours', 'meal_hours', 'sleep_time', 'wake_up_time']

# Inspect unique values in relevant columns
for column in expected_columns:
    print(f"Unique values in {column}: {data[column].unique()}")

# Mapping time duration to numeric values
def map_hours(value):

    if value == '6-8 hours':
        return 7
    elif value == 'less than 4 hours':
        return 3
    elif value == '4-6 hours':
        return 5
    elif value == '8+ hours':
        return 9
    elif value == '1-2 hours':
        return 2
    elif value == '2-4 hours':
        return 3
    elif value == '6+ hours':
        return 7
    elif value == '3+ hours':
        return 4
    elif value == '2+ hours':
        return 3
    elif value == 'more than 6 hours':
        return 7
    elif value == '1-3 hours':
        return 2
    elif value == '3-5 hours':
        return 4
    elif value == '5-7 hours':
        return 6
    elif value == '7-9 hours':
        return 8
    elif value == 'less than 1 hour':
        return 0
    elif value == 'less than 2 hours':
        return 1
    elif value == 'less than 30 minutes':
        return 0
    elif value == '30 minutes to 1 hour':
        return 1
    elif value == '2-3 hours':
        return 3
    elif value == '3-4 hours':
        return 4
    elif value == '7-9 hours':
        return 8
    elif value == '12-2 am':
        return 1  # Midpoint is 1:00 am
    elif value == '8-10 pm':
        return 21  # Midpoint is 9:00 pm
    elif value == '10-12 pm':
        return 23  # Midpoint is 11:00 pm
    elif value == 'after 2 am':
        return 3  # Represents 3:00 am
    elif value == '4-6 am':
        return 5  # Represents 5:00 am
    elif value == '6-8 am':
        return 7  # Represents 7:00 am
    elif value == '8-10 am':
        return 9  # Represents 9:00 am
    elif value == 'after 10 am':
        return 11  # Represents 11:00 am

    return None  # Return None for unmatched values

# Replace existing columns with numeric versions
for column in expected_columns:
    if column in data.columns:
        data[column] = data[column].apply(map_hours)

# Fill missing values in the 'exercise_hours' column with 0
data['exercise_hours'].fillna(0, inplace=True)

# Mapping for ordinal encoding
ordinal_mapping = {
    'no schedule': 0,
    'flexible schedule': 1,
    'strict schedule': 2,
    'no': 0,
    'sometimes': 1,
    'yes': 2,
    'never': 0,
    'rarely': 1,
    'every hour': 2,
    'every 30 minutes': 3,
    'morning': 0,
    'afternoon': 1,
    'evening': 2,
    'night': 3
}

# Apply the mapping to the 'schedule_type' column
data['schedule_type'] = data['schedule_type'].map(ordinal_mapping)
# Apply the mapping to the 'participation' column
data['exercise_participation'] = data['exercise_participation'].map(ordinal_mapping)
# Apply the mapping to the 'break_frequency' column
data['break_frequency'] = data['break_frequency'].map(ordinal_mapping)
# Apply the mapping to the 'active_period' column
data['active_period'] = data['active_period'].map(ordinal_mapping)
# Apply the mapping to the 'time_efficiency' column
data['time_efficiency'] = data['time_efficiency'].map(ordinal_mapping)


Unique values in sleep_hours: ['6-8 hours' '4-6 hours' '8+ hours' 'less than 4 hours']
Unique values in class_hours: ['5-7 hours' '7-9 hours' '3-5 hours' '1-3 hours']
Unique values in assignment_hours: ['1-2 hours' 'less than 1 hour' '2-3 hours' '3-4 hours']
Unique values in exam_sleep_hours: ['4-6 hours' 'less than 4 hours' '6-8 hours' '8+ hours']
Unique values in study_hours: ['1-2 hours' '2-4 hours' '4-6 hours' '6+ hours']
Unique values in social_media_hours: ['1-2 hours' '2-4 hours' '4-6 hours' 'more than 6 hours']
Unique values in hobbies_hours: ['less than 1 hour' '1-2 hours' '2-3 hours' '3+ hours']
Unique values in exercise_hours: [nan '2-4 hours' '6-8 hours' '4-6 hours' '8+ hours']
Unique values in mental_hours: ['less than 2 hours' '2-4 hours' '6+ hours' '4-6 hours']
Unique values in meal_hours: ['2+ hours' '30 minutes to 1 hour' 'less than 30 minutes' '1-2 hours']
Unique values in sleep_time: ['12-2 am' 'after 2 am' '10-12 pm' '8-10 pm']
Unique values in wake_up_time: ['6-8 a

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  data['exercise_hours'].fillna(0, inplace=True)


### Transformed DataFrame (first 5 rows)

In [10]:
print("\nTransformed Data:")
print(data.head())


Transformed Data:
   sleep_hours  sleep_time  wake_up_time  exam_sleep_hours  study_hours  \
0            7           1             7                 5            2   
1            7           3             9                 3            3   
2            7           1             9                 7            5   
3            5           1             7                 3            5   
4            7           1             9                 5            3   

   schedule_type  class_hours  assignment_hours  social_media_hours  \
0              1            6                 2                   2   
1              1            8                 0                   2   
2              1            6                 2                   3   
3              1            6                 2                   2   
4              1            6                 2                   2   

   hobbies_hours  exercise_participation  exercise_hours  break_frequency  \
0              0          

## Model Building

###  Shift target variables by one day to create next-day targets

In [11]:
data['next_day_sleep_hours'] = data['sleep_hours'].shift(-1)
data['next_day_study_hours'] = data['study_hours'].shift(-1)
data['next_day_social_media_hours'] = data['social_media_hours'].shift(-1)
data['next_day_exercise_hours'] = data['exercise_hours'].shift(-1)
data['next_day_hobbies_hours'] = data['hobbies_hours'].shift(-1)

### Drop the last row with NaN values (because there's no next day data for it)

In [12]:
data = data.dropna()

###  Define features (X) as today's data and targets (y) for next day's subset of routine

In [13]:
X = data[['sleep_hours', 'sleep_time', 'wake_up_time', 'exam_sleep_hours',
          'study_hours', 'schedule_type', 'class_hours', 'assignment_hours',
          'social_media_hours', 'hobbies_hours', 'exercise_participation',
          'exercise_hours', 'break_frequency', 'mental_hours',
          'active_period', 'meal_hours', 'time_efficiency']]

y = data[['next_day_sleep_hours', 'next_day_study_hours',
          'next_day_social_media_hours', 'next_day_exercise_hours',
          'next_day_hobbies_hours']]

### Split the data into training and testing sets

In [14]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

###  Initialize and train the model

In [15]:
model = RandomForestRegressor(random_state=42)
model.fit(X_train, y_train)

joblib.dump(model, 'model.pkl')

import os
model_path = os.path.join(os.getcwd(), 'model.pkl')
print(model_path)

/content/model.pkl


### Predict the next day's specific routine hours for the test set

In [16]:
y_pred = model.predict(X_test)

### Evaluate the model's performance using Mean Squared Error (MSE)

In [17]:
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error: {mse:.2f}")


Mean Squared Error: 2.63


### Display predictions for the first few entries

In [18]:
predicted_df = pd.DataFrame(y_pred, columns=y.columns)
print("Predicted next day routine (first 5 rows):")
print(predicted_df.head())

print("Actual next day routine (first 5 rows):")
print(y_test.head())

Predicted next day routine (first 5 rows):
   next_day_sleep_hours  next_day_study_hours  next_day_social_media_hours  \
0                  6.94                  4.51                         3.53   
1                  6.58                  3.51                         2.40   
2                  6.66                  3.51                         2.55   
3                  5.74                  3.63                         3.86   
4                  6.24                  3.27                         2.86   

   next_day_exercise_hours  next_day_hobbies_hours  
0                     3.83                    1.06  
1                     2.19                    0.57  
2                     2.68                    1.25  
3                     3.34                    0.52  
4                     2.76                    1.08  
Actual next day routine (first 5 rows):
    next_day_sleep_hours  next_day_study_hours  next_day_social_media_hours  \
62                   3.0                   2.0     

##User Interface

In [19]:
!pip install ipywidgets


Collecting jedi>=0.16 (from ipython>=4.0.0->ipywidgets)
  Downloading jedi-0.19.2-py2.py3-none-any.whl.metadata (22 kB)
Downloading jedi-0.19.2-py2.py3-none-any.whl (1.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m18.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: jedi
Successfully installed jedi-0.19.2


In [20]:
import joblib
import numpy as np
import ipywidgets as widgets
from IPython.display import display

# Load the model
model = joblib.load('model.pkl')

# Define widgets for each input
inputs = {}
for feature in [
    'sleep_hours', 'sleep_time', 'wake_up_time', 'exam_sleep_hours',
    'study_hours', 'schedule_type', 'class_hours', 'assignment_hours',
    'social_media_hours', 'hobbies_hours', 'exercise_participation',
    'exercise_hours', 'break_frequency', 'mental_hours',
    'active_period', 'meal_hours', 'time_efficiency'
]:
    inputs[feature] = widgets.FloatText(description=feature)

# Define button and output display
button = widgets.Button(description="Predict")
output = widgets.Output()

# Define prediction function
def on_button_click(b):
    input_data = np.array([inputs[feature].value for feature in inputs]).reshape(1, -1)
    prediction = model.predict(input_data)

    # Display results
    with output:
        output.clear_output()
        print("Next Day's Routine Prediction:")
        for i, target in enumerate([
            'next_day_sleep_hours', 'next_day_study_hours', 'next_day_social_media_hours',
            'next_day_exercise_hours', 'next_day_hobbies_hours'
        ]):
            print(f"{target}: {prediction[0][i]:.2f}")

# Bind prediction function to button
button.on_click(on_button_click)

# Display widgets
display(*inputs.values(), button, output)


FloatText(value=0.0, description='sleep_hours')

FloatText(value=0.0, description='sleep_time')

FloatText(value=0.0, description='wake_up_time')

FloatText(value=0.0, description='exam_sleep_hours')

FloatText(value=0.0, description='study_hours')

FloatText(value=0.0, description='schedule_type')

FloatText(value=0.0, description='class_hours')

FloatText(value=0.0, description='assignment_hours')

FloatText(value=0.0, description='social_media_hours')

FloatText(value=0.0, description='hobbies_hours')

FloatText(value=0.0, description='exercise_participation')

FloatText(value=0.0, description='exercise_hours')

FloatText(value=0.0, description='break_frequency')

FloatText(value=0.0, description='mental_hours')

FloatText(value=0.0, description='active_period')

FloatText(value=0.0, description='meal_hours')

FloatText(value=0.0, description='time_efficiency')

Button(description='Predict', style=ButtonStyle())

Output()



##Association Rules

In [23]:
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
from sklearn.preprocessing import KBinsDiscretizer

frequent_itemsets = apriori(onehot_data, min_support=0.1, use_colnames=True)

num_itemsets = len(frequent_itemsets)

rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1.0, num_itemsets=num_itemsets)

print(rules)

  and should_run_async(code)


                      antecedents  \
0                (sleep_time_0.0)   
1               (sleep_hours_1.0)   
2               (sleep_hours_1.0)   
3              (wake_up_time_1.0)   
4               (sleep_hours_1.0)   
...                           ...   
98197  (next_day_sleep_hours_3.0)   
98198           (sleep_hours_3.0)   
98199       (time_efficiency_4.0)   
98200        (exercise_hours_1.0)   
98201         (active_period_0.0)   

                                             consequents  antecedent support  \
0                                      (sleep_hours_1.0)            0.602041   
1                                       (sleep_time_0.0)            0.316327   
2                                     (wake_up_time_1.0)            0.316327   
3                                      (sleep_hours_1.0)            0.510204   
4                                 (exam_sleep_hours_0.0)            0.316327   
...                                                  ...                 ..