In [None]:
# Mount Google Drive so the shared project folder (which holds the dataset) is reachable
from google.colab import drive
drive.mount('/content/drive')

# Make the shared project folder the working directory for the rest of the notebook
%cd '/content/drive/MyDrive/I310D/Data_Science_Girlies'

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/.shortcut-targets-by-id/1hiAuqfreGL6LOK3TaTCcr_I7tHeHvhdA/Data_Science_Girlies


In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
import tensorflow_datasets as tfds
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Dense, Concatenate
from tensorflow.keras.callbacks import EarlyStopping

In [None]:
# Load the dummy dataset from its CSV file on the mounted Drive
DATASET_CSV = '/content/drive//MyDrive/I310D/Data_Science_Girlies/dummy_data.csv'
dummy_df = pd.read_csv(DATASET_CSV)

# Optional cleanliness checks (uncomment one to inspect it):
# dummy_df.isnull().sum()
# dummy_df.duplicated().sum()

# Preview the first rows of the raw data
dummy_df.head()

Unnamed: 0,age,gender,time_spent,platform,interests,location,demographics,profession,income,indebt,isHomeOwner,Owns_Car
0,56,male,3,Instagram,Sports,United Kingdom,Urban,Software Engineer,19774,True,False,False
1,46,female,2,Facebook,Travel,United Kingdom,Urban,Student,10564,True,True,True
2,32,male,8,Instagram,Sports,Australia,Sub_Urban,Marketer Manager,13258,False,False,False
3,60,non-binary,5,Instagram,Travel,United Kingdom,Urban,Student,12500,False,True,False
4,25,male,1,Instagram,Lifestlye,Australia,Urban,Software Engineer,14566,False,True,True


In [None]:
# Bucket the numeric 'age' and 'income' columns into 3 equal-frequency bins
# (tertiles) and store the bucket label in a new categorical column.
for source_col, target_col, bucket_names in (
    ('age', 'age_range', ['Young', 'Adult', 'Old']),
    ('income', 'income_type', ['Poor', 'Mid', 'Rich']),
):
    dummy_df[target_col] = pd.qcut(dummy_df[source_col], q=3, labels=bucket_names)

dummy_df.head()

Unnamed: 0,age,gender,time_spent,platform,interests,location,demographics,profession,income,indebt,isHomeOwner,Owns_Car,age_range,income_type
0,56,male,3,Instagram,Sports,United Kingdom,Urban,Software Engineer,19774,True,False,False,Old,Rich
1,46,female,2,Facebook,Travel,United Kingdom,Urban,Student,10564,True,True,True,Adult,Poor
2,32,male,8,Instagram,Sports,Australia,Sub_Urban,Marketer Manager,13258,False,False,False,Young,Poor
3,60,non-binary,5,Instagram,Travel,United Kingdom,Urban,Student,12500,False,True,False,Old,Poor
4,25,male,1,Instagram,Lifestlye,Australia,Urban,Software Engineer,14566,False,True,True,Young,Mid


In [None]:
# One-hot encode every categorical input column
categorical_columns = [
    'gender', 'platform', 'interests',
    'location', 'demographics', 'profession',
    'indebt', 'isHomeOwner', 'Owns_Car',
    'age_range', 'income_type',
]
features_df = dummy_df[categorical_columns]

ohe = OneHotEncoder()
encoded_df = ohe.fit_transform(features_df)

# Dense 0/1 feature matrix (model inputs) and the 'time_spent' target vector
features = encoded_df.toarray()
labels = dummy_df['time_spent'].to_numpy()

# Hold out 20% of the rows for testing; fixed seed keeps the split repeatable
x_train, x_test, y_train, y_test = train_test_split(
    features,
    labels,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Names of the generated one-hot columns, in the same order as the columns of `features`
encoded_columns = ohe.get_feature_names_out(features_df.columns)

# Wrap the dense feature matrix in a labelled DataFrame so it can be inspected
encoded_df = pd.DataFrame(data=features, columns=encoded_columns)
encoded_df.head()


Unnamed: 0,gender_female,gender_male,gender_non-binary,platform_Facebook,platform_Instagram,platform_YouTube,interests_Lifestlye,interests_Sports,interests_Travel,location_Australia,...,isHomeOwner_False,isHomeOwner_True,Owns_Car_False,Owns_Car_True,age_range_Adult,age_range_Old,age_range_Young,income_type_Mid,income_type_Poor,income_type_Rich
0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,...,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0
1,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0
2,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,...,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0
3,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,...,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0
4,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,...,0.0,1.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0


In [None]:
# Seed the Python, NumPy and TensorFlow RNGs so weight initialisation (and the
# training that follows) is repeatable on Restart & Run All.
keras.utils.set_random_seed(42)

# Small regression network: one hidden ReLU layer feeding a single linear
# output unit that predicts `time_spent`.
model = Sequential()

# Take the input width from the encoded training matrix instead of hard-coding
# 30, so the model keeps working if the set of one-hot columns changes.
model.add(Input(shape=(x_train.shape[1], )))
model.add(Dense(21, activation='relu'))
model.add(Dense(1))

In [None]:
# Train with MSE loss; 'mse' is also tracked as a metric so model.evaluate()
# returns a (loss, mse) pair.
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mse'])
model.summary()

# Stop once the validation loss has not improved for 3 epochs and roll back to
# the best weights seen.
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

# Validate on a slice of the TRAINING data (the last 20% of x_train/y_train)
# rather than on the test set. Early stopping selects weights using the
# validation loss, so validating on x_test would leak the test set into model
# selection and make the RMSE reported on x_test optimistically biased.
history = model.fit(
    x_train,
    y_train,
    batch_size=32,
    epochs=20,
    validation_split=0.2,
    callbacks=[early_stopping],
)

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_2 (Dense)             (None, 21)                651       
                                                                 
 dense_3 (Dense)             (None, 1)                 22        
                                                                 
Total params: 673 (2.63 KB)
Trainable params: 673 (2.63 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20


<keras.src.callbacks.History at 0x7e818d8ead40>

In [None]:
# model.evaluate returns [loss, metric]; the model was compiled with
# metrics=['mse'], so `accuracy` actually holds the mean squared error.
loss, accuracy = model.evaluate(x_test, y_test)

# Root of the MSE gives the error in the same unit as `time_spent`
rmse = np.sqrt(accuracy)
print("RMSE:", rmse)

RMSE: 2.5093243281472777


In [None]:
# UI Fields
age = 20 # @param {type:"number"}
gender = 'female' # @param ["male", "female", "non-binary"]
platform = 'Facebook' # @param ["Instagram", "Facebook", "YouTube"]
interests = "Travel" # @param ["Sports", "Travel", "Lifestlye"]
location = 'United States' # @param ["United States", "United Kingdom", "Australia"]
demographics = "Urban" # @param ["Urban", "Sub_Urban", "Rural"]
profession = "Student" # @param ["Software Engineer", "Student", "Marketer Manager"]
income = 1000000 # @param {type:"number"}
inDebt = False # @param ["False", "True"] {type:"raw"}
isHomeOwner = True # @param ["False", "True"] {type:"raw"}
Owns_Car = True # @param ["False", "True"] {type:"raw"}

# Single-row frame using the same column names as the training CSV
input_data = {
    'age': [age],
    'gender': [gender],
    'platform': [platform],
    'interests': [interests],
    'location': [location],
    'demographics': [demographics],
    'profession': [profession],
    'income': [income],
    'indebt': [inDebt],
    'isHomeOwner': [isHomeOwner],
    'Owns_Car': [Owns_Car]
    }

input_df = pd.DataFrame(input_data)
print(f'Input Data\n {input_df}')
print('-' * 80)


def _tertile_bins(training_values):
    """Return the 3-quantile bin edges of `training_values` with open-ended outer bins.

    Args:
        training_values: numeric pandas Series taken from the training data.

    Returns:
        NumPy float array of 4 edges. The two inner edges are the tertile
        boundaries of `training_values`; the outer edges are replaced by
        -inf / +inf so that any input value (below the training minimum, above
        the training maximum, or exactly 0) still lands in a bucket.
    """
    _, edges = pd.qcut(training_values, q=3, retbins=True)
    edges = np.array(edges, dtype=float)
    edges[0], edges[-1] = -np.inf, np.inf
    return edges


# Categorize 'age' and 'income' with the tertile boundaries of the TRAINING data
# (the in-memory `dummy_df`), i.e. the same boundaries the model was trained on.
# `dummy_df` is deliberately not re-read from disk here: that would drop its
# 'age_range' / 'income_type' columns and break a re-run of the encoding cell.
input_df['age_range'] = pd.cut(
    input_df['age'],
    bins=_tertile_bins(dummy_df['age']),
    labels=['Young', 'Adult', 'Old'],
)
# Bin the *income* column here (binning 'age' with the income edges was a bug
# that made every realistic input 'Poor').
input_df['income_type'] = pd.cut(
    input_df['income'],
    bins=_tertile_bins(dummy_df['income']),
    labels=['Poor', 'Mid', 'Rich'],
)

# Encode the input with the already-fitted encoder, selecting exactly the
# columns (and column order) it was fitted on; this drops raw 'age'/'income'.
# The result is densified to match the dense arrays used for training, and is
# kept under its own name so the `encoded_df` preview frame is not overwritten.
input_features = ohe.transform(input_df[list(ohe.feature_names_in_)]).toarray()

# Predict time_spent for the form values with the trained regression network
predicted_screen_time = model.predict(input_features)
print(f"Predicted screen time from Neural Network: {predicted_screen_time[0][0]} hours")

Input Data
    age  gender  platform interests       location demographics profession  \
0   20  female  Facebook    Travel  United States        Urban    Student   

    income  indebt  isHomeOwner  Owns_Car  
0  1000000   False         True      True  
--------------------------------------------------------------------------------
Predicted screen time from Neural Network: 5.188096523284912 hours
